From 0e19543b7b0cbc47fd9cac01cd7738f14f381c70 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Wed, 23 Aug 2023 10:00:51 +0800 Subject: [PATCH 01/11] rv: Set variable 'da_mon_##name' to static gcc with W=1 reports kernel/trace/rv/monitors/wip/wip.c:20:1: sparse: sparse: symbol 'da_mon_wip' was not declared. Should it be static? The per-cpu variable 'da_mon_##name' is only used in its defining file, so it should be static. Link: https://lore.kernel.org/linux-trace-kernel/20230823020051.3184953-1-liaoyu15@huawei.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202307280030.7EjUG9gR-lkp@intel.com/ Signed-off-by: Yu Liao Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/rv/da_monitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 9eb75683e012..9705b2a98e49 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -262,7 +262,7 @@ static inline void da_monitor_destroy_##name(void) \ /* \ * per-cpu monitor variables \ */ \ -DEFINE_PER_CPU(struct da_monitor, da_mon_##name); \ +static DEFINE_PER_CPU(struct da_monitor, da_mon_##name); \ \ /* \ * da_get_monitor_##name - return current CPU monitor address \ From 2cf0dee989a8b2501929eaab29473b6b1fa11057 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Fri, 25 Aug 2023 13:34:30 +0300 Subject: [PATCH 02/11] tracing: Remove extra space at the end of hwlat_detector/mode Space is printed after each mode value including the last one: $ echo \"$(sudo cat /sys/kernel/tracing/hwlat_detector/mode)\" "none [round-robin] per-cpu " Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://lore.kernel.org/linux-trace-kernel/20230825103432.7750-1-m.kobuk@ispras.ru Cc: Masami Hiramatsu Fixes: 8fa826b7344d ("trace/hwlat: Implement the mode config option") Signed-off-by: Mikhail Kobuk Reviewed-by: Alexey Khoroshilov Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 2f37a6e68aa9..b791524a6536 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -635,7 +635,7 @@ static int s_mode_show(struct seq_file *s, void *v) else seq_printf(s, "%s", thread_mode_str[mode]); - if (mode != MODE_MAX) + if (mode < MODE_MAX - 1) /* if mode is any but last */ seq_puts(s, " "); return 0; From 3163f635b20e9e1fb4659e74f47918c9dddfe64e Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 31 Aug 2023 21:27:39 +0800 Subject: [PATCH 03/11] tracing: Fix race issue between cpu buffer write and swap Warning happened in rb_end_commit() at code: if (RB_WARN_ON(cpu_buffer, !local_read(&cpu_buffer->committing))) WARNING: CPU: 0 PID: 139 at kernel/trace/ring_buffer.c:3142 rb_commit+0x402/0x4a0 Call Trace: ring_buffer_unlock_commit+0x42/0x250 trace_buffer_unlock_commit_regs+0x3b/0x250 trace_event_buffer_commit+0xe5/0x440 trace_event_buffer_reserve+0x11c/0x150 trace_event_raw_event_sched_switch+0x23c/0x2c0 __traceiter_sched_switch+0x59/0x80 __schedule+0x72b/0x1580 schedule+0x92/0x120 worker_thread+0xa0/0x6f0 It is because the race between writing event into cpu buffer and swapping cpu buffer through file per_cpu/cpu0/snapshot: Write on CPU 0 Swap buffer by per_cpu/cpu0/snapshot on CPU 1 -------- -------- tracing_snapshot_write() [...] ring_buffer_lock_reserve() cpu_buffer = buffer->buffers[cpu]; // 1. Suppose find 'cpu_buffer_a'; [...] rb_reserve_next_event() [...] ring_buffer_swap_cpu() if (local_read(&cpu_buffer_a->committing)) goto out_dec; if (local_read(&cpu_buffer_b->committing)) goto out_dec; buffer_a->buffers[cpu] = cpu_buffer_b; buffer_b->buffers[cpu] = cpu_buffer_a; // 2. cpu_buffer has swapped here. rb_start_commit(cpu_buffer); if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) { // 3. This check passed due to 'cpu_buffer->buffer' [...] // has not changed here. return NULL; } cpu_buffer_b->buffer = buffer_a; cpu_buffer_a->buffer = buffer_b; [...] // 4. Reserve event from 'cpu_buffer_a'. ring_buffer_unlock_commit() [...] cpu_buffer = buffer->buffers[cpu]; // 5. Now find 'cpu_buffer_b' !!! rb_commit(cpu_buffer) rb_end_commit() // 6. WARN for the wrong 'committing' state !!! Based on above analysis, we can easily reproduce by following testcase: ``` bash #!/bin/bash dmesg -n 7 sysctl -w kernel.panic_on_warn=1 TR=/sys/kernel/tracing echo 7 > ${TR}/buffer_size_kb echo "sched:sched_switch" > ${TR}/set_event while [ true ]; do echo 1 > ${TR}/per_cpu/cpu0/snapshot done & while [ true ]; do echo 1 > ${TR}/per_cpu/cpu0/snapshot done & while [ true ]; do echo 1 > ${TR}/per_cpu/cpu0/snapshot done & ``` To fix it, IIUC, we can use smp_call_function_single() to do the swap on the target cpu where the buffer is located, so that above race would be avoided. Link: https://lore.kernel.org/linux-trace-kernel/20230831132739.4070878-1-zhengyejian1@huawei.com Cc: Fixes: f1affcaaa861 ("tracing: Add snapshot in the per_cpu trace directories") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3e55375f47e0..23579fba1a57 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7599,6 +7599,11 @@ out: return ret; } +static void tracing_swap_cpu_buffer(void *tr) +{ + update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); +} + static ssize_t tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -7657,13 +7662,15 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; - local_irq_disable(); /* Now, we're going to swap */ - if (iter->cpu_file == RING_BUFFER_ALL_CPUS) + if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { + local_irq_disable(); update_max_tr(tr, current, smp_processor_id(), NULL); - else - update_max_tr_single(tr, current, iter->cpu_file); - local_irq_enable(); + local_irq_enable(); + } else { + smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, + (void *)tr, 1); + } break; default: if (tr->allocated_snapshot) { From 2933d3cd079d3bf6fded709de7d97c1dc71d9633 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Thu, 31 Aug 2023 19:42:12 +0000 Subject: [PATCH 04/11] tracing: Replace strlcpy with strscpy in trace/events/task.h strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Link: https://lore.kernel.org/linux-trace-kernel/20230831194212.1529941-1-azeemshaikh38@gmail.com Cc: Masami Hiramatsu Reviewed-by: Kees Cook Signed-off-by: Azeem Shaikh Signed-off-by: Steven Rostedt (Google) --- include/trace/events/task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/task.h b/include/trace/events/task.h index 64d160930b0d..47b527464d1a 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -47,7 +47,7 @@ TRACE_EVENT(task_rename, TP_fast_assign( __entry->pid = task->pid; memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); - strlcpy(entry->newcomm, comm, TASK_COMM_LEN); + strscpy(entry->newcomm, comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), From 13511489046a60f76a9f9fef1335837b28d325e4 Mon Sep 17 00:00:00 2001 From: Levi Yun Date: Thu, 3 Aug 2023 21:52:36 +0100 Subject: [PATCH 05/11] ftrace: Use within_module to check rec->ip within specified module. within_module_core && within_module_init condition is same to within module but it's more readable. Use within_module instead of former condition to check rec->ip within specified module area or not. Link: https://lore.kernel.org/linux-trace-kernel/20230803205236.32201-1-ppbuk5246@gmail.com Signed-off-by: Levi Yun Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 05c0024815bf..c46dd6d97afe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6779,8 +6779,7 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod) || - within_module_init(rec->ip, mod)) { + if (within_module(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -6852,8 +6851,7 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod) && - !within_module_init(rec->ip, mod)) + if (!within_module(rec->ip, mod)) break; /* Weak functions should still be ignored */ From 2a30dbcbef96f4073d3a5f8708865b1aab0bfc6d Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Wed, 9 Aug 2023 15:15:51 +0800 Subject: [PATCH 06/11] ftrace: Use LIST_HEAD to initialize clear_hash Use LIST_HEAD() to initialize clear_hash instead of open-coding it. Link: https://lore.kernel.org/linux-trace-kernel/20230809071551.913041-1-ruanjinjie@huawei.com Cc: Masami Hiramatsu Cc: Mark Rutland Signed-off-by: Ruan Jinjie Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c46dd6d97afe..8de8bec5f366 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7140,9 +7140,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) struct dyn_ftrace key; struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; - struct list_head clear_hash; - - INIT_LIST_HEAD(&clear_hash); + LIST_HEAD(clear_hash); key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ From 3d07fa1dd19035eb0b13ae6697efd5caa9033e74 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 31 Aug 2023 08:55:00 -0400 Subject: [PATCH 07/11] tracing: Zero the pipe cpumask on alloc to avoid spurious -EBUSY The pipe cpumask used to serialize opens between the main and percpu trace pipes is not zeroed or initialized. This can result in spurious -EBUSY returns if underlying memory is not fully zeroed. This has been observed by immediate failure to read the main trace_pipe file on an otherwise newly booted and idle system: # cat /sys/kernel/debug/tracing/trace_pipe cat: /sys/kernel/debug/tracing/trace_pipe: Device or resource busy Zero the allocation of pipe_cpumask to avoid the problem. Link: https://lore.kernel.org/linux-trace-kernel/20230831125500.986862-1-bfoster@redhat.com Cc: stable@vger.kernel.org Fixes: c2489bb7e6be ("tracing: Introduce pipe_cpumask to avoid race on trace_pipes") Reviewed-by: Zheng Yejian Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Brian Foster Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 23579fba1a57..35783a7baf15 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9474,7 +9474,7 @@ static struct trace_array *trace_array_create(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; - if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) goto out_free_tr; tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; @@ -10419,7 +10419,7 @@ __init static int tracer_alloc_buffers(void) if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; - if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) goto out_free_savedcmd; /* TODO: make the number of buffers hot pluggable with CPUS */ From 9af4058493c59721eccd90b7c40cad793e4e3c3b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:36 +0200 Subject: [PATCH 08/11] tracing/filters: Fix error-handling of cpulist parsing buffer parse_pred() allocates a string buffer to parse the user-provided cpulist, but doesn't check the allocation result nor does it free the buffer once it is no longer needed. Add an allocation check, and free the buffer as soon as it is no longer needed. Link: https://lkml.kernel.org/r/20230901151039.125186-2-vschneid@redhat.com Cc: Masami Hiramatsu Reported-by: Steven Rostedt Reported-by: Josh Poimboeuf Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 3a529214a21b..c06e1d596f4b 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1744,17 +1744,23 @@ static int parse_pred(const char *str, void *data, /* Copy the cpulist between { and } */ tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL); - strscpy(tmp, str + maskstart, (i - maskstart) + 1); - - pred->mask = kzalloc(cpumask_size(), GFP_KERNEL); - if (!pred->mask) + if (!tmp) goto err_mem; + strscpy(tmp, str + maskstart, (i - maskstart) + 1); + pred->mask = kzalloc(cpumask_size(), GFP_KERNEL); + if (!pred->mask) { + kfree(tmp); + goto err_mem; + } + /* Now parse it */ if (cpulist_parse(tmp, pred->mask)) { + kfree(tmp); parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i); goto err_free; } + kfree(tmp); /* Move along */ i++; From 1caf7adb9e000479d2bd29c86b6d7eaeb24b1b05 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:37 +0200 Subject: [PATCH 09/11] tracing/filters: Fix double-free of struct filter_pred.mask When a cpulist filter is found to contain a single CPU, that CPU is saved as a scalar and the backing cpumask storage is freed. Also NULL the mask to avoid a double-free once we get down to free_predicate(). Link: https://lkml.kernel.org/r/20230901151039.125186-3-vschneid@redhat.com Cc: Masami Hiramatsu Reported-by: Steven Rostedt Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c06e1d596f4b..eb331e8b00b6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1773,6 +1773,7 @@ static int parse_pred(const char *str, void *data, if (single) { pred->val = cpumask_first(pred->mask); kfree(pred->mask); + pred->mask = NULL; } if (field->filter_type == FILTER_CPUMASK) { From 2900bcbee3899e900853be555665f126673141b7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:38 +0200 Subject: [PATCH 10/11] tracing/filters: Change parse_pred() cpulist ternary into an if block Review comments noted that an if block would be clearer than a ternary, so swap it out. No change in behaviour intended Link: https://lkml.kernel.org/r/20230901151039.125186-4-vschneid@redhat.com Cc: Masami Hiramatsu Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index eb331e8b00b6..09b4733a2933 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1782,13 +1782,17 @@ static int parse_pred(const char *str, void *data, FILTER_PRED_FN_CPUMASK; } else if (field->filter_type == FILTER_CPU) { if (single) { - pred->op = pred->op == OP_BAND ? OP_EQ : pred->op; + if (pred->op == OP_BAND) + pred->op = OP_EQ; + pred->fn_num = FILTER_PRED_FN_CPU; } else { pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK; } } else if (single) { - pred->op = pred->op == OP_BAND ? OP_EQ : pred->op; + if (pred->op == OP_BAND) + pred->op = OP_EQ; + pred->fn_num = select_comparison_fn(pred->op, field->size, false); if (pred->op == OP_NE) pred->not = 1; From cbb557ba92f08b945e2cb20b7ab37ef49ab53cdd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 1 Sep 2023 17:10:39 +0200 Subject: [PATCH 11/11] tracing/filters: Fix coding style issues Recent commits have introduced some coding style issues, fix those up. Link: https://lkml.kernel.org/r/20230901151039.125186-5-vschneid@redhat.com Cc: Masami Hiramatsu Signed-off-by: Valentin Schneider Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 09b4733a2933..33264e510d16 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1360,7 +1360,7 @@ int filter_assign_type(const char *type) return FILTER_DYN_STRING; if (strstr(type, "cpumask_t")) return FILTER_CPUMASK; - } + } if (strstr(type, "__rel_loc") && strstr(type, "char")) return FILTER_RDYN_STRING; @@ -1731,7 +1731,9 @@ static int parse_pred(const char *str, void *data, maskstart = i; /* Walk the cpulist until closing } */ - for (; str[i] && str[i] != '}'; i++); + for (; str[i] && str[i] != '}'; i++) + ; + if (str[i] != '}') { parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i); goto err_free;