Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/trace/Kconfig                 |  10
 -rw-r--r--  kernel/trace/fgraph.c                | 155
 -rw-r--r--  kernel/trace/ftrace.c                | 115
 -rw-r--r--  kernel/trace/trace.c                 |  15
 -rw-r--r--  kernel/trace/trace.h                 |  11
 -rw-r--r--  kernel/trace/trace_entries.h         |  29
 -rw-r--r--  kernel/trace/trace_functions_graph.c | 264
 -rw-r--r--  kernel/trace/trace_selftest.c        |   1
8 files changed, 451 insertions, 149 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 721c3b221048..74c2b1d43bb9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -242,6 +242,16 @@ config FUNCTION_GRAPH_RETVAL enable it via the trace option funcgraph-retval. See Documentation/trace/ftrace.rst +config FUNCTION_GRAPH_RETADDR + bool "Kernel Function Graph Return Address" + depends on FUNCTION_GRAPH_TRACER + default n + help + Support recording and printing the function return address when + using function graph tracer. It can be helpful to locate code line that + the function is called. This feature is off by default, and you can + enable it via the trace option funcgraph-retaddr. + config DYNAMIC_FTRACE bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 69e226a48daa..0bf78517b5d4 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -153,7 +153,7 @@ enum { * SHADOW_STACK_OFFSET: The size in long words of the shadow stack * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added */ -#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_SIZE (4096) #define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) /* Leave on a buffer at the end */ #define SHADOW_STACK_MAX_OFFSET \ @@ -172,6 +172,8 @@ enum { DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; +static struct kmem_cache *fgraph_stack_cachep; + static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; static unsigned long fgraph_array_bitmask; @@ -390,21 +392,7 @@ void *fgraph_reserve_data(int idx, int size_bytes) */ void *fgraph_retrieve_data(int idx, int *size_bytes) { - int offset = current->curr_ret_stack - 1; - unsigned long val; - - val = get_fgraph_entry(current, offset); - while (__get_type(val) == FGRAPH_TYPE_DATA) { - if (__get_data_index(val) == idx) - goto found; - offset -= __get_data_size(val) + 1; - val = get_fgraph_entry(current, offset); - } - return NULL; -found: - if (size_bytes) - *size_bytes = __get_data_size(val) * sizeof(long); - return get_data_type_data(current, offset); + return fgraph_retrieve_parent_data(idx, size_bytes, 0); } /** @@ -460,8 +448,56 @@ get_ret_stack(struct task_struct *t, int offset, int *frame_offset) return RET_STACK(t, offset); } +/** + * fgraph_retrieve_parent_data - get data from a parent function + * @idx: The index into the fgraph_array (fgraph_ops::idx) + * @size_bytes: A pointer to retrieved data size + * @depth: The depth to find the parent (0 is the current function) + * + * This is similar to fgraph_retrieve_data() but can be used to retrieve + * data from a parent caller function. 
+ * + * Return: a pointer to the specified parent data or NULL if not found + */ +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth) +{ + struct ftrace_ret_stack *ret_stack = NULL; + int offset = current->curr_ret_stack; + unsigned long val; + + if (offset <= 0) + return NULL; + + for (;;) { + int next_offset; + + ret_stack = get_ret_stack(current, offset, &next_offset); + if (!ret_stack || --depth < 0) + break; + offset = next_offset; + } + + if (!ret_stack) + return NULL; + + offset--; + + val = get_fgraph_entry(current, offset); + while (__get_type(val) == FGRAPH_TYPE_DATA) { + if (__get_data_index(val) == idx) + goto found; + offset -= __get_data_size(val) + 1; + val = get_fgraph_entry(current, offset); + } + return NULL; +found: + if (size_bytes) + *size_bytes = __get_data_size(val) * sizeof(long); + return get_data_type_data(current, offset); +} + /* Both enabled by default (can be cleared by function_graph tracer flags */ -static bool fgraph_sleep_time = true; +bool fgraph_sleep_time = true; #ifdef CONFIG_DYNAMIC_FTRACE /* @@ -524,7 +560,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int fgraph_idx) { struct ftrace_ret_stack *ret_stack; - unsigned long long calltime; unsigned long val; int offset; @@ -554,8 +589,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, return -EBUSY; } - calltime = trace_clock_local(); - offset = READ_ONCE(current->curr_ret_stack); ret_stack = RET_STACK(current, offset); offset += FGRAPH_FRAME_OFFSET; @@ -589,7 +622,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, ret_stack->ret = ret; ret_stack->func = func; - ret_stack->calltime = calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST ret_stack->fp = frame_pointer; #endif @@ -723,7 +755,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, *offset += FGRAPH_FRAME_OFFSET; *ret = ret_stack->ret; trace->func = ret_stack->func; - trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = current->curr_ret_depth; /* @@ -868,6 +899,29 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx) } /** + * ftrace_graph_top_ret_addr - return the top return address in the shadow stack + * @task: The task to read the shadow stack from. + * + * Return the first return address on the shadow stack of the @task, which is + * not the fgraph's return_to_handler. + */ +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task) +{ + unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); + struct ftrace_ret_stack *ret_stack = NULL; + int offset = task->curr_ret_stack; + + if (offset < 0) + return 0; + + do { + ret_stack = get_ret_stack(task, offset, &offset); + } while (ret_stack && ret_stack->ret == return_handler); + + return ret_stack ? 
ret_stack->ret : 0; +} + +/** * ftrace_graph_ret_addr - return the original value of the return address * @task: The task the unwinder is being executed on * @idx: An initialized pointer to the next stack index to use @@ -892,7 +946,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, { struct ftrace_ret_stack *ret_stack; unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); - int i = task->curr_ret_stack; + int i; if (ret != return_handler) return ret; @@ -970,8 +1024,11 @@ static int alloc_retstack_tasklist(unsigned long **ret_stack_list) int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; struct task_struct *g, *t; + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return -ENOMEM; + for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + ret_stack_list[i] = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL); if (!ret_stack_list[i]) { start = 0; end = i; @@ -1002,7 +1059,7 @@ unlock: rcu_read_unlock(); free: for (i = start; i < end; i++) - kfree(ret_stack_list[i]); + kmem_cache_free(fgraph_stack_cachep, ret_stack_list[i]); return ret; } @@ -1012,9 +1069,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *next, unsigned int prev_state) { - struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; - int offset; /* * Does the user want to count the time a function was asleep. @@ -1031,17 +1086,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, if (!next->ftrace_timestamp) return; - /* - * Update all the counters in next to make up for the - * time next was sleeping. - */ - timestamp -= next->ftrace_timestamp; - - for (offset = next->curr_ret_stack; offset > 0; ) { - ret_stack = get_ret_stack(next, offset, &offset); - if (ret_stack) - ret_stack->calltime += timestamp; - } + next->ftrace_sleeptime += timestamp - next->ftrace_timestamp; } static DEFINE_PER_CPU(unsigned long *, idle_ret_stack); @@ -1077,9 +1122,12 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) if (ftrace_graph_active) { unsigned long *ret_stack; + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return; + ret_stack = per_cpu(idle_ret_stack, cpu); if (!ret_stack) { - ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL); if (!ret_stack) return; per_cpu(idle_ret_stack, cpu) = ret_stack; @@ -1099,7 +1147,10 @@ void ftrace_graph_init_task(struct task_struct *t) if (ftrace_graph_active) { unsigned long *ret_stack; - ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return; + + ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL); if (!ret_stack) return; graph_init_task(t, ret_stack); @@ -1114,7 +1165,11 @@ void ftrace_graph_exit_task(struct task_struct *t) /* NULL must become visible to IRQs before we free it: */ barrier(); - kfree(ret_stack); + if (ret_stack) { + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return; + kmem_cache_free(fgraph_stack_cachep, ret_stack); + } } #ifdef CONFIG_DYNAMIC_FTRACE @@ -1254,6 +1309,14 @@ int register_ftrace_graph(struct fgraph_ops *gops) guard(mutex)(&ftrace_lock); + if (!fgraph_stack_cachep) { + fgraph_stack_cachep = kmem_cache_create("fgraph_stack", + SHADOW_STACK_SIZE, + SHADOW_STACK_SIZE, 0, NULL); + if (!fgraph_stack_cachep) + return -ENOMEM; + } + if (!fgraph_initialized) { ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph:online", fgraph_cpu_init, NULL); @@ -1318,17 +1381,17 @@ void 
unregister_ftrace_graph(struct fgraph_ops *gops) { int command = 0; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); if (unlikely(!ftrace_graph_active)) - goto out; + return; if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE || fgraph_array[gops->idx] != gops)) - goto out; + return; if (fgraph_lru_release_index(gops->idx) < 0) - goto out; + return; fgraph_array[gops->idx] = &fgraph_stub; @@ -1350,7 +1413,5 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); } - out: gops->saved_func = NULL; - mutex_unlock(&ftrace_lock); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4c28dd177ca6..71e53eaba8bc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -820,10 +820,16 @@ void ftrace_graph_graph_time_control(bool enable) fgraph_graph_time = enable; } +struct profile_fgraph_data { + unsigned long long calltime; + unsigned long long subtime; + unsigned long long sleeptime; +}; + static int profile_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { - struct ftrace_ret_stack *ret_stack; + struct profile_fgraph_data *profile_data; function_profile_call(trace->func, 0, NULL, NULL); @@ -831,9 +837,13 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, if (!current->ret_stack) return 0; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack) - ret_stack->subtime = 0; + profile_data = fgraph_reserve_data(gops->idx, sizeof(*profile_data)); + if (!profile_data) + return 0; + + profile_data->subtime = 0; + profile_data->sleeptime = current->ftrace_sleeptime; + profile_data->calltime = trace_clock_local(); return 1; } @@ -841,33 +851,42 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { - struct ftrace_ret_stack *ret_stack; + struct profile_fgraph_data *profile_data; struct ftrace_profile_stat *stat; unsigned long long calltime; + unsigned long long rettime = trace_clock_local(); struct ftrace_profile *rec; unsigned long flags; + int size; local_irq_save(flags); stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) goto out; + profile_data = fgraph_retrieve_data(gops->idx, &size); + /* If the calltime was zero'd ignore it */ - if (!trace->calltime) + if (!profile_data || !profile_data->calltime) goto out; - calltime = trace->rettime - trace->calltime; + calltime = rettime - profile_data->calltime; + + if (!fgraph_sleep_time) { + if (current->ftrace_sleeptime) + calltime -= current->ftrace_sleeptime - profile_data->sleeptime; + } if (!fgraph_graph_time) { + struct profile_fgraph_data *parent_data; /* Append this call time to the parent time to subtract */ - ret_stack = ftrace_graph_get_ret_stack(current, 1); - if (ret_stack) - ret_stack->subtime += calltime; + parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1); + if (parent_data) + parent_data->subtime += calltime; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack && ret_stack->subtime < calltime) - calltime -= ret_stack->subtime; + if (profile_data->subtime && profile_data->subtime < calltime) + calltime -= profile_data->subtime; else calltime = 0; } @@ -883,6 +902,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, } static struct fgraph_ops fprofiler_ops = { + .ops = { + .flags = FTRACE_OPS_FL_INITIALIZED, + INIT_OPS_HASH(fprofiler_ops.ops) + }, 
.entryfunc = &profile_graph_entry, .retfunc = &profile_graph_return, }; @@ -3663,7 +3686,8 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, } -static u64 ftrace_update_time; +u64 ftrace_update_time; +u64 ftrace_total_mod_time; unsigned long ftrace_update_tot_cnt; unsigned long ftrace_number_of_pages; unsigned long ftrace_number_of_groups; @@ -3683,7 +3707,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) bool init_nop = ftrace_need_init_nop(); struct ftrace_page *pg; struct dyn_ftrace *p; - u64 start, stop; + u64 start, stop, update_time; unsigned long update_cnt = 0; unsigned long rec_flags = 0; int i; @@ -3727,7 +3751,11 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) } stop = ftrace_now(raw_smp_processor_id()); - ftrace_update_time = stop - start; + update_time = stop - start; + if (mod) + ftrace_total_mod_time += update_time; + else + ftrace_update_time = update_time; ftrace_update_tot_cnt += update_cnt; return 0; @@ -4806,15 +4834,13 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) mod_g.len = strlen(mod_g.search); } - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); if (unlikely(ftrace_disabled)) - goto out_unlock; + return 0; - if (func_g.type == MATCH_INDEX) { - found = add_rec_by_index(hash, &func_g, clear_filter); - goto out_unlock; - } + if (func_g.type == MATCH_INDEX) + return add_rec_by_index(hash, &func_g, clear_filter); do_for_each_ftrace_rec(pg, rec) { @@ -4823,16 +4849,12 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); - if (ret < 0) { - found = ret; - goto out_unlock; - } + if (ret < 0) + return ret; found = 1; } cond_resched(); } while_for_each_ftrace_rec(); - out_unlock: - mutex_unlock(&ftrace_lock); return found; } @@ -4930,14 +4952,14 @@ static int cache_mod(struct trace_array *tr, { struct ftrace_mod_load *ftrace_mod, *n; struct list_head *head = enable ? 
&tr->mod_trace : &tr->mod_notrace; - int ret; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); /* We do not cache inverse filters */ if (func[0] == '!') { + int ret = -EINVAL; + func++; - ret = -EINVAL; /* Look to remove this hash */ list_for_each_entry_safe(ftrace_mod, n, head, list) { @@ -4953,20 +4975,15 @@ static int cache_mod(struct trace_array *tr, continue; } } - goto out; + return ret; } - ret = -EINVAL; /* We only care about modules that have not been loaded yet */ if (module_exists(module)) - goto out; + return -EINVAL; /* Save this string off, and execute it when the module is loaded */ - ret = ftrace_add_mod(tr, func, module, enable); - out: - mutex_unlock(&ftrace_lock); - - return ret; + return ftrace_add_mod(tr, func, module, enable); } static int @@ -5276,7 +5293,7 @@ static void release_probe(struct ftrace_func_probe *probe) { struct ftrace_probe_ops *probe_ops; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); WARN_ON(probe->ref <= 0); @@ -5294,7 +5311,6 @@ static void release_probe(struct ftrace_func_probe *probe) list_del(&probe->list); kfree(probe); } - mutex_unlock(&ftrace_lock); } static void acquire_probe_locked(struct ftrace_func_probe *probe) @@ -6805,12 +6821,10 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) func_g.len = strlen(func_g.search); - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); - if (unlikely(ftrace_disabled)) { - mutex_unlock(&ftrace_lock); + if (unlikely(ftrace_disabled)) return -ENODEV; - } do_for_each_ftrace_rec(pg, rec) { @@ -6826,7 +6840,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) if (entry) continue; if (add_hash_entry(hash, rec->ip) == NULL) - goto out; + return 0; } else { if (entry) { free_hash_entry(hash, entry); @@ -6835,13 +6849,8 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) } } } while_for_each_ftrace_rec(); -out: - mutex_unlock(&ftrace_lock); - if (fail) - return -EINVAL; - - return 0; + return fail ? 
-EINVAL : 0; } static ssize_t @@ -7920,7 +7929,7 @@ out: void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - kmsan_unpoison_memory(fregs, sizeof(*fregs)); + kmsan_unpoison_memory(fregs, ftrace_regs_size()); __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } #else diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6a891e00aa7f..103b30b1c73b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8587,15 +8587,22 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf, char *buf; int r; - /* 256 should be plenty to hold the amount needed */ - buf = kmalloc(256, GFP_KERNEL); + /* 512 should be plenty to hold the amount needed */ +#define DYN_INFO_BUF_SIZE 512 + + buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; - r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n", + r = scnprintf(buf, DYN_INFO_BUF_SIZE, + "%ld pages:%ld groups: %ld\n" + "ftrace boot update time = %llu (ns)\n" + "ftrace module total update time = %llu (ns)\n", ftrace_update_tot_cnt, ftrace_number_of_pages, - ftrace_number_of_groups); + ftrace_number_of_groups, + ftrace_update_time, + ftrace_total_mod_time); ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c866991b9c78..3307dad4d917 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -46,6 +46,7 @@ enum trace_type { TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, + TRACE_GRAPH_RETADDR_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, @@ -512,6 +513,8 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct fgraph_retaddr_ent_entry,\ + TRACE_GRAPH_RETADDR_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct func_repeats_entry, \ @@ -772,6 +775,8 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable); extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; +extern u64 ftrace_update_time; +extern u64 ftrace_total_mod_time; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { } @@ -879,6 +884,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_RETVAL 0x800 #define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000 +#define TRACE_GRAPH_PRINT_RETADDR 0x2000 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) @@ -900,6 +906,10 @@ extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned int trace_ctx); +extern int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx); @@ -1048,6 +1058,7 @@ static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftra #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; +extern bool fgraph_sleep_time; static inline bool ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace) diff 
--git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c47422b20908..82fd174ebbe0 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -85,9 +85,35 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) ); -/* Function return entry */ +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + +/* Function call entry with a return address */ +FTRACE_ENTRY_PACKED(fgraph_retaddr_entry, fgraph_retaddr_ent_entry, + + TRACE_GRAPH_RETADDR_ENT, + + F_STRUCT( + __field_struct( struct fgraph_retaddr_ent, graph_ent ) + __field_packed( unsigned long, graph_ent, func ) + __field_packed( int, graph_ent, depth ) + __field_packed( unsigned long, graph_ent, retaddr ) + ), + + F_printk("--> %ps (%d) <- %ps", (void *)__entry->func, __entry->depth, + (void *)__entry->retaddr) +); + +#else + +#ifndef fgraph_retaddr_ent_entry +#define fgraph_retaddr_ent_entry ftrace_graph_ent_entry +#endif + +#endif + #ifdef CONFIG_FUNCTION_GRAPH_RETVAL +/* Function return entry */ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, @@ -110,6 +136,7 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, #else +/* Function return entry */ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a569daaac4c4..03c5a0d300a5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -31,7 +31,10 @@ struct fgraph_data { struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ - struct ftrace_graph_ent_entry ent; + union { + struct ftrace_graph_ent_entry ent; + struct fgraph_retaddr_ent_entry rent; + } ent; struct ftrace_graph_ret_entry ret; int failed; int cpu; @@ -64,6 +67,10 @@ static struct tracer_opt trace_opts[] = { /* Display function return value in hexadecimal format ? */ { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) }, #endif +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + /* Display function return address ? 
*/ + { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) }, +#endif /* Include sleep time (scheduled out) between entry and return */ { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) }, @@ -83,6 +90,11 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; +static bool tracer_flags_is_set(u32 flags) +{ + return (tracer_flags.val & flags) == flags; +} + /* * DURATION column is being also used to display IRQ signs, * following values are used by print_graph_irq and others @@ -119,6 +131,38 @@ int __trace_graph_entry(struct trace_array *tr, return 1; } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr) +{ + struct ring_buffer_event *event; + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct fgraph_retaddr_ent_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RETADDR_ENT, + sizeof(*entry), trace_ctx); + if (!event) + return 0; + entry = ring_buffer_event_data(event); + entry->graph_ent.func = trace->func; + entry->graph_ent.depth = trace->depth; + entry->graph_ent.retaddr = retaddr; + trace_buffer_unlock_commit_nostack(buffer, event); + + return 1; +} +#else +int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr) +{ + return 1; +} +#endif + static inline int ftrace_graph_ignore_irqs(void) { if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) @@ -127,12 +171,18 @@ static inline int ftrace_graph_ignore_irqs(void) return in_hardirq(); } +struct fgraph_times { + unsigned long long calltime; + unsigned long long sleeptime; /* may be optional! */ +}; + int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; + struct fgraph_times *ftimes; unsigned long flags; unsigned int trace_ctx; long disabled; @@ -167,6 +217,19 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, if (ftrace_graph_ignore_irqs()) return 0; + if (fgraph_sleep_time) { + /* Only need to record the calltime */ + ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime)); + } else { + ftimes = fgraph_reserve_data(gops->idx, sizeof(*ftimes)); + if (ftimes) + ftimes->sleeptime = current->ftrace_sleeptime; + } + if (!ftimes) + return 0; + + ftimes->calltime = trace_clock_local(); + /* * Stop here if tracing_threshold is set. We only write function return * events to the ring buffer. 
@@ -180,7 +243,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); - ret = __trace_graph_entry(tr, trace, trace_ctx); + if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && + tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) { + unsigned long retaddr = ftrace_graph_top_ret_addr(current); + + ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr); + } else + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -238,15 +307,27 @@ void __trace_graph_return(struct trace_array *tr, trace_buffer_unlock_commit_nostack(buffer, event); } +static void handle_nosleeptime(struct ftrace_graph_ret *trace, + struct fgraph_times *ftimes, + int size) +{ + if (fgraph_sleep_time || size < sizeof(*ftimes)) + return; + + ftimes->calltime += current->ftrace_sleeptime - ftimes->sleeptime; +} + void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; + struct fgraph_times *ftimes; unsigned long flags; unsigned int trace_ctx; long disabled; + int size; int cpu; ftrace_graph_addr_finish(gops, trace); @@ -256,6 +337,14 @@ void trace_graph_return(struct ftrace_graph_ret *trace, return; } + ftimes = fgraph_retrieve_data(gops->idx, &size); + if (!ftimes) + return; + + handle_nosleeptime(trace, ftimes, size); + + trace->calltime = ftimes->calltime; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -271,6 +360,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace, static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { + struct fgraph_times *ftimes; + int size; + ftrace_graph_addr_finish(gops, trace); if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { @@ -278,8 +370,16 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, return; } + ftimes = fgraph_retrieve_data(gops->idx, &size); + if (!ftimes) + return; + + handle_nosleeptime(trace, ftimes, size); + + trace->calltime = ftimes->calltime; + if (tracing_thresh && - (trace->rettime - trace->calltime < tracing_thresh)) + (trace->rettime - ftimes->calltime < tracing_thresh)) return; else trace_graph_return(trace, gops); @@ -457,7 +557,7 @@ get_return_for_leaf(struct trace_iterator *iter, * then we just reuse the data from before. */ if (data && data->failed) { - curr = &data->ent; + curr = &data->ent.ent; next = &data->ret; } else { @@ -487,7 +587,10 @@ get_return_for_leaf(struct trace_iterator *iter, * Save current and next entries for later reference * if the output fails. */ - data->ent = *curr; + if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) + data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr; + else + data->ent.ent = *curr; /* * If the next event is not a return type, then * we only care about what type it is. 
Otherwise we can @@ -651,52 +754,96 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, } #ifdef CONFIG_FUNCTION_GRAPH_RETVAL - #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL +#else +#define __TRACE_GRAPH_PRINT_RETVAL 0 +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR +static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_entry *entry, + u32 trace_flags, bool comment) +{ + if (comment) + trace_seq_puts(s, " /*"); + + trace_seq_puts(s, " <-"); + seq_print_ip_sym(s, entry->graph_ent.retaddr, trace_flags | TRACE_ITER_SYM_OFFSET); + + if (comment) + trace_seq_puts(s, " */"); +} +#else +#define __TRACE_GRAPH_PRINT_RETADDR 0 +#define print_graph_retaddr(_seq, _entry, _tflags, _comment) do { } while (0) +#endif -static void print_graph_retval(struct trace_seq *s, unsigned long retval, - bool leaf, void *func, bool hex_format) +#if defined(CONFIG_FUNCTION_GRAPH_RETVAL) || defined(CONFIG_FUNCTION_GRAPH_RETADDR) + +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry, + struct ftrace_graph_ret *graph_ret, void *func, + u32 opt_flags, u32 trace_flags) { unsigned long err_code = 0; + unsigned long retval = 0; + bool print_retaddr = false; + bool print_retval = false; + bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX); - if (retval == 0 || hex_format) - goto done; +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL + retval = graph_ret->retval; + print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL); +#endif - /* Check if the return value matches the negative format */ - if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && - (((u64)retval) >> 32) == 0) { - /* sign extension */ - err_code = (unsigned long)(s32)retval; - } else { - err_code = retval; +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR); +#endif + + if (print_retval && retval && !hex_format) { + /* Check if the return value matches the negative format */ + if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && + (((u64)retval) >> 32) == 0) { + err_code = sign_extend64(retval, 31); + } else { + err_code = retval; + } + + if (!IS_ERR_VALUE(err_code)) + err_code = 0; } - if (!IS_ERR_VALUE(err_code)) - err_code = 0; + if (entry) { + if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT) + print_retaddr = false; -done: - if (leaf) { - if (hex_format || (err_code == 0)) - trace_seq_printf(s, "%ps(); /* = 0x%lx */\n", - func, retval); + trace_seq_printf(s, "%ps();", func); + if (print_retval || print_retaddr) + trace_seq_puts(s, " /*"); else - trace_seq_printf(s, "%ps(); /* = %ld */\n", - func, err_code); + trace_seq_putc(s, '\n'); } else { + print_retaddr = false; + trace_seq_printf(s, "} /* %ps", func); + } + + if (print_retaddr) + print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, + trace_flags, false); + + if (print_retval) { if (hex_format || (err_code == 0)) - trace_seq_printf(s, "} /* %ps = 0x%lx */\n", - func, retval); + trace_seq_printf(s, " ret=0x%lx", retval); else - trace_seq_printf(s, "} /* %ps = %ld */\n", - func, err_code); + trace_seq_printf(s, " ret=%ld", err_code); } + + if (!entry || print_retval || print_retaddr) + trace_seq_puts(s, " */\n"); } #else -#define __TRACE_GRAPH_PRINT_RETVAL 0 - -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0) +#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0) #endif @@ -748,14 +895,15 @@ 
print_graph_entry_leaf(struct trace_iterator *iter, trace_seq_putc(s, ' '); /* - * Write out the function return value if the option function-retval is - * enabled. + * Write out the function return value or return address */ - if (flags & __TRACE_GRAPH_PRINT_RETVAL) - print_graph_retval(s, graph_ret->retval, true, (void *)func, - !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX)); - else + if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) { + print_graph_retval(s, entry, graph_ret, + (void *)graph_ret->func + iter->tr->text_delta, + flags, tr->trace_flags); + } else { trace_seq_printf(s, "%ps();\n", (void *)func); + } print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -796,7 +944,12 @@ print_graph_entry_nested(struct trace_iterator *iter, func = call->func + iter->tr->text_delta; - trace_seq_printf(s, "%ps() {\n", (void *)func); + trace_seq_printf(s, "%ps() {", (void *)func); + if (flags & __TRACE_GRAPH_PRINT_RETADDR && + entry->ent.type == TRACE_GRAPH_RETADDR_ENT) + print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, + tr->trace_flags, true); + trace_seq_putc(s, '\n'); if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; @@ -1043,11 +1196,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, /* * Always write out the function name and its return value if the - * function-retval option is enabled. + * funcgraph-retval option is enabled. */ if (flags & __TRACE_GRAPH_PRINT_RETVAL) { - print_graph_retval(s, trace->retval, false, (void *)func, - !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX)); + print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags); } else { /* * If the return function does not have a matching entry, @@ -1162,7 +1314,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) * to print out the missing entry which would never go out. 
*/ if (data && data->failed) { - field = &data->ent; + field = &data->ent.ent; iter->cpu = data->cpu; ret = print_graph_entry(field, s, iter, flags); if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { @@ -1186,6 +1338,16 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) saved = *field; return print_graph_entry(&saved, s, iter, flags); } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + case TRACE_GRAPH_RETADDR_ENT: { + struct fgraph_retaddr_ent_entry saved; + struct fgraph_retaddr_ent_entry *rfield; + + trace_assign_type(rfield, entry); + saved = *rfield; + return print_graph_entry((struct ftrace_graph_ent_entry *)&saved, s, iter, flags); + } +#endif case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); @@ -1380,6 +1542,13 @@ static struct trace_event graph_trace_entry_event = { .funcs = &graph_functions, }; +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +static struct trace_event graph_trace_retaddr_entry_event = { + .type = TRACE_GRAPH_RETADDR_ENT, + .funcs = &graph_functions, +}; +#endif + static struct trace_event graph_trace_ret_event = { .type = TRACE_GRAPH_RET, .funcs = &graph_functions @@ -1466,6 +1635,13 @@ static __init int init_graph_trace(void) return 1; } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + if (!register_trace_event(&graph_trace_retaddr_entry_event)) { + pr_warn("Warning: could not register graph trace retaddr events\n"); + return 1; + } +#endif + if (!register_trace_event(&graph_trace_ret_event)) { pr_warn("Warning: could not register graph trace events\n"); return 1; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 1469dd8075fa..38b5754790c9 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_PRINT: case TRACE_BRANCH: case TRACE_GRAPH_ENT: + case TRACE_GRAPH_RETADDR_ENT: case TRACE_GRAPH_RET: return 1; } |
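
The common thread of this patch is that per-call scratch state (calltime, sleeptime, subtime) moves off struct ftrace_ret_stack and into fgraph's per-gops shadow-stack data, reached through fgraph_reserve_data(), fgraph_retrieve_data() and the new fgraph_retrieve_parent_data(). The sketch below is not part of the patch; it is a minimal in-kernel illustration of how an fgraph client would use that API the same way profile_graph_entry()/profile_graph_return() now do. The struct, callback names and the init hook are hypothetical, and the headers are assumed to be <linux/ftrace.h> and <linux/trace_clock.h>.

/* Illustrative fgraph client using the per-gops shadow-stack data API. */
#include <linux/ftrace.h>
#include <linux/trace_clock.h>

struct my_fgraph_data {			/* hypothetical per-call payload */
	unsigned long long	calltime;
	unsigned long long	child_time;
};

static u64 total_self_ns;		/* illustrative accumulator, not SMP-safe */

static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
	struct my_fgraph_data *data;

	/* Reserve storage on the current task's shadow stack frame */
	data = fgraph_reserve_data(gops->idx, sizeof(*data));
	if (!data)
		return 0;		/* no room: skip tracing this call */

	data->calltime = trace_clock_local();
	data->child_time = 0;
	return 1;
}

static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
	struct my_fgraph_data *data, *parent;
	unsigned long long delta;
	int size;

	/* Read back what my_entry() stored for this call (depth 0) */
	data = fgraph_retrieve_data(gops->idx, &size);
	if (!data)
		return;

	delta = trace_clock_local() - data->calltime;

	/* Charge this call's time to the caller's frame (depth 1), like subtime */
	parent = fgraph_retrieve_parent_data(gops->idx, &size, 1);
	if (parent)
		parent->child_time += delta;

	/* Self time = wall time minus time spent in traced children */
	total_self_ns += delta - data->child_time;
}

static struct fgraph_ops my_gops = {
	.entryfunc	= my_entry,
	.retfunc	= my_return,
};

static int __init my_fgraph_init(void)	/* hypothetical init hook */
{
	return register_ftrace_graph(&my_gops);
}

As for the new return-address output: with CONFIG_FUNCTION_GRAPH_RETADDR built in it stays opt-in, and per the Kconfig help above it is toggled with the funcgraph-retaddr trace option, which for the function_graph tracer is normally exposed through tracefs (the trace_options file or the options/ directory).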
