[RFC PATCH] ftrace stack tracer

This is a discussion on [RFC PATCH] ftrace stack tracer within the Kernel forums, part of the Help category; This is another tracer using the ftrace infrastructure, that examines at each function call the size of the stack. If the stack use is greater than the previous max it ...

Go Back   Unix Linux Forum > Unix > Linux > Help > Kernel

FixUnix.com - Unix Linux Forums

Unix Content Register FAQ Calendar Search Today's Posts Mark Forums Read
  #1  
Old 08-27-2008, 11:40 PM
Default [RFC PATCH] ftrace stack tracer


This is another tracer using the ftrace infrastructure, that examines
at each function call the size of the stack. If the stack use is greater
than the previous max it is recorded.

You can always see (and set) the max stack size seen. By setting it
to zero will start the recording again. The backtrace is also available.

For example:

# cat /debug/tracing/stack_max_size
1856

# cat /debug/tracing/stack_trace
[] stack_trace_call+0x8f/0x101
[] ftrace_call+0x5/0x8
[] clocksource_get_next+0x12/0x48
[] update_wall_time+0x538/0x6d1
[] do_timer+0x23/0xb0
[] tick_do_update_jiffies64+0xd9/0xf1
[] tick_sched_timer+0x4a/0xad
[] __run_hrtimer+0x3e/0x75
[] hrtimer_interrupt+0xf1/0x154
[] smp_apic_timer_interrupt+0x71/0x84
[] apic_timer_interrupt+0x2d/0x34
[] finish_task_switch+0x29/0xa0
[] schedule+0x765/0x7be
[] schedule_timeout+0x1b/0x90
[] wait_for_common+0xab/0x101
[] wait_for_completion+0x12/0x14
[] blk_execute_rq+0x84/0x99
[] scsi_execute+0xc2/0x105
[] scsi_execute_req+0x57/0x7f
[] sr_test_unit_ready+0x3e/0x97
[] sr_media_change+0x43/0x205
[] media_changed+0x48/0x77
[] cdrom_media_changed+0x31/0x37
[] sr_block_media_changed+0x16/0x18
[] check_disk_change+0x1b/0x63
[] cdrom_open+0x7a1/0x806
[] sr_block_open+0x78/0x8d
[] do_open+0x90/0x257
[] blkdev_open+0x2d/0x56
[] __dentry_open+0x14d/0x23c
[] nameidata_to_filp+0x24/0x38
[] do_filp_open+0x347/0x626
[] do_sys_open+0x47/0xbc
[] sys_open+0x23/0x2b
[] sysenter_do_call+0x12/0x26

I've tested this on both x86_64 and i386.

Signed-off-by: Steven Rostedt
---
kernel/trace/Kconfig | 9 +
kernel/trace/Makefile | 1
kernel/trace/trace_stack.c | 254 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 264 insertions(+)

Index: linux-tip.git/kernel/trace/Kconfig
================================================== =================
--- linux-tip.git.orig/kernel/trace/Kconfig 2008-08-27 18:47:03.000000000 -0400
+++ linux-tip.git/kernel/trace/Kconfig 2008-08-27 22:28:36.000000000 -0400
@@ -106,6 +106,15 @@ config CONTEXT_SWITCH_TRACER
This tracer gets called from the context switch and records
all switching of tasks.

+config STACK_TRACER
+ bool "Trace max stack"
+ depends on HAVE_FTRACE
+ select FTRACE
+ select STACKTRACE
+ help
+ This tracer records the max stack of the kernel, and displays
+ it in debugfs/tracing/stack_trace
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FTRACE
Index: linux-tip.git/kernel/trace/Makefile
================================================== =================
--- linux-tip.git.orig/kernel/trace/Makefile 2008-08-27 18:47:03.000000000 -0400
+++ linux-tip.git/kernel/trace/Makefile 2008-08-27 22:28:36.000000000 -0400
@@ -19,6 +19,7 @@ obj-$(CONFIG_FTRACE) += trace_functions.
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o

libftrace-y := ftrace.o
Index: linux-tip.git/kernel/trace/trace_stack.c
================================================== =================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-tip.git/kernel/trace/trace_stack.c 2008-08-27 22:40:29.000000000 -0400
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) 2008 Steven Rostedt
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "trace.h"
+
+#define STACK_TRACE_ENTRIES 500
+
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES] =
+ { [0 ... (STACK_TRACE_ENTRIES-1)] = ULONG_MAX };
+static struct stack_trace max_stack_trace = {
+ .max_entries = STACK_TRACE_ENTRIES,
+ .entries = stack_dump_trace,
+};
+
+static unsigned long max_stack_size;
+static raw_spinlock_t max_stack_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int stack_trace_disabled __read_mostly;
+static DEFINE_PER_CPU(int, trace_active);
+
+static inline void check_stack(void)
+{
+ unsigned long this_size;
+ unsigned long flags;
+
+ this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+ this_size = THREAD_SIZE - this_size;
+
+ if (this_size <= max_stack_size)
+ return;
+
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&max_stack_lock);
+
+ /* a race could have already updated it */
+ if (this_size <= max_stack_size)
+ goto out;
+
+ max_stack_size = this_size;
+
+ max_stack_trace.nr_entries = 0;
+ max_stack_trace.skip = 1;
+
+ save_stack_trace(&max_stack_trace);
+
+ out:
+ __raw_spin_unlock(&max_stack_lock);
+ raw_local_irq_restore(flags);
+}
+
+static void
+stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu, resched;
+
+ if (unlikely(!ftrace_enabled || stack_trace_disabled))
+ return;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ cpu = raw_smp_processor_id();
+ /* no atomic needed, we only modify this variable by this cpu */
+ if (per_cpu(trace_active, cpu)++ != 0)
+ goto out;
+
+ check_stack();
+
+ out:
+ per_cpu(trace_active, cpu)--;
+ /* prevent recursion in schedule */
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = stack_trace_call,
+};
+
+static ssize_t
+stack_max_size_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long *ptr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
+ if (r > sizeof(buf))
+ r = sizeof(buf);
+ return simple_read_from_buffer(ubuf, count, ppos, buf, r);
+}
+
+static ssize_t
+stack_max_size_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ long *ptr = filp->private_data;
+ unsigned long val, flags;
+ char buf[64];
+ int ret;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&max_stack_lock);
+ *ptr = val;
+ __raw_spin_unlock(&max_stack_lock);
+ raw_local_irq_restore(flags);
+
+ return count;
+}
+
+static struct file_operations stack_max_size_fops = {
+ .open = tracing_open_generic,
+ .read = stack_max_size_read,
+ .write = stack_max_size_write,
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ unsigned long *t = m->private;
+
+ (*pos)++;
+
+ if (!t || *t == ULONG_MAX)
+ return NULL;
+
+ t++;
+ m->private = t;
+
+ return t;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ unsigned long *t = m->private;
+ loff_t l = 0;
+
+ local_irq_disable();
+ __raw_spin_lock(&max_stack_lock);
+
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+ __raw_spin_unlock(&max_stack_lock);
+ local_irq_enable();
+}
+
+static int trace_lookup_stack(struct seq_file *m, unsigned long addr)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+
+ sprint_symbol(str, addr);
+
+ return seq_printf(m, "[<%p>] %s\n", (void*)addr, str);
+#else
+ return seq_printf(m, "%p\n", (void*)addr);
+#endif
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ unsigned long *t = v;
+
+ if (!t || *t == ULONG_MAX)
+ return 0;
+
+ trace_lookup_stack(m, *t);
+
+ return 0;
+}
+
+static struct seq_operations stack_trace_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int stack_trace_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = seq_open(file, &stack_trace_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = stack_dump_trace;
+ }
+
+ return ret;
+}
+
+static struct file_operations stack_trace_fops = {
+ .open = stack_trace_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static __init int stack_trace_init(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
+ &max_stack_size, &stack_max_size_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+
+ entry = debugfs_create_file("stack_trace", 0444, d_tracer,
+ NULL, &stack_trace_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'stack_trace' entry\n");
+
+ register_ftrace_function(&trace_ops);
+
+ return 0;
+}
+
+device_initcall(stack_trace_init);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Reply With Quote
  #2  
Old 08-28-2008, 03:50 AM
Default Re: [RFC PATCH] ftrace stack tracer


* Steven Rostedt wrote:

> This is another tracer using the ftrace infrastructure, that examines
> at each function call the size of the stack. If the stack use is
> greater than the previous max it is recorded.
>
> You can always see (and set) the max stack size seen. By setting it to
> zero will start the recording again. The backtrace is also available.
>
> For example:
>
> # cat /debug/tracing/stack_max_size
> 1856
>
> # cat /debug/tracing/stack_trace
> [] stack_trace_call+0x8f/0x101
> [] ftrace_call+0x5/0x8
> [] clocksource_get_next+0x12/0x48
> [] update_wall_time+0x538/0x6d1
> [] do_timer+0x23/0xb0
> [] tick_do_update_jiffies64+0xd9/0xf1
> [] tick_sched_timer+0x4a/0xad
> [] __run_hrtimer+0x3e/0x75
> [] hrtimer_interrupt+0xf1/0x154
> [] smp_apic_timer_interrupt+0x71/0x84
> [] apic_timer_interrupt+0x2d/0x34
> [] finish_task_switch+0x29/0xa0
> [] schedule+0x765/0x7be
> [] schedule_timeout+0x1b/0x90
> [] wait_for_common+0xab/0x101
> [] wait_for_completion+0x12/0x14
> [] blk_execute_rq+0x84/0x99
> [] scsi_execute+0xc2/0x105
> [] scsi_execute_req+0x57/0x7f
> [] sr_test_unit_ready+0x3e/0x97
> [] sr_media_change+0x43/0x205
> [] media_changed+0x48/0x77
> [] cdrom_media_changed+0x31/0x37
> [] sr_block_media_changed+0x16/0x18
> [] check_disk_change+0x1b/0x63
> [] cdrom_open+0x7a1/0x806
> [] sr_block_open+0x78/0x8d
> [] do_open+0x90/0x257
> [] blkdev_open+0x2d/0x56
> [] __dentry_open+0x14d/0x23c
> [] nameidata_to_filp+0x24/0x38
> [] do_filp_open+0x347/0x626
> [] do_sys_open+0x47/0xbc
> [] sys_open+0x23/0x2b
> [] sysenter_do_call+0x12/0x26
>
> I've tested this on both x86_64 and i386.


very nice!

This is even more efficient in practice i think than 'make stackcheck'
type of analysis, because it measures the stack footprint in practice.

That way we can prove/disprove theories whether a regression was caused
by stack overflow or not.

We can also do practical profiling of worst-case stack footprint - much
like the latency tracer works. (just in a different 'space' of
'latency'.)

A couple of suggestions:

- could we somehow include the per stacktrace line stack frame size
information as well? This probably needs an extension to
save_stack_trace() though.

- it would be nice to introduce a treshold and automation to emit a
WARN_ON() exactly once during bootup if this threshold is ever
exceeded. That way automated testing efforts like randconfig
testing would automatically do worst-case-stack-footprint testing as
well.

- please link this tracer plugin into the stack redzone check mechanism
we already have. Right now our stack overflow warnings are
statistical: they only happen if an irq handler happens to notice a
deep stack, or if task teardown happens to see corruption of a
specific area of the stack. Neither of which are particularly
efficient in practice. One thing to be careful about is when to
print: i'd suggest to introduce a stack overflow warning that uses
only early_printk() and not the regular printk. This is a truly
emergency mechanism and getting the message out in time (before we
self-destruct via corrupting the task structure) is important.

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Reply With Quote
  #3  
Old 08-28-2008, 03:50 AM
Default Re: [RFC PATCH] ftrace stack tracer


* Steven Rostedt wrote:

> This is another tracer using the ftrace infrastructure, that examines
> at each function call the size of the stack. If the stack use is
> greater than the previous max it is recorded.


i've created a new, -git based topic for this: tip/tracing/stack-tracer,
applied your new stacktrace plugin and started testing it.

Ingo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Reply With Quote
Reply

Thread Tools


All times are GMT -5. The time now is 05:53 PM.

In an effort to better serve ads to our visitors, cookies are used on Fixunix.com. For more information, check out our Privacy Policy.

Powered by vBulletin® Version 3.7.2
Copyright ©2000 - 2008, Jelsoft Enterprises Ltd.
Search Engine Friendly URLs by vBSEO 3.2.0
Ad Management by RedTyger