[PATCH 0/2] ftrace: handle NMIs safely - Kernel

This is a discussion on [PATCH 0/2] ftrace: handle NMIs safely - Kernel ; The robustness of ftrace has been the focus of the code modification in 2.6.28. There is one remaining issue that needed to be addressed. This was the case of NMIs. To state the problem: if a process on one CPU ...

+ Reply to Thread
Results 1 to 17 of 17

Thread: [PATCH 0/2] ftrace: handle NMIs safely

  1. [PATCH 0/2] ftrace: handle NMIs safely

    The robustness of ftrace has been the focus of the code modification in
    2.6.28. There is one remaining issue that needed to be addressed.
    This was the case of NMIs.

    To state the problem: if a process on one CPU is modifying code that is
    being executed on another CPU, that CPU will have undefined results,
    and possibly issue a GPF. To cover this with dynamic ftrace, it calls
    kstop_machine to prevent the other CPUs from issuing interrupts or
    executing any other process. The problem we have is that this does not
    protect us against NMIs.

    Discussing this with Arjan and Ingo, we came up with this RCU like solution.
    When the patcher wants to modify code, it must do the following steps:

    1) load the instruction pointer to modify into an IP buffer and the
    contents that will be modified into a "code" buffer.
    2) set a write_bit flag.
    3) wait for any running NMIs to finish.
    4) modify the code.
    5) clear the write bit flag.
    6) wait for any running NMIs to finish
    7) return the status of the write.

    In the NMI handler, the first thing it will do is to check if the
    write_bit is set, and if so, the NMI performs the write.
    We do not worry about multiple writers writing to the code at the
    same time, they are all writing the same thing.

    The idea is that executing code will not have a problem if another
    CPU has a process writing to that code with the same value as
    what is in the code. In-other-words, the code does not change.

    To help verify this, I wrote this module and let it run for a while:

    -----------------------------------------------
    #include
    #include

    #define WRITE_SIZE 50

    static int write_thread(void *arg)
    {
    unsigned char *ptr = (unsigned char *)schedule;
    static unsigned char write_buf[WRITE_SIZE];


    memcpy(write_buf, ptr, WRITE_SIZE);

    while (!kthread_should_stop()) {
    memcpy(ptr, write_buf, WRITE_SIZE);
    msleep(1);
    }

    return 0;
    }

    static struct task_struct *writer;

    static int __init writer_torture_init(void)
    {
    int ret;

    writer = kthread_run(write_thread, NULL, "write_tortured");
    ret = PTR_ERR(writer);

    if (IS_ERR(writer)) {
    writer = NULL;
    return ret;
    }

    return 0;
    }

    static void writer_torture_exit(void)
    {
    if (writer)
    kthread_stop(writer);
    }

    module_init(writer_torture_init);
    module_exit(writer_torture_exit);

    MODULE_AUTHOR("Steven Rostedt");
    MODULE_DESCRIPTION("Write code torture");
    MODULE_LICENSE("GPL");
    -----------------------------------------------

    Do not run the dynamic ftrace while running this module ;-)

    The second patch adds stats to the dyn_ftrace_total_info that adds
    NR-times-nmi-detect NR-times-NMI-wrote

    -- Steve

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. [PATCH 2/2] ftrace: nmi update statistics

    This patch adds dynamic ftrace NMI update statistics to the
    /debugfs/tracing/dyn_ftrace_total_info stat file.

    Signed-off-by: Steven Rostedt
    ---
    arch/x86/kernel/ftrace.c | 26 ++++++++++++++++++++++++--
    kernel/trace/trace.c | 31 ++++++++++++++++++++++++-------
    2 files changed, 48 insertions(+), 9 deletions(-)

    Index: linux-tip.git/arch/x86/kernel/ftrace.c
    ================================================== =================
    --- linux-tip.git.orig/arch/x86/kernel/ftrace.c 2008-10-30 15:30:12.000000000 -0400
    +++ linux-tip.git/arch/x86/kernel/ftrace.c 2008-10-30 15:51:51.000000000 -0400
    @@ -91,6 +91,19 @@ static int mod_code_write;
    static void *mod_code_ip;
    static void *mod_code_newcode;

    +static int nmi_wait_count;
    +static atomic_t nmi_update_count;
    +
    +int ftrace_arch_read_dyn_info(char *buf, int size)
    +{
    + int r;
    +
    + r = snprintf(buf, size, "%u %u",
    + nmi_wait_count,
    + atomic_read(&nmi_update_count));
    + return r;
    +}
    +
    static void ftrace_mod_code(void)
    {
    /*
    @@ -109,8 +122,10 @@ void ftrace_nmi_enter(void)
    atomic_inc(&in_nmi);
    /* Must have in_nmi seen before reading write flag */
    smp_mb();
    - if (mod_code_write)
    + if (mod_code_write) {
    ftrace_mod_code();
    + atomic_inc(&nmi_update_count);
    + }
    }

    void ftrace_nmi_exit(void)
    @@ -122,8 +137,15 @@ void ftrace_nmi_exit(void)

    static void wait_for_nmi(void)
    {
    - while (atomic_read(&in_nmi))
    + int waited = 0;
    +
    + while (atomic_read(&in_nmi)) {
    + waited = 1;
    cpu_relax();
    + }
    +
    + if (waited)
    + nmi_wait_count++;
    }

    static int
    Index: linux-tip.git/kernel/trace/trace.c
    ================================================== =================
    --- linux-tip.git.orig/kernel/trace/trace.c 2008-10-30 15:18:45.000000000 -0400
    +++ linux-tip.git/kernel/trace/trace.c 2008-10-30 15:51:51.000000000 -0400
    @@ -2837,22 +2837,39 @@ static struct file_operations tracing_ma

    #ifdef CONFIG_DYNAMIC_FTRACE

    +#define DYN_INFO_BUF_SIZE 1023
    +static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1];
    +static DEFINE_MUTEX(dyn_info_mutex);
    +
    +int __weak ftrace_arch_read_dyn_info(char *buf, int size)
    +{
    + return 0;
    +}
    +
    static ssize_t
    -tracing_read_long(struct file *filp, char __user *ubuf,
    +tracing_read_dyn_info(struct file *filp, char __user *ubuf,
    size_t cnt, loff_t *ppos)
    {
    unsigned long *p = filp->private_data;
    - char buf[64];
    + char *buf = ftrace_dyn_info_buffer;
    int r;

    - r = sprintf(buf, "%ld\n", *p);
    + mutex_lock(&dyn_info_mutex);
    + r = sprintf(buf, "%ld ", *p);
    +
    + r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r);
    + buf[r++] = '\n';
    +
    + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
    +
    + mutex_unlock(&dyn_info_mutex);

    - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
    + return r;
    }

    -static struct file_operations tracing_read_long_fops = {
    +static struct file_operations tracing_dyn_info_fops = {
    .open = tracing_open_generic,
    - .read = tracing_read_long,
    + .read = tracing_read_dyn_info,
    };
    #endif

    @@ -2961,7 +2978,7 @@ static __init int tracer_init_debugfs(vo
    #ifdef CONFIG_DYNAMIC_FTRACE
    entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
    &ftrace_update_tot_cnt,
    - &tracing_read_long_fops);
    + &tracing_dyn_info_fops);
    if (!entry)
    pr_warning("Could not create debugfs "
    "'dyn_ftrace_total_info' entry\n");

    --
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH 2/2] ftrace: nmi update statistics

    On Thu, 30 Oct 2008 16:08:33 -0400
    Steven Rostedt wrote:

    > This patch adds dynamic ftrace NMI update statistics to the
    > /debugfs/tracing/dyn_ftrace_total_info stat file.
    >
    > Signed-off-by: Steven Rostedt
    > ---
    > arch/x86/kernel/ftrace.c | 26 ++++++++++++++++++++++++--
    > kernel/trace/trace.c | 31 ++++++++++++++++++++++++-------


    No header files were modified, but ftrace_arch_read_dyn_info() should
    have been declared so that the above two compilation units see the
    declaration.

    >
    > Index: linux-tip.git/arch/x86/kernel/ftrace.c
    > ================================================== =================
    > --- linux-tip.git.orig/arch/x86/kernel/ftrace.c 2008-10-30 15:30:12.000000000 -0400
    > +++ linux-tip.git/arch/x86/kernel/ftrace.c 2008-10-30 15:51:51.000000000 -0400
    > @@ -91,6 +91,19 @@ static int mod_code_write;
    > static void *mod_code_ip;
    > static void *mod_code_newcode;
    >
    > +static int nmi_wait_count;
    > +static atomic_t nmi_update_count;




    > +int ftrace_arch_read_dyn_info(char *buf, int size)
    > +{
    > + int r;
    > +
    > + r = snprintf(buf, size, "%u %u",
    > + nmi_wait_count,
    > + atomic_read(&nmi_update_count));
    > + return r;
    > +}


    Yes, nmi_wait_count is an unsigned quantity. Might as well define it
    thusly?

    > static void ftrace_mod_code(void)
    > {
    > /*
    > @@ -109,8 +122,10 @@ void ftrace_nmi_enter(void)
    > atomic_inc(&in_nmi);
    > /* Must have in_nmi seen before reading write flag */
    > smp_mb();
    > - if (mod_code_write)
    > + if (mod_code_write) {
    > ftrace_mod_code();
    > + atomic_inc(&nmi_update_count);
    > + }
    > }
    >
    > void ftrace_nmi_exit(void)
    > @@ -122,8 +137,15 @@ void ftrace_nmi_exit(void)
    >
    > static void wait_for_nmi(void)
    > {
    > - while (atomic_read(&in_nmi))
    > + int waited = 0;
    > +
    > + while (atomic_read(&in_nmi)) {
    > + waited = 1;
    > cpu_relax();
    > + }
    > +
    > + if (waited)
    > + nmi_wait_count++;
    > }
    >
    > static int
    > Index: linux-tip.git/kernel/trace/trace.c
    > ================================================== =================
    > --- linux-tip.git.orig/kernel/trace/trace.c 2008-10-30 15:18:45.000000000 -0400
    > +++ linux-tip.git/kernel/trace/trace.c 2008-10-30 15:51:51.000000000 -0400
    > @@ -2837,22 +2837,39 @@ static struct file_operations tracing_ma
    >
    > #ifdef CONFIG_DYNAMIC_FTRACE
    >
    > +#define DYN_INFO_BUF_SIZE 1023
    > +static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1];


    Could be made local to tracing_read_dyn_info().


    Could just be

    static char ftrace_dyn_info_buffer[1024];

    then use sizeof/ARRAY_SIZE elsewhere. I think this is a bit safer and
    more readable - the reader doesn't have to run around the code checking
    that the correct #define was used in both places.

    > +static DEFINE_MUTEX(dyn_info_mutex);
    > +
    > +int __weak ftrace_arch_read_dyn_info(char *buf, int size)
    > +{
    > + return 0;
    > +}
    > +
    > static ssize_t
    > -tracing_read_long(struct file *filp, char __user *ubuf,
    > +tracing_read_dyn_info(struct file *filp, char __user *ubuf,
    > size_t cnt, loff_t *ppos)
    > {
    > unsigned long *p = filp->private_data;
    > - char buf[64];
    > + char *buf = ftrace_dyn_info_buffer;
    > int r;
    >
    > - r = sprintf(buf, "%ld\n", *p);
    > + mutex_lock(&dyn_info_mutex);
    > + r = sprintf(buf, "%ld ", *p);
    > +
    > + r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r);
    > + buf[r++] = '\n';
    > +
    > + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
    > +
    > + mutex_unlock(&dyn_info_mutex);
    >
    > - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
    > + return r;
    > }
    >
    > -static struct file_operations tracing_read_long_fops = {
    > +static struct file_operations tracing_dyn_info_fops = {
    > .open = tracing_open_generic,
    > - .read = tracing_read_long,
    > + .read = tracing_read_dyn_info,
    > };
    > #endif
    >
    > @@ -2961,7 +2978,7 @@ static __init int tracer_init_debugfs(vo
    > #ifdef CONFIG_DYNAMIC_FTRACE
    > entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
    > &ftrace_update_tot_cnt,
    > - &tracing_read_long_fops);
    > + &tracing_dyn_info_fops);
    > if (!entry)
    > pr_warning("Could not create debugfs "
    > "'dyn_ftrace_total_info' entry\n");
    >


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH 0/2] ftrace: handle NMIs safely


    * Steven Rostedt wrote:

    > The robustness of ftrace has been the focus of the code modification
    > in 2.6.28. There is one remaining issue that needed to be addressed.
    > This was the case of NMIs.


    applied to tip/tracing/nmisafe, thanks Steve!

    this looks a lot nicer approach than either putting some sort of lock
    into NMI context (yuck) or the disabling of NMIs (not really possible
    in a generic way architecturally).

    the impact is quite non-trivial, so i dont think this is v2.6.28
    material.

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH 1/2] ftrace: nmi safe code modification

    On Thu, 30 Oct 2008 16:08:32 -0400
    Steven Rostedt wrote:

    > Modifying code is something that needs special care. On SMP boxes,
    > if code that is being modified is also being executed on another CPU,
    > that CPU will have undefined results.
    >
    > The dynamic ftrace uses kstop_machine to make the system act like a
    > uniprocessor system. But this does not address NMIs, that can still
    > run on other CPUs.
    >
    > One approach to handle this is to make all code that are used by NMIs
    > not be traced. But NMIs can call notifiers that spread throughout the
    > kernel and this will be very hard to maintain, and the chance of missing
    > a function is very high.
    >
    > The approach that this patch takes is to have the NMIs modify the code
    > if the modification is taking place. The way this works is that just
    > writing to code executing on another CPU is not harmful if what is
    > written is the same as what exists.


    Seems like it'll work.

    > +#ifndef __ASSEMBLY__
    > +#define ftrace_nmi_enter() do { } while (0)
    > +#define ftrace_nmi_exit() do { } while (0)
    > +#endif
    > ...
    > +#ifndef __ASSEMBLY__
    > +#define ftrace_nmi_enter() do { } while (0)
    > +#define ftrace_nmi_exit() do { } while (0)
    > +#endif


    These could all be written in C. If there's a reson to write them in
    cpp then the `#ifndef __ASSEMBLY__' isn't really needed.

    > +#endif
    > +
    > #ifdef CONFIG_MCOUNT
    > #define MCOUNT_ADDR ((long)(_mcount))
    > #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
    > Index: linux-tip.git/arch/x86/include/asm/ftrace.h
    > ================================================== =================
    > --- linux-tip.git.orig/arch/x86/include/asm/ftrace.h 2008-10-30 10:26:28.000000000 -0400
    > +++ linux-tip.git/arch/x86/include/asm/ftrace.h 2008-10-30 13:16:22.000000000 -0400
    > @@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_
    > */
    > return addr - 1;
    > }
    > +
    > +#ifdef CONFIG_DYNAMIC_FTRACE
    > +extern void ftrace_nmi_enter(void);
    > +extern void ftrace_nmi_exit(void);
    > +#else
    > +#define ftrace_nmi_enter() do { } while (0)
    > +#define ftrace_nmi_exit() do { } while (0)


    Either this one misses the `#ifndef __ASSEMBLY__' ...

    > +#endif
    > +#endif
    > +
    > +#else /* CONFIG_FUNCTION_TRACER */
    > +
    > +#ifndef __ASSEMBLY__
    > +#define ftrace_nmi_enter() do { } while (0)
    > +#define ftrace_nmi_exit() do { } while (0)
    > #endif


    or this one didn't need it.

    >
    > #endif /* CONFIG_FUNCTION_TRACER */
    > Index: linux-tip.git/arch/x86/kernel/ftrace.c
    > ================================================== =================
    > --- linux-tip.git.orig/arch/x86/kernel/ftrace.c 2008-10-30 10:27:07.000000000 -0400
    > +++ linux-tip.git/arch/x86/kernel/ftrace.c 2008-10-30 16:06:42.000000000 -0400
    > @@ -56,6 +56,111 @@ unsigned char *ftrace_call_replace(unsig
    > return calc.code;
    > }
    >
    > +/*
    > + * Modifying code must take extra care. On an SMP machine, if
    > + * the code being modified is also being executed on another CPU
    > + * that CPU will have undefined results and possibly take a GPF.
    > + * We use kstop_machine to stop other CPUS from exectuing code.
    > + * But this does not stop NMIs from happening. We still need
    > + * to protect against that. We separate out the modification of
    > + * the code to take care of this.
    > + *
    > + * Two buffers are added: An IP buffer and a "code" buffer.
    > + *
    > + * 1) Put in the instruction pointer into the IP buffer


    s/in //

    > + * and the new code into the "code" buffer.
    > + * 2) Set a flag that says we are modifying code
    > + * 3) Wait for any running NMIs to finish.
    > + * 4) Write the code
    > + * 5) clear the flag.
    > + * 6) Wait for any running NMIs to finish.
    > + *
    > + * If an NMI is executed, the first thing it does is to call
    > + * "ftrace_nmi_enter". This will check if the flag is set to write
    > + * and if it is, it will write what is in the IP and "code" buffers.
    > + *
    > + * The trick is, it does not matter if everyone is writing the same
    > + * content to the code location. Also, if a CPU is executing code
    > + * it is OK to write to that code location if the contents being written
    > + * are the same as what exists.
    > + */
    > +
    > +static atomic_t in_nmi;


    Should formally have an ATONIC_INIT. If we're going to formalise the
    "all zeroes is OK for atomic_t's" rule then we can leave this as-is.

    > +static int mod_code_status;
    > +static int mod_code_write;
    > +static void *mod_code_ip;
    > +static void *mod_code_newcode;


    Some comments decribing the global state would be nice.

    > +static void ftrace_mod_code(void)
    > +{
    > + /*
    > + * Yes, more than one CPU process can be writing to mod_code_status.
    > + * (and the code itself)
    > + * But if one were to fail, then they all should, and if one were
    > + * to succeed, then they all should.
    > + */
    > + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
    > + MCOUNT_INSN_SIZE);
    > +
    > +}
    > +
    > +void ftrace_nmi_enter(void)
    > +{
    > + atomic_inc(&in_nmi);
    > + /* Must have in_nmi seen before reading write flag */
    > + smp_mb();
    > + if (mod_code_write)
    > + ftrace_mod_code();
    > +}
    > +
    > +void ftrace_nmi_exit(void)
    > +{
    > + /* Finish all executions before clearing in_nmi */
    > + smp_wmb();
    > + atomic_dec(&in_nmi);
    > +}
    > +
    > +static void wait_for_nmi(void)
    > +{
    > + while (atomic_read(&in_nmi))
    > + cpu_relax();
    > +}
    > +
    > +static int
    > +do_ftrace_mod_code(unsigned long ip, void *new_code)
    > +{
    > + mod_code_ip = (void *)ip;
    > + mod_code_newcode = new_code;
    > +
    > + /* The buffers need to be visible before we let NMIs write them */
    > + smp_wmb();
    > +
    > + mod_code_write = 1;
    > +
    > + /* Make sure write bit is visible before we wait on NMIs */
    > + smp_mb();
    > +
    > + wait_for_nmi();
    > +
    > + /* Make sure all running NMIs have finished before we write the code */
    > + smp_mb();
    > +
    > + ftrace_mod_code();
    > +
    > + /* Make sure the write happens before clearing the bit */
    > + smp_wmb();
    > +
    > + mod_code_write = 0;
    > +
    > + /* make sure NMIs see the cleared bit */
    > + smp_mb();
    > +
    > + wait_for_nmi();
    > +
    > + return mod_code_status;
    > +}


    I guess the weakness here is that the code will only allow a single
    contiguous hunk of text to be modified. One could envisage situations
    where two or more separate areas of memory need to be modified
    atomically/together.

    I guess we can cross that bridge when we fall off it.

    > --- linux-tip.git.orig/include/linux/hardirq.h 2008-10-30 10:27:07.000000000 -0400
    > +++ linux-tip.git/include/linux/hardirq.h 2008-10-30 10:27:09.000000000 -0400
    > @@ -5,6 +5,7 @@
    > #include
    > #include
    > #include
    > +#include
    > #include
    >
    > /*
    > @@ -161,7 +162,17 @@ extern void irq_enter(void);
    > */
    > extern void irq_exit(void);
    >
    > -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
    > -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
    > +#define nmi_enter() \
    > + do { \
    > + ftrace_nmi_enter(); \
    > + lockdep_off(); \
    > + __irq_enter(); \
    > + } while (0)
    > +#define nmi_exit() \
    > + do { \
    > + __irq_exit(); \
    > + lockdep_on(); \
    > + ftrace_nmi_exit(); \
    > + } while (0)
    >



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH 1/2] ftrace: nmi safe code modification


    * Andrew Morton wrote:

    > > + /* make sure NMIs see the cleared bit */
    > > + smp_mb();
    > > +
    > > + wait_for_nmi();
    > > +
    > > + return mod_code_status;
    > > +}

    >
    > I guess the weakness here is that the code will only allow a single
    > contiguous hunk of text to be modified. One could envisage
    > situations where two or more separate areas of memory need to be
    > modified atomically/together.
    >
    > I guess we can cross that bridge when we fall off it.


    yeah - the whole 'transaction' concept can be extended easily, in just
    one place.

    Not that i think that it would really be useful to go beyond the
    current (target,len) abstraction - the less complex code patching is
    done, the better.

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH 1/2] ftrace: nmi safe code modification


    On Thu, 30 Oct 2008, Andrew Morton wrote:
    >
    > > +#ifndef __ASSEMBLY__
    > > +#define ftrace_nmi_enter() do { } while (0)
    > > +#define ftrace_nmi_exit() do { } while (0)
    > > +#endif
    > > ...
    > > +#ifndef __ASSEMBLY__
    > > +#define ftrace_nmi_enter() do { } while (0)
    > > +#define ftrace_nmi_exit() do { } while (0)
    > > +#endif

    >
    > These could all be written in C. If there's a reson to write them in
    > cpp then the `#ifndef __ASSEMBLY__' isn't really needed.


    I could do the C macro, and you are right, I did not need the __ASSEMBLY__
    part. I guess that was me just being over-protective :-/

    Which would you prefer? Changing to C or removing the __ASSEMBLY__?

    >
    > > +#endif
    > > +
    > > #ifdef CONFIG_MCOUNT
    > > #define MCOUNT_ADDR ((long)(_mcount))
    > > #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
    > > Index: linux-tip.git/arch/x86/include/asm/ftrace.h
    > > ================================================== =================
    > > --- linux-tip.git.orig/arch/x86/include/asm/ftrace.h 2008-10-30 10:26:28.000000000 -0400
    > > +++ linux-tip.git/arch/x86/include/asm/ftrace.h 2008-10-30 13:16:22.000000000 -0400
    > > @@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_
    > > */
    > > return addr - 1;
    > > }
    > > +
    > > +#ifdef CONFIG_DYNAMIC_FTRACE
    > > +extern void ftrace_nmi_enter(void);
    > > +extern void ftrace_nmi_exit(void);
    > > +#else
    > > +#define ftrace_nmi_enter() do { } while (0)
    > > +#define ftrace_nmi_exit() do { } while (0)

    >
    > Either this one misses the `#ifndef __ASSEMBLY__' ...
    >
    > > +#endif
    > > +#endif
    > > +
    > > +#else /* CONFIG_FUNCTION_TRACER */
    > > +
    > > +#ifndef __ASSEMBLY__
    > > +#define ftrace_nmi_enter() do { } while (0)
    > > +#define ftrace_nmi_exit() do { } while (0)
    > > #endif

    >
    > or this one didn't need it.


    Wrong in both cases (If we ignore the fact that MACROS do not need the
    __ASSEMBLY__) ;-)

    Notice the two #endif in a row? Here's the full code:

    #ifdef CONFIG_FUNCTION_TRACER
    #define MCOUNT_ADDR ((long)(mcount))
    #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */

    #ifndef __ASSEMBLY__
    extern void mcount(void);

    static inline unsigned long ftrace_call_adjust(unsigned long addr)
    {
    /*
    * call mcount is "e8 <4 byte offset>"
    * The addr points to the 4 byte offset and the caller of this
    * function wants the pointer to e8. Simply subtract one.
    */
    return addr - 1;
    }

    #ifdef CONFIG_DYNAMIC_FTRACE
    extern void ftrace_nmi_enter(void);
    extern void ftrace_nmi_exit(void);
    #else
    #define ftrace_nmi_enter() do { } while (0)
    #define ftrace_nmi_exit() do { } while (0)
    #endif
    #endif

    #else /* CONFIG_FUNCTION_TRACER */

    #ifndef __ASSEMBLY__
    #define ftrace_nmi_enter() do { } while (0)
    #define ftrace_nmi_exit() do { } while (0)
    #endif

    #endif /* CONFIG_FUNCTION_TRACER */


    I guess I could add a comment /* __ASSEMBLY__ */ on that second endif.

    >
    > >
    > > #endif /* CONFIG_FUNCTION_TRACER */
    > > Index: linux-tip.git/arch/x86/kernel/ftrace.c
    > > ================================================== =================
    > > --- linux-tip.git.orig/arch/x86/kernel/ftrace.c 2008-10-30 10:27:07.000000000 -0400
    > > +++ linux-tip.git/arch/x86/kernel/ftrace.c 2008-10-30 16:06:42.000000000 -0400
    > > @@ -56,6 +56,111 @@ unsigned char *ftrace_call_replace(unsig
    > > return calc.code;
    > > }
    > >
    > > +/*
    > > + * Modifying code must take extra care. On an SMP machine, if
    > > + * the code being modified is also being executed on another CPU
    > > + * that CPU will have undefined results and possibly take a GPF.
    > > + * We use kstop_machine to stop other CPUS from exectuing code.
    > > + * But this does not stop NMIs from happening. We still need
    > > + * to protect against that. We separate out the modification of
    > > + * the code to take care of this.
    > > + *
    > > + * Two buffers are added: An IP buffer and a "code" buffer.
    > > + *
    > > + * 1) Put in the instruction pointer into the IP buffer

    >
    > s/in //


    OK.

    >
    > > + * and the new code into the "code" buffer.
    > > + * 2) Set a flag that says we are modifying code
    > > + * 3) Wait for any running NMIs to finish.
    > > + * 4) Write the code
    > > + * 5) clear the flag.
    > > + * 6) Wait for any running NMIs to finish.
    > > + *
    > > + * If an NMI is executed, the first thing it does is to call
    > > + * "ftrace_nmi_enter". This will check if the flag is set to write
    > > + * and if it is, it will write what is in the IP and "code" buffers.
    > > + *
    > > + * The trick is, it does not matter if everyone is writing the same
    > > + * content to the code location. Also, if a CPU is executing code
    > > + * it is OK to write to that code location if the contents being written
    > > + * are the same as what exists.
    > > + */
    > > +
    > > +static atomic_t in_nmi;

    >
    > Should formally have an ATONIC_INIT. If we're going to formalise the
    > "all zeroes is OK for atomic_t's" rule then we can leave this as-is.


    This is a bad habit of mine :-/ I would like to formalize the "all zeroes
    is OK for atomic_t's" rule, but I'm not in that position to do so.

    >
    > > +static int mod_code_status;
    > > +static int mod_code_write;
    > > +static void *mod_code_ip;
    > > +static void *mod_code_newcode;

    >
    > Some comments decribing the global state would be nice.


    Hmm, I thought I commented enough about this in the above comments. But
    I guess it would not hurt to add more.

    >
    > > +static void ftrace_mod_code(void)
    > > +{
    > > + /*
    > > + * Yes, more than one CPU process can be writing to mod_code_status.
    > > + * (and the code itself)
    > > + * But if one were to fail, then they all should, and if one were
    > > + * to succeed, then they all should.
    > > + */
    > > + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
    > > + MCOUNT_INSN_SIZE);
    > > +
    > > +}
    > > +
    > > +void ftrace_nmi_enter(void)
    > > +{
    > > + atomic_inc(&in_nmi);
    > > + /* Must have in_nmi seen before reading write flag */
    > > + smp_mb();
    > > + if (mod_code_write)
    > > + ftrace_mod_code();
    > > +}
    > > +
    > > +void ftrace_nmi_exit(void)
    > > +{
    > > + /* Finish all executions before clearing in_nmi */
    > > + smp_wmb();
    > > + atomic_dec(&in_nmi);
    > > +}
    > > +
    > > +static void wait_for_nmi(void)
    > > +{
    > > + while (atomic_read(&in_nmi))
    > > + cpu_relax();
    > > +}
    > > +
    > > +static int
    > > +do_ftrace_mod_code(unsigned long ip, void *new_code)
    > > +{
    > > + mod_code_ip = (void *)ip;
    > > + mod_code_newcode = new_code;
    > > +
    > > + /* The buffers need to be visible before we let NMIs write them */
    > > + smp_wmb();
    > > +
    > > + mod_code_write = 1;
    > > +
    > > + /* Make sure write bit is visible before we wait on NMIs */
    > > + smp_mb();
    > > +
    > > + wait_for_nmi();
    > > +
    > > + /* Make sure all running NMIs have finished before we write the code */
    > > + smp_mb();
    > > +
    > > + ftrace_mod_code();
    > > +
    > > + /* Make sure the write happens before clearing the bit */
    > > + smp_wmb();
    > > +
    > > + mod_code_write = 0;
    > > +
    > > + /* make sure NMIs see the cleared bit */
    > > + smp_mb();
    > > +
    > > + wait_for_nmi();
    > > +
    > > + return mod_code_status;
    > > +}

    >
    > I guess the weakness here is that the code will only allow a single
    > contiguous hunk of text to be modified. One could envisage situations
    > where two or more separate areas of memory need to be modified
    > atomically/together.
    >
    > I guess we can cross that bridge when we fall off it.


    Well the current code (luckily) does not need that. As Ingo has stated,
    the simpler this code is the better. There's just too much that can
    go wrong if this code is defective.

    >
    > > --- linux-tip.git.orig/include/linux/hardirq.h 2008-10-30 10:27:07.000000000 -0400
    > > +++ linux-tip.git/include/linux/hardirq.h 2008-10-30 10:27:09.000000000 -0400
    > > @@ -5,6 +5,7 @@
    > > #include
    > > #include
    > > #include
    > > +#include
    > > #include
    > >
    > > /*
    > > @@ -161,7 +162,17 @@ extern void irq_enter(void);
    > > */
    > > extern void irq_exit(void);
    > >
    > > -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
    > > -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
    > > +#define nmi_enter() \
    > > + do { \
    > > + ftrace_nmi_enter(); \
    > > + lockdep_off(); \
    > > + __irq_enter(); \
    > > + } while (0)
    > > +#define nmi_exit() \
    > > + do { \
    > > + __irq_exit(); \
    > > + lockdep_on(); \
    > > + ftrace_nmi_exit(); \
    > > + } while (0)
    > >

    >
    >


    A clean up patch can follow this. I would not do it here, because it
    follows the style of the irq_enter and irq_exit macros just above this
    code.

    I do not see why this could not be converted to a static inline, but
    that should be a separate patch that udpates both this and the
    irq_enter/exit macros together.

    -- Steve

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. Re: [PATCH 2/2] ftrace: nmi update statistics


    On Thu, 30 Oct 2008, Andrew Morton wrote:

    > On Thu, 30 Oct 2008 16:08:33 -0400
    > Steven Rostedt wrote:
    >
    > > This patch adds dynamic ftrace NMI update statistics to the
    > > /debugfs/tracing/dyn_ftrace_total_info stat file.
    > >
    > > Signed-off-by: Steven Rostedt
    > > ---
    > > arch/x86/kernel/ftrace.c | 26 ++++++++++++++++++++++++--
    > > kernel/trace/trace.c | 31 ++++++++++++++++++++++++-------

    >
    > No header files were modified, but ftrace_arch_read_dyn_info() should
    > have been declared so that the above two compilation units see the
    > declaration.


    Ah, you caught me being lazy ;-) Since both also define the function, I
    figured I'd be OK. But you are right, this would prevent bugs where the
    two functions somehow got different parameters.

    Will fix.

    >
    > >
    > > Index: linux-tip.git/arch/x86/kernel/ftrace.c
    > > ================================================== =================
    > > --- linux-tip.git.orig/arch/x86/kernel/ftrace.c 2008-10-30 15:30:12.000000000 -0400
    > > +++ linux-tip.git/arch/x86/kernel/ftrace.c 2008-10-30 15:51:51.000000000 -0400
    > > @@ -91,6 +91,19 @@ static int mod_code_write;
    > > static void *mod_code_ip;
    > > static void *mod_code_newcode;
    > >
    > > +static int nmi_wait_count;
    > > +static atomic_t nmi_update_count;

    >
    >


    Hmm, this being arch specific code, is there such a thing as "atomic
    declared as zero on x86 is OK" rule?

    >
    > > +int ftrace_arch_read_dyn_info(char *buf, int size)
    > > +{
    > > + int r;
    > > +
    > > + r = snprintf(buf, size, "%u %u",
    > > + nmi_wait_count,
    > > + atomic_read(&nmi_update_count));
    > > + return r;
    > > +}

    >
    > Yes, nmi_wait_count is an unsigned quantity. Might as well define it
    > thusly?


    OK.

    >
    > > static void ftrace_mod_code(void)
    > > {
    > > /*
    > > @@ -109,8 +122,10 @@ void ftrace_nmi_enter(void)
    > > atomic_inc(&in_nmi);
    > > /* Must have in_nmi seen before reading write flag */
    > > smp_mb();
    > > - if (mod_code_write)
    > > + if (mod_code_write) {
    > > ftrace_mod_code();
    > > + atomic_inc(&nmi_update_count);
    > > + }
    > > }
    > >
    > > void ftrace_nmi_exit(void)
    > > @@ -122,8 +137,15 @@ void ftrace_nmi_exit(void)
    > >
    > > static void wait_for_nmi(void)
    > > {
    > > - while (atomic_read(&in_nmi))
    > > + int waited = 0;
    > > +
    > > + while (atomic_read(&in_nmi)) {
    > > + waited = 1;
    > > cpu_relax();
    > > + }
    > > +
    > > + if (waited)
    > > + nmi_wait_count++;
    > > }
    > >
    > > static int
    > > Index: linux-tip.git/kernel/trace/trace.c
    > > ================================================== =================
    > > --- linux-tip.git.orig/kernel/trace/trace.c 2008-10-30 15:18:45.000000000 -0400
    > > +++ linux-tip.git/kernel/trace/trace.c 2008-10-30 15:51:51.000000000 -0400
    > > @@ -2837,22 +2837,39 @@ static struct file_operations tracing_ma
    > >
    > > #ifdef CONFIG_DYNAMIC_FTRACE
    > >
    > > +#define DYN_INFO_BUF_SIZE 1023
    > > +static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1];

    >
    > Could be made local to tracing_read_dyn_info().
    >
    >
    > Could just be
    >
    > static char ftrace_dyn_info_buffer[1024];
    >
    > then use sizeof/ARRAY_SIZE elsewhere. I think this is a bit safer and


    I think there's a defined macro for that... (goes look)

    Yep! it's called ARRAY_SIZE(arr)

    > more readable - the reader doesn't have to run around the code checking
    > that the correct #define was used in both places.


    Will fix.

    Thanks,

    -- Steve
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. Re: [PATCH 0/2] ftrace: handle NMIs safely


    On Thu, 30 Oct 2008, Ingo Molnar wrote:

    >
    > * Steven Rostedt wrote:
    >
    > > The robustness of ftrace has been the focus of the code modification
    > > in 2.6.28. There is one remaining issue that needed to be addressed.
    > > This was the case of NMIs.

    >
    > applied to tip/tracing/nmisafe, thanks Steve!
    >
    > this looks a lot nicer approach than either putting some sort of lock
    > into NMI context (yuck) or the disabling of NMIs (not really possible
    > in a generic way architecturally).
    >
    > the impact is quite non-trivial, so i dont think this is v2.6.28
    > material.


    Ingo,

    Do you want me to send patches on top of these to address Andrew's
    comements? Or do you want me to resend these with the updates.

    I prefer to send patches on top, that way Andrew can make sure I did his
    changes correctly ;-)

    -- Steve

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  10. Re: [PATCH 1/2] ftrace: nmi safe code modification

    On Thu, 30 Oct 2008 16:58:55 -0400 (EDT)
    Steven Rostedt wrote:

    > On Thu, 30 Oct 2008, Andrew Morton wrote:
    > >
    > > > +#ifndef __ASSEMBLY__
    > > > +#define ftrace_nmi_enter() do { } while (0)
    > > > +#define ftrace_nmi_exit() do { } while (0)
    > > > +#endif
    > > > ...
    > > > +#ifndef __ASSEMBLY__
    > > > +#define ftrace_nmi_enter() do { } while (0)
    > > > +#define ftrace_nmi_exit() do { } while (0)
    > > > +#endif

    > >
    > > These could all be written in C. If there's a reson to write them in
    > > cpp then the `#ifndef __ASSEMBLY__' isn't really needed.

    >
    > I could do the C macro, and you are right, I did not need the __ASSEMBLY__
    > part. I guess that was me just being over-protective :-/
    >
    > Which would you prefer? Changing to C or removing the __ASSEMBLY__?


    From a general perspective, C is better. Has typechecking, adds a ref
    to the arguments which can prevent unused-var warnings, easier to read
    and maintain, more likely to be commented, known about by debug info,
    doesn't all get clumped into a single line in debug info, easier/safer
    to uninline, blah, blah.

    Also it seems a bit weird to do

    #ifdef SOMETHING
    #define foo(...) ...
    #else
    extern void foo(...);
    #endif

    Doing it in C has the downside that more things need to be visible at
    the definition site, so more includes might be needed. Often fixable
    by uninlining.


    I dunno. People seem to instinctively reach for a macro without
    thinking, because that's how grandpa did it or something.

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. Re: [PATCH 0/2] ftrace: handle NMIs safely


    * Steven Rostedt wrote:

    >
    > On Thu, 30 Oct 2008, Ingo Molnar wrote:
    >
    > >
    > > * Steven Rostedt wrote:
    > >
    > > > The robustness of ftrace has been the focus of the code modification
    > > > in 2.6.28. There is one remaining issue that needed to be addressed.
    > > > This was the case of NMIs.

    > >
    > > applied to tip/tracing/nmisafe, thanks Steve!
    > >
    > > this looks a lot nicer approach than either putting some sort of lock
    > > into NMI context (yuck) or the disabling of NMIs (not really possible
    > > in a generic way architecturally).
    > >
    > > the impact is quite non-trivial, so i dont think this is v2.6.28
    > > material.

    >
    > Ingo,
    >
    > Do you want me to send patches on top of these to address Andrew's
    > comements? Or do you want me to resend these with the updates.
    >
    > I prefer to send patches on top, that way Andrew can make sure I did
    > his changes correctly ;-)


    yeah, please do it that way. It's a separate topic so we can fold them
    back if they are also bugfixes. (and they are all cleanliness related
    and i wanted to see the stability impact ASAP)

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. [PATCH] ftrace: nmi safe code clean ups


    This patch cleans up the NMI safe code for dynamic ftrace as suggested
    by Andrew Morton.

    Signed-off-by: Steven Rostedt
    ---
    arch/arm/include/asm/ftrace.h | 4 ++--
    arch/powerpc/include/asm/ftrace.h | 4 ++--
    arch/sh/include/asm/ftrace.h | 4 ++--
    arch/sparc/include/asm/ftrace.h | 4 ++--
    arch/x86/include/asm/ftrace.h | 10 +++++-----
    arch/x86/kernel/ftrace.c | 16 ++++++++--------
    include/linux/ftrace.h | 3 +++
    kernel/trace/trace.c | 9 ++++-----
    8 files changed, 28 insertions(+), 26 deletions(-)

    Index: linux-tip.git/arch/arm/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/arm/include/asm/ftrace.h 2008-10-30 10:27:09.000000000 -0400
    +++ linux-tip.git/arch/arm/include/asm/ftrace.h 2008-10-30 23:24:18.000000000 -0400
    @@ -2,8 +2,8 @@
    #define _ASM_ARM_FTRACE

    #ifndef __ASSEMBLY__
    -#define ftrace_nmi_enter() do { } while (0)
    -#define ftrace_nmi_exit() do { } while (0)
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif

    #ifdef CONFIG_FUNCTION_TRACER
    Index: linux-tip.git/arch/powerpc/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/powerpc/include/asm/ftrace.h 2008-10-30 10:27:09.000000000 -0400
    +++ linux-tip.git/arch/powerpc/include/asm/ftrace.h 2008-10-30 23:24:30.000000000 -0400
    @@ -2,8 +2,8 @@
    #define _ASM_POWERPC_FTRACE

    #ifndef __ASSEMBLY__
    -#define ftrace_nmi_enter() do { } while (0)
    -#define ftrace_nmi_exit() do { } while (0)
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif

    #ifdef CONFIG_FUNCTION_TRACER
    Index: linux-tip.git/arch/sh/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/sh/include/asm/ftrace.h 2008-10-30 10:27:09.000000000 -0400
    +++ linux-tip.git/arch/sh/include/asm/ftrace.h 2008-10-30 23:24:43.000000000 -0400
    @@ -2,8 +2,8 @@
    #define __ASM_SH_FTRACE_H

    #ifndef __ASSEMBLY__
    -#define ftrace_nmi_enter() do { } while (0)
    -#define ftrace_nmi_exit() do { } while (0)
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif

    #ifndef __ASSEMBLY__
    Index: linux-tip.git/arch/sparc/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/sparc/include/asm/ftrace.h 2008-10-30 10:27:09.000000000 -0400
    +++ linux-tip.git/arch/sparc/include/asm/ftrace.h 2008-10-30 23:24:50.000000000 -0400
    @@ -2,8 +2,8 @@
    #define _ASM_SPARC64_FTRACE

    #ifndef __ASSEMBLY__
    -#define ftrace_nmi_enter() do { } while (0)
    -#define ftrace_nmi_exit() do { } while (0)
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif

    #ifdef CONFIG_MCOUNT
    Index: linux-tip.git/arch/x86/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/x86/include/asm/ftrace.h 2008-10-30 13:16:22.000000000 -0400
    +++ linux-tip.git/arch/x86/include/asm/ftrace.h 2008-10-30 23:25:26.000000000 -0400
    @@ -22,16 +22,16 @@ static inline unsigned long ftrace_call_
    extern void ftrace_nmi_enter(void);
    extern void ftrace_nmi_exit(void);
    #else
    -#define ftrace_nmi_enter() do { } while (0)
    -#define ftrace_nmi_exit() do { } while (0)
    -#endif
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif
    +#endif /* __ASSEMBLY__ */

    #else /* CONFIG_FUNCTION_TRACER */

    #ifndef __ASSEMBLY__
    -#define ftrace_nmi_enter() do { } while (0)
    -#define ftrace_nmi_exit() do { } while (0)
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif

    #endif /* CONFIG_FUNCTION_TRACER */
    Index: linux-tip.git/arch/x86/kernel/ftrace.c
    ================================================== =================
    --- linux-tip.git.orig/arch/x86/kernel/ftrace.c 2008-10-30 16:07:05.000000000 -0400
    +++ linux-tip.git/arch/x86/kernel/ftrace.c 2008-10-30 23:30:19.000000000 -0400
    @@ -67,7 +67,7 @@ unsigned char *ftrace_call_replace(unsig
    *
    * Two buffers are added: An IP buffer and a "code" buffer.
    *
    - * 1) Put in the instruction pointer into the IP buffer
    + * 1) Put the instruction pointer into the IP buffer
    * and the new code into the "code" buffer.
    * 2) Set a flag that says we are modifying code
    * 3) Wait for any running NMIs to finish.
    @@ -85,14 +85,14 @@ unsigned char *ftrace_call_replace(unsig
    * are the same as what exists.
    */

    -static atomic_t in_nmi;
    -static int mod_code_status;
    -static int mod_code_write;
    -static void *mod_code_ip;
    -static void *mod_code_newcode;
    +static atomic_t in_nmi = ATOMIC_INIT(0);
    +static int mod_code_status; /* holds return value of text write */
    +static int mod_code_write; /* set when NMI should do the write */
    +static void *mod_code_ip; /* holds the IP to write to */
    +static void *mod_code_newcode; /* holds the text to write to the IP */

    -static int nmi_wait_count;
    -static atomic_t nmi_update_count;
    +static unsigned nmi_wait_count;
    +static atomic_t nmi_update_count = ATOMIC_INIT(0);

    int ftrace_arch_read_dyn_info(char *buf, int size)
    {
    Index: linux-tip.git/include/linux/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/include/linux/ftrace.h 2008-10-30 10:27:07.000000000 -0400
    +++ linux-tip.git/include/linux/ftrace.h 2008-10-30 23:31:33.000000000 -0400
    @@ -74,6 +74,9 @@ extern void ftrace_caller(void);
    extern void ftrace_call(void);
    extern void mcount_call(void);

    +/* May be defined in arch */
    +extern int ftrace_arch_read_dyn_info(char *buf, int size);
    +
    /**
    * ftrace_modify_code - modify code segment
    * @ip: the address of the code segment
    Index: linux-tip.git/kernel/trace/trace.c
    ================================================== =================
    --- linux-tip.git.orig/kernel/trace/trace.c 2008-10-30 16:07:05.000000000 -0400
    +++ linux-tip.git/kernel/trace/trace.c 2008-10-30 23:33:41.000000000 -0400
    @@ -2837,10 +2837,6 @@ static struct file_operations tracing_ma

    #ifdef CONFIG_DYNAMIC_FTRACE

    -#define DYN_INFO_BUF_SIZE 1023
    -static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1];
    -static DEFINE_MUTEX(dyn_info_mutex);
    -
    int __weak ftrace_arch_read_dyn_info(char *buf, int size)
    {
    return 0;
    @@ -2850,14 +2846,17 @@ static ssize_t
    tracing_read_dyn_info(struct file *filp, char __user *ubuf,
    size_t cnt, loff_t *ppos)
    {
    + static char ftrace_dyn_info_buffer[1024];
    + static DEFINE_MUTEX(dyn_info_mutex);
    unsigned long *p = filp->private_data;
    char *buf = ftrace_dyn_info_buffer;
    + int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
    int r;

    mutex_lock(&dyn_info_mutex);
    r = sprintf(buf, "%ld ", *p);

    - r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r);
    + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
    buf[r++] = '\n';

    r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  13. [PATCH] hardirq.h clean up


    This patch converts the CPP macros of __irq_enter, __irq_exit,
    nmi_enter, and nmi_exit into static inlines.

    Signed-off-by: Steven Rostedt
    ---
    include/linux/hardirq.h | 53 ++++++++++++++++++++++++------------------------
    1 file changed, 27 insertions(+), 26 deletions(-)

    Index: linux-tip.git/include/linux/hardirq.h
    ================================================== =================
    --- linux-tip.git.orig/include/linux/hardirq.h 2008-10-30 10:27:09.000000000 -0400
    +++ linux-tip.git/include/linux/hardirq.h 2008-10-31 00:07:46.000000000 -0400
    @@ -133,13 +133,13 @@ extern void rcu_irq_exit(void);
    * always balanced, so the interrupted value of ->hardirq_context
    * will always be restored.
    */
    -#define __irq_enter() \
    - do { \
    - rcu_irq_enter(); \
    - account_system_vtime(current); \
    - add_preempt_count(HARDIRQ_OFFSET); \
    - trace_hardirq_enter(); \
    - } while (0)
    +static inline void __irq_enter(void)
    +{
    + rcu_irq_enter();
    + account_system_vtime(current);
    + add_preempt_count(HARDIRQ_OFFSET);
    + trace_hardirq_enter();
    +}

    /*
    * Enter irq context (on NO_HZ, update jiffies):
    @@ -149,30 +149,31 @@ extern void irq_enter(void);
    /*
    * Exit irq context without processing softirqs:
    */
    -#define __irq_exit() \
    - do { \
    - trace_hardirq_exit(); \
    - account_system_vtime(current); \
    - sub_preempt_count(HARDIRQ_OFFSET); \
    - rcu_irq_exit(); \
    - } while (0)
    +static inline void __irq_exit(void)
    +{
    + trace_hardirq_exit();
    + account_system_vtime(current);
    + sub_preempt_count(HARDIRQ_OFFSET);
    + rcu_irq_exit();
    +}

    /*
    * Exit irq context and process softirqs if needed:
    */
    extern void irq_exit(void);

    -#define nmi_enter() \
    - do { \
    - ftrace_nmi_enter(); \
    - lockdep_off(); \
    - __irq_enter(); \
    - } while (0)
    -#define nmi_exit() \
    - do { \
    - __irq_exit(); \
    - lockdep_on(); \
    - ftrace_nmi_exit(); \
    - } while (0)
    +static inline void nmi_enter(void)
    +{
    + ftrace_nmi_enter();
    + lockdep_off();
    + __irq_enter();
    +}
    +
    +static inline void nmi_exit(void)
    +{
    + __irq_exit();
    + lockdep_on();
    + ftrace_nmi_exit();
    +}

    #endif /* LINUX_HARDIRQ_H */


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  14. Re: [PATCH] ftrace: nmi safe code clean ups


    * Steven Rostedt wrote:

    >
    > This patch cleans up the NMI safe code for dynamic ftrace as suggested
    > by Andrew Morton.
    >
    > Signed-off-by: Steven Rostedt
    > ---
    > arch/arm/include/asm/ftrace.h | 4 ++--
    > arch/powerpc/include/asm/ftrace.h | 4 ++--
    > arch/sh/include/asm/ftrace.h | 4 ++--
    > arch/sparc/include/asm/ftrace.h | 4 ++--
    > arch/x86/include/asm/ftrace.h | 10 +++++-----
    > arch/x86/kernel/ftrace.c | 16 ++++++++--------
    > include/linux/ftrace.h | 3 +++
    > kernel/trace/trace.c | 9 ++++-----
    > 8 files changed, 28 insertions(+), 26 deletions(-)


    applied to tip/tracing/nmisafe, thanks Steve!

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  15. Re: [PATCH] hardirq.h clean up


    * Steven Rostedt wrote:

    >
    > This patch converts the CPP macros of __irq_enter, __irq_exit,
    > nmi_enter, and nmi_exit into static inlines.
    >
    > Signed-off-by: Steven Rostedt
    > ---
    > include/linux/hardirq.h | 53 ++++++++++++++++++++++++------------------------
    > 1 file changed, 27 insertions(+), 26 deletions(-)


    makes sense, but have you done build-testing (and cross-build-testing)
    of this? I remember that this area was include-file-dependencies-hell
    in the past.

    perhaps with your simulate-old-arch patch on x86 we could tickle some
    of the issues that trigger here.

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  16. [PATCH] ftrace: fix hardirq header for non ftrace archs


    Not all archs implement ftrace, and therefore do not have an asm/ftrace.h.
    This patch corrects the problem.

    The ftrace_nmi_enter/exit now must be defined for all archs that implement
    dynamic ftrace. Currently, only x86 does.

    Signed-off-by: Steven Rostedt
    ---
    arch/arm/include/asm/ftrace.h | 5 -----
    arch/powerpc/include/asm/ftrace.h | 5 -----
    arch/sh/include/asm/ftrace.h | 5 -----
    arch/sparc/include/asm/ftrace.h | 5 -----
    arch/x86/include/asm/ftrace.h | 16 ----------------
    include/linux/ftrace.h | 5 ++++-
    include/linux/hardirq.h | 2 +-
    7 files changed, 5 insertions(+), 38 deletions(-)

    Index: linux-tip.git/arch/arm/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/arm/include/asm/ftrace.h 2008-10-30 23:24:18.000000000 -0400
    +++ linux-tip.git/arch/arm/include/asm/ftrace.h 2008-10-31 08:48:47.000000000 -0400
    @@ -1,11 +1,6 @@
    #ifndef _ASM_ARM_FTRACE
    #define _ASM_ARM_FTRACE

    -#ifndef __ASSEMBLY__
    -static inline void ftrace_nmi_enter(void) { }
    -static inline void ftrace_nmi_exit(void) { }
    -#endif
    -
    #ifdef CONFIG_FUNCTION_TRACER
    #define MCOUNT_ADDR ((long)(mcount))
    #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
    Index: linux-tip.git/arch/powerpc/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/powerpc/include/asm/ftrace.h 2008-10-30 23:24:30.000000000 -0400
    +++ linux-tip.git/arch/powerpc/include/asm/ftrace.h 2008-10-31 08:48:52.000000000 -0400
    @@ -1,11 +1,6 @@
    #ifndef _ASM_POWERPC_FTRACE
    #define _ASM_POWERPC_FTRACE

    -#ifndef __ASSEMBLY__
    -static inline void ftrace_nmi_enter(void) { }
    -static inline void ftrace_nmi_exit(void) { }
    -#endif
    -
    #ifdef CONFIG_FUNCTION_TRACER
    #define MCOUNT_ADDR ((long)(_mcount))
    #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
    Index: linux-tip.git/arch/sh/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/sh/include/asm/ftrace.h 2008-10-30 23:24:43.000000000 -0400
    +++ linux-tip.git/arch/sh/include/asm/ftrace.h 2008-10-31 08:48:58.000000000 -0400
    @@ -2,11 +2,6 @@
    #define __ASM_SH_FTRACE_H

    #ifndef __ASSEMBLY__
    -static inline void ftrace_nmi_enter(void) { }
    -static inline void ftrace_nmi_exit(void) { }
    -#endif
    -
    -#ifndef __ASSEMBLY__
    extern void mcount(void);
    #endif

    Index: linux-tip.git/arch/sparc/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/sparc/include/asm/ftrace.h 2008-10-30 23:24:50.000000000 -0400
    +++ linux-tip.git/arch/sparc/include/asm/ftrace.h 2008-10-31 08:49:01.000000000 -0400
    @@ -1,11 +1,6 @@
    #ifndef _ASM_SPARC64_FTRACE
    #define _ASM_SPARC64_FTRACE

    -#ifndef __ASSEMBLY__
    -static inline void ftrace_nmi_enter(void) { }
    -static inline void ftrace_nmi_exit(void) { }
    -#endif
    -
    #ifdef CONFIG_MCOUNT
    #define MCOUNT_ADDR ((long)(_mcount))
    #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
    Index: linux-tip.git/arch/x86/include/asm/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/arch/x86/include/asm/ftrace.h 2008-10-30 23:25:26.000000000 -0400
    +++ linux-tip.git/arch/x86/include/asm/ftrace.h 2008-10-31 08:52:44.000000000 -0400
    @@ -17,23 +17,7 @@ static inline unsigned long ftrace_call_
    */
    return addr - 1;
    }
    -
    -#ifdef CONFIG_DYNAMIC_FTRACE
    -extern void ftrace_nmi_enter(void);
    -extern void ftrace_nmi_exit(void);
    -#else
    -static inline void ftrace_nmi_enter(void) { }
    -static inline void ftrace_nmi_exit(void) { }
    -#endif
    #endif /* __ASSEMBLY__ */
    -
    -#else /* CONFIG_FUNCTION_TRACER */
    -
    -#ifndef __ASSEMBLY__
    -static inline void ftrace_nmi_enter(void) { }
    -static inline void ftrace_nmi_exit(void) { }
    -#endif
    -
    #endif /* CONFIG_FUNCTION_TRACER */

    #endif /* _ASM_X86_FTRACE_H */
    Index: linux-tip.git/include/linux/ftrace.h
    ================================================== =================
    --- linux-tip.git.orig/include/linux/ftrace.h 2008-10-30 23:31:33.000000000 -0400
    +++ linux-tip.git/include/linux/ftrace.h 2008-10-31 08:52:29.000000000 -0400
    @@ -44,7 +44,6 @@ static inline void ftrace_kill(void) { }
    #endif /* CONFIG_FUNCTION_TRACER */

    #ifdef CONFIG_DYNAMIC_FTRACE
    -
    enum {
    FTRACE_FL_FREE = (1 << 0),
    FTRACE_FL_FAILED = (1 << 1),
    @@ -105,6 +104,8 @@ extern void ftrace_release(void *start,

    extern void ftrace_disable_daemon(void);
    extern void ftrace_enable_daemon(void);
    +extern void ftrace_nmi_enter(void);
    +extern void ftrace_nmi_exit(void);

    #else
    # define skip_trace(ip) ({ 0; })
    @@ -113,6 +114,8 @@ extern void ftrace_enable_daemon(void);
    # define ftrace_disable_daemon() do { } while (0)
    # define ftrace_enable_daemon() do { } while (0)
    static inline void ftrace_release(void *start, unsigned long size) { }
    +static inline void ftrace_nmi_enter(void) { }
    +static inline void ftrace_nmi_exit(void) { }
    #endif /* CONFIG_DYNAMIC_FTRACE */

    /* totally disable ftrace - can not re-enable after this */
    Index: linux-tip.git/include/linux/hardirq.h
    ================================================== =================
    --- linux-tip.git.orig/include/linux/hardirq.h 2008-10-31 00:07:46.000000000 -0400
    +++ linux-tip.git/include/linux/hardirq.h 2008-10-31 08:53:01.000000000 -0400
    @@ -4,8 +4,8 @@
    #include
    #include
    #include
    +#include
    #include
    -#include
    #include

    /*


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  17. Re: [PATCH] ftrace: fix hardirq header for non ftrace archs


    * Steven Rostedt wrote:

    >
    > Not all archs implement ftrace, and therefore do not have an asm/ftrace.h.
    > This patch corrects the problem.
    >
    > The ftrace_nmi_enter/exit now must be defined for all archs that implement
    > dynamic ftrace. Currently, only x86 does.
    >
    > Signed-off-by: Steven Rostedt
    > ---
    > arch/arm/include/asm/ftrace.h | 5 -----
    > arch/powerpc/include/asm/ftrace.h | 5 -----
    > arch/sh/include/asm/ftrace.h | 5 -----
    > arch/sparc/include/asm/ftrace.h | 5 -----
    > arch/x86/include/asm/ftrace.h | 16 ----------------
    > include/linux/ftrace.h | 5 ++++-
    > include/linux/hardirq.h | 2 +-
    > 7 files changed, 5 insertions(+), 38 deletions(-)


    applied to tip/tracing/nmisafe, thanks Steve!

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread