Thread: [PATCH 0/2] ftrace: clean ups and sanity checks

  1. [PATCH 0/2] ftrace: clean ups and sanity checks

    Ingo,

    Can you please pull in these two patches and run them through your tests.
    I've tested them, but I know you have a more elaborate setup. Then can you send them over to Linus for 2.6.28.

    These are clean ups and robustness patches. No new features here.

    Thanks,

    -- Steve

  2. [PATCH 1/2] ftrace: make dynamic ftrace more robust

    This patch cleans up some of the ftrace code as well as adds some
    more sanity checks. If any of the sanity checks fail, a warning will
    be printed and ftrace will be disabled.

    Signed-off-by: Steven Rostedt
    ---
    arch/x86/kernel/ftrace.c | 15 ++++++++++-----
    include/linux/ftrace.h | 7 ++++++-
    include/linux/init.h | 10 +++++-----
    kernel/trace/ftrace.c | 35 +++++++++++++++++++++++++++++++----
    4 files changed, 52 insertions(+), 15 deletions(-)

    Index: linux-compile.git/arch/x86/kernel/ftrace.c
    ===================================================================
    --- linux-compile.git.orig/arch/x86/kernel/ftrace.c 2008-10-20 19:39:54.000000000 -0400
    +++ linux-compile.git/arch/x86/kernel/ftrace.c 2008-10-20 19:42:02.000000000 -0400
    @@ -66,19 +66,24 @@ ftrace_modify_code(unsigned long ip, uns
    /*
    * Note: Due to modules and __init, code can
    * disappear and change, we need to protect against faulting
    - * as well as code changing.
    + * as well as code changing. We do this by using the
    + * __copy_*_user functions.
    *
    * No real locking needed, this code is run through
    * kstop_machine, or before SMP starts.
    */
    +
    + /* read the text we want to modify */
    if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
    - return 1;
    + return FTRACE_CODE_FAILED_READ;

    + /* Make sure it is what we expect it to be */
    if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
    - return 2;
    + return FTRACE_CODE_FAILED_CMP;

    - WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
    - MCOUNT_INSN_SIZE));
    + /* replace the text with the new text */
    + if (__copy_to_user_inatomic((char __user *)ip, new_code, MCOUNT_INSN_SIZE))
    + return FTRACE_CODE_FAILED_WRITE;

    sync_core();

    Index: linux-compile.git/include/linux/ftrace.h
    ===================================================================
    --- linux-compile.git.orig/include/linux/ftrace.h 2008-10-20 19:39:54.000000000 -0400
    +++ linux-compile.git/include/linux/ftrace.h 2008-10-20 19:47:23.000000000 -0400
    @@ -232,6 +232,11 @@ static inline void start_boot_trace(void
    static inline void stop_boot_trace(void) { }
    #endif

    -
    +enum {
    + FTRACE_CODE_MODIFIED,
    + FTRACE_CODE_FAILED_READ,
    + FTRACE_CODE_FAILED_CMP,
    + FTRACE_CODE_FAILED_WRITE,
    +};

    #endif /* _LINUX_FTRACE_H */
    Index: linux-compile.git/include/linux/init.h
    ===================================================================
    --- linux-compile.git.orig/include/linux/init.h 2008-10-20 19:39:54.000000000 -0400
    +++ linux-compile.git/include/linux/init.h 2008-10-20 19:40:06.000000000 -0400
    @@ -75,15 +75,15 @@


    #ifdef MODULE
    -#define __exitused
    +#define __exitused notrace
    #else
    -#define __exitused __used
    +#define __exitused __used notrace
    #endif

    #define __exit __section(.exit.text) __exitused __cold

    /* Used for HOTPLUG */
    -#define __devinit __section(.devinit.text) __cold
    +#define __devinit __section(.devinit.text) __cold notrace
    #define __devinitdata __section(.devinit.data)
    #define __devinitconst __section(.devinit.rodata)
    #define __devexit __section(.devexit.text) __exitused __cold
    @@ -91,7 +91,7 @@
    #define __devexitconst __section(.devexit.rodata)

    /* Used for HOTPLUG_CPU */
    -#define __cpuinit __section(.cpuinit.text) __cold
    +#define __cpuinit __section(.cpuinit.text) __cold notrace
    #define __cpuinitdata __section(.cpuinit.data)
    #define __cpuinitconst __section(.cpuinit.rodata)
    #define __cpuexit __section(.cpuexit.text) __exitused __cold
    @@ -99,7 +99,7 @@
    #define __cpuexitconst __section(.cpuexit.rodata)

    /* Used for MEMORY_HOTPLUG */
    -#define __meminit __section(.meminit.text) __cold
    +#define __meminit __section(.meminit.text) __cold notrace
    #define __meminitdata __section(.meminit.data)
    #define __meminitconst __section(.meminit.rodata)
    #define __memexit __section(.memexit.text) __exitused __cold
    Index: linux-compile.git/kernel/trace/ftrace.c
    ===================================================================
    --- linux-compile.git.orig/kernel/trace/ftrace.c 2008-10-20 19:39:54.000000000 -0400
    +++ linux-compile.git/kernel/trace/ftrace.c 2008-10-20 19:46:00.000000000 -0400
    @@ -318,6 +318,14 @@ static inline void ftrace_del_hash(struc

    static void ftrace_free_rec(struct dyn_ftrace *rec)
    {
    + /*
    + * No locking, only called from kstop_machine, or
    + * from module unloading with module locks and interrupts
    + * disabled to prevent kstop machine from running.
    + */
    +
    + WARN_ON(rec->flags & FTRACE_FL_FREE);
    +
    rec->ip = (unsigned long)ftrace_free_records;
    ftrace_free_records = rec;
    rec->flags |= FTRACE_FL_FREE;
    @@ -346,7 +354,6 @@ void ftrace_release(void *start, unsigne
    }
    }
    spin_unlock(&ftrace_lock);
    -
    }

    static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
    @@ -556,9 +563,12 @@ static void ftrace_replace_code(int enab

    failed = __ftrace_replace_code(rec, old, new, enable);
    if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
    + /* kprobes can cause this failure */
    rec->flags |= FTRACE_FL_FAILED;
    if ((system_state == SYSTEM_BOOTING) ||
    !core_kernel_text(rec->ip)) {
    + /* kprobes was not the fault */
    + ftrace_kill_atomic();
    ftrace_del_hash(rec);
    ftrace_free_rec(rec);
    }
    @@ -601,12 +611,12 @@ ftrace_code_disable(struct dyn_ftrace *r
    failed = ftrace_modify_code(ip, call, nop);
    if (failed) {
    switch (failed) {
    - case 1:
    + case FTRACE_CODE_FAILED_READ:
    WARN_ON_ONCE(1);
    pr_info("ftrace faulted on modifying ");
    print_ip_sym(ip);
    break;
    - case 2:
    + case FTRACE_CODE_FAILED_CMP:
    WARN_ON_ONCE(1);
    pr_info("ftrace failed to modify ");
    print_ip_sym(ip);
    @@ -615,7 +625,18 @@ ftrace_code_disable(struct dyn_ftrace *r
    print_ip_ins(" replace: ", nop);
    printk(KERN_CONT "\n");
    break;
    + case FTRACE_CODE_FAILED_WRITE:
    + WARN_ON_ONCE(1);
    + pr_info("ftrace failed to write ");
    + print_ip_sym(ip);
    + break;
    + default:
    + WARN_ON_ONCE(1);
    + pr_info("ftrace unkown error ");
    + print_ip_sym(ip);
    + break;
    }
    + ftrace_kill_atomic();

    rec->flags |= FTRACE_FL_FAILED;
    return 0;
    @@ -789,6 +810,11 @@ static int __ftrace_update_code(void *ig

    /* No locks needed, the machine is stopped! */
    for (i = 0; i < FTRACE_HASHSIZE; i++) {
    +
    + /* If something went wrong, bail without enabling anything */
    + if (unlikely(ftrace_disabled))
    + return;
    +
    INIT_HLIST_HEAD(&temp_list);
    head = &ftrace_hash[i];

    @@ -796,7 +822,8 @@ static int __ftrace_update_code(void *ig
    hlist_for_each_entry_safe(p, t, n, head, node) {
    /* Skip over failed records which have not been
    * freed. */
    - if (p->flags & FTRACE_FL_FAILED)
    + if (p->flags & FTRACE_FL_FAILED ||
    + p->flags & FTRACE_FL_FREE)
    continue;

    /* Unconverted records are always at the head of the


  3. [PATCH 2/2] ftrace: release functions from hash

    The x86 architecture uses a static recording of mcount caller locations
    and is not affected by this patch.

    For architectures still using the dynamic ftrace daemon, this patch is
    critical. It removes the race between the recording of a function that
    calls mcount, the unloading of a module, and the ftrace daemon updating
    the call sites.

    This patch adds the releasing of the hash functions that the daemon uses
    to update the mcount call sites. When a module is unloaded, not only is
    the replaced call site table updated, but now so are the hash-recorded
    functions that the ftrace daemon will use.

    Again, architectures that implement MCOUNT_RECORD (currently only x86)
    are not affected by this.

    Signed-off-by: Steven Rostedt
    ---
    kernel/trace/ftrace.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
    1 file changed, 44 insertions(+)

    Index: linux-compile.git/kernel/trace/ftrace.c
    ===================================================================
    --- linux-compile.git.orig/kernel/trace/ftrace.c 2008-10-21 09:34:51.000000000 -0400
    +++ linux-compile.git/kernel/trace/ftrace.c 2008-10-21 10:31:24.000000000 -0400
    @@ -164,10 +164,14 @@ static DEFINE_SPINLOCK(ftrace_hash_lock)
    #define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
    #define ftrace_hash_unlock(flags) \
    spin_unlock_irqrestore(&ftrace_hash_lock, flags)
    +static void ftrace_release_hash(unsigned long start, unsigned long end);
    #else
    /* This is protected via the ftrace_lock with MCOUNT_RECORD. */
    #define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
    #define ftrace_hash_unlock(flags) do { } while(0)
    +static inline void ftrace_release_hash(unsigned long start, unsigned long end)
    +{
    +}
    #endif

    /*
    @@ -354,6 +358,8 @@ void ftrace_release(void *start, unsigne
    }
    }
    spin_unlock(&ftrace_lock);
    +
    + ftrace_release_hash(s, e);
    }

    static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
    @@ -1686,6 +1692,44 @@ void __init ftrace_init(void)
    ftrace_disabled = 1;
    }
    #else /* CONFIG_FTRACE_MCOUNT_RECORD */
    +
    +static void ftrace_release_hash(unsigned long start, unsigned long end)
    +{
    + struct dyn_ftrace *rec;
    + struct hlist_node *t, *n;
    + struct hlist_head *head, temp_list;
    + unsigned long flags;
    + int i, cpu;
    +
    + preempt_disable_notrace();
    +
    + /* disable in case we call something that calls mcount */
    + cpu = raw_smp_processor_id();
    + per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
    +
    + ftrace_hash_lock(flags);
    +
    + for (i = 0; i < FTRACE_HASHSIZE; i++) {
    + INIT_HLIST_HEAD(&temp_list);
    + head = &ftrace_hash[i];
    +
    + /* all CPUs are stopped, we are safe to modify code */
    + hlist_for_each_entry_safe(rec, t, n, head, node) {
    + if (rec->flags & FTRACE_FL_FREE)
    + continue;
    +
    + if ((rec->ip >= start) && (rec->ip < end))
    + ftrace_free_rec(rec);
    + }
    + }
    +
    + ftrace_hash_unlock(flags);
    +
    + per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
    + preempt_enable_notrace();
    +
    +}
    +
    static int ftraced(void *ignore)
    {
    unsigned long usecs;


  4. Re: [PATCH 2/2] ftrace: release functions from hash



    Ingo,

    I found a bug with this patch. Do not incorporate it. I'll come up
    with a new patch to replace this one.

    Thanks,

    -- Steve

    On Tue, 21 Oct 2008, Steven Rostedt wrote:

    > The x86 architecture uses a static recording of mcount caller locations
    > and is not affected by this patch.
    >
    > [ rest of patch snipped ]


  5. Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust


    * Steven Rostedt wrote:

    > +enum {
    > + FTRACE_CODE_MODIFIED,


    i'd suggest to name it FTRACE_CODE_MODIFIED_OK here, to make it stand
    out from the failure codes.

    > + FTRACE_CODE_FAILED_READ,
    > + FTRACE_CODE_FAILED_CMP,
    > + FTRACE_CODE_FAILED_WRITE,


    but maybe we should just use the standard kernel return codes. 0 for
    success, -EINVAL for the rest. Is there any real value to know exactly
    why it failed? We just know the modification was fishy (this is an
    exception situation), and want to stop ftrace ASAP and then print a
    warning so a kernel developer can debug it.

    Complicating error handling by introducing similar-looking return code
    names just makes it easier to mess up accidentally, hence it _reduces_
    robustness.
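
    As a sketch, here is roughly what ftrace_modify_code() from the patch
    above would look like with standard error codes (the particular errno
    values are an assumption for illustration, not from the thread):

        static int
        ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                           unsigned char *new_code)
        {
                unsigned char replaced[MCOUNT_INSN_SIZE];

                /* read the text we want to modify */
                if (__copy_from_user_inatomic(replaced, (char __user *)ip,
                                              MCOUNT_INSN_SIZE))
                        return -EFAULT;  /* faulted: code not present */

                /* make sure it is what we expect it to be */
                if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
                        return -EINVAL;  /* unexpected contents */

                /* replace the text with the new text */
                if (__copy_to_user_inatomic((char __user *)ip, new_code,
                                            MCOUNT_INSN_SIZE))
                        return -EPERM;   /* write failed */

                sync_core();

                return 0;
        }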

    > --- linux-compile.git.orig/include/linux/init.h 2008-10-20 19:39:54.000000000 -0400
    > +++ linux-compile.git/include/linux/init.h 2008-10-20 19:40:06.000000000 -0400
    > @@ -75,15 +75,15 @@
    >
    >
    > #ifdef MODULE
    > -#define __exitused
    > +#define __exitused notrace
    > #else
    > -#define __exitused __used
    > +#define __exitused __used notrace
    > #endif
    >
    > #define __exit __section(.exit.text) __exitused __cold
    >
    > /* Used for HOTPLUG */
    > -#define __devinit __section(.devinit.text) __cold
    > +#define __devinit __section(.devinit.text) __cold notrace
    > #define __devinitdata __section(.devinit.data)
    > #define __devinitconst __section(.devinit.rodata)
    > #define __devexit __section(.devexit.text) __exitused __cold
    > @@ -91,7 +91,7 @@
    > #define __devexitconst __section(.devexit.rodata)
    >
    > /* Used for HOTPLUG_CPU */
    > -#define __cpuinit __section(.cpuinit.text) __cold
    > +#define __cpuinit __section(.cpuinit.text) __cold notrace
    > #define __cpuinitdata __section(.cpuinit.data)
    > #define __cpuinitconst __section(.cpuinit.rodata)
    > #define __cpuexit __section(.cpuexit.text) __exitused __cold
    > @@ -99,7 +99,7 @@
    > #define __cpuexitconst __section(.cpuexit.rodata)
    >
    > /* Used for MEMORY_HOTPLUG */
    > -#define __meminit __section(.meminit.text) __cold
    > +#define __meminit __section(.meminit.text) __cold notrace
    > #define __meminitdata __section(.meminit.data)
    > #define __meminitconst __section(.meminit.rodata)
    > #define __memexit __section(.memexit.text) __exitused __cold


    there's no justification given for this in the changelog and the change
    looks fishy.

    > static void ftrace_free_rec(struct dyn_ftrace *rec)
    > {
    > + /*
    > + * No locking, only called from kstop_machine, or
    > + * from module unloading with module locks and interrupts
    > + * disabled to prevent kstop machine from running.
    > + */
    > +
    > + WARN_ON(rec->flags & FTRACE_FL_FREE);


    this should _NOT_ be just a WARN_ON(). It should immediately stop ftrace
    entirely, then print _one_ warning. Then it should never run again
    until the next reboot.

    this is a basic principle for instrumentation. If we detect a bug we
    disable ourselves immediately and print a _single_ warning.

    Do _not_ print possibly thousands of warnings and continue as if nothing
    happened ...

    > + /* kprobes was not the fault */
    > + ftrace_kill_atomic();


    while at it, ftrace_kill_atomic() is a misnomer.

    Please use something more understandable and less ambiguous, like
    "ftrace_turn_off()". Both 'kill' and 'atomic' are heavily laden phrases
    used for many other things in the kernel.

    And any such facility must work from any context, because we might call
    it from crash paths, etc. So don't name it _atomic() - it must obviously
    be atomic.

    Ingo

  6. Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust


    On Wed, 22 Oct 2008, Ingo Molnar wrote:

    >
    > * Steven Rostedt wrote:
    >
    > > +enum {
    > > + FTRACE_CODE_MODIFIED,

    >
    > i'd suggest to name it FTRACE_CODE_MODIFIED_OK here, to make it stand
    > out from the failure codes.
    >
    > > + FTRACE_CODE_FAILED_READ,
    > > + FTRACE_CODE_FAILED_CMP,
    > > + FTRACE_CODE_FAILED_WRITE,

    >
    > but maybe we should just use the standard kernel return codes. 0 for
    > success, -EINVAL for the rest. Is there any real value to know exactly
    > why it failed? We just know the modification was fishy (this is an
    > exception situation), and want to stop ftrace ASAP and then print a
    > warning so a kernel developer can debug it.


    Yes, it is important to know the reason for the failure, since it helps
    with diagnosing the issue.

    >
    > Complicating error handling by introducing similar-looking return code
    > names just makes it easier to mess up accidentally, hence it _reduces_
    > robustness.


    I had in mind for 2.6.29 to let an arch add another non-error code that
    says "fail nicely". This is a way, for example, for an arch to report
    that it cannot modify the code because it does not have the ability yet,
    as with the trampoline example. I wanted to let the arch say: "I do not
    make this kind of change, but it is not a bug (I didn't modify anything),
    so simply ignore it," and have ftrace simply remove the record and go on.
    A sketch of that idea follows.
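
    A sketch of that idea (FTRACE_CODE_IGNORE is a hypothetical name;
    nothing in the patch defines it):

        enum {
                FTRACE_CODE_MODIFIED,
                FTRACE_CODE_IGNORE,     /* hypothetical: arch declined, not a bug */
                FTRACE_CODE_FAILED_READ,
                FTRACE_CODE_FAILED_CMP,
                FTRACE_CODE_FAILED_WRITE,
        };

        /* in ftrace_code_disable(), before treating 'failed' as an error: */
        if (failed == FTRACE_CODE_IGNORE) {
                /* the arch did not modify anything; drop the record and go on */
                ftrace_del_hash(rec);
                ftrace_free_rec(rec);
                return 0;
        }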

    >
    > > --- linux-compile.git.orig/include/linux/init.h 2008-10-20 19:39:54.000000000 -0400
    > > +++ linux-compile.git/include/linux/init.h 2008-10-20 19:40:06.000000000 -0400
    > > @@ -75,15 +75,15 @@
    > >
    > >
    > > #ifdef MODULE
    > > -#define __exitused
    > > +#define __exitused notrace
    > > #else
    > > -#define __exitused __used
    > > +#define __exitused __used notrace
    > > #endif
    > >
    > > #define __exit __section(.exit.text) __exitused __cold
    > >
    > > /* Used for HOTPLUG */
    > > -#define __devinit __section(.devinit.text) __cold
    > > +#define __devinit __section(.devinit.text) __cold notrace
    > > #define __devinitdata __section(.devinit.data)
    > > #define __devinitconst __section(.devinit.rodata)
    > > #define __devexit __section(.devexit.text) __exitused __cold
    > > @@ -91,7 +91,7 @@
    > > #define __devexitconst __section(.devexit.rodata)
    > >
    > > /* Used for HOTPLUG_CPU */
    > > -#define __cpuinit __section(.cpuinit.text) __cold
    > > +#define __cpuinit __section(.cpuinit.text) __cold notrace
    > > #define __cpuinitdata __section(.cpuinit.data)
    > > #define __cpuinitconst __section(.cpuinit.rodata)
    > > #define __cpuexit __section(.cpuexit.text) __exitused __cold
    > > @@ -99,7 +99,7 @@
    > > #define __cpuexitconst __section(.cpuexit.rodata)
    > >
    > > /* Used for MEMORY_HOTPLUG */
    > > -#define __meminit __section(.meminit.text) __cold
    > > +#define __meminit __section(.meminit.text) __cold notrace
    > > #define __meminitdata __section(.meminit.data)
    > > #define __meminitconst __section(.meminit.rodata)
    > > #define __memexit __section(.memexit.text) __exitused __cold

    >
    > there's no justification given for this in the changelog and the change
    > looks fishy.


    Sorry, I missed writing this. I had it in other patches, but forgot to
    add the change log here. These are areas, just like the __init section,
    where I have no way of finding out, in an arch-independent way, what to
    remove from the ftrace records. So by not adding these notrace
    annotations, we are guaranteed to hit the warnings above!

    >
    > > static void ftrace_free_rec(struct dyn_ftrace *rec)
    > > {
    > > + /*
    > > + * No locking, only called from kstop_machine, or
    > > + * from module unloading with module locks and interrupts
    > > + * disabled to prevent kstop machine from running.
    > > + */
    > > +
    > > + WARN_ON(rec->flags & FTRACE_FL_FREE);

    >
    > this should _NOT_ be just a WARN_ON(). It should immediately stop ftrace
    > entirely, then print _one_ warning. Then it should never run again
    > until the next reboot.
    >
    > this is a basic principle for instrumentation. If we detect a bug we
    > disable ourselves immediately and print a _single_ warning.
    >
    > Do _not_ print possibly thousands of warnings and continue as if nothing
    > happened ...


    Fine. I'll replace all WARN_ONs with FTRACE_WARN_ONS.
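
    A minimal sketch of what such a macro could look like (assuming the
    shutdown helper keeps its current ftrace_kill_atomic() name):

        /* if a sanity check fails, warn and shut ftrace down for good */
        #define FTRACE_WARN_ON(cond)                    \
                do {                                    \
                        if (WARN_ON(cond))              \
                                ftrace_kill_atomic();   \
                } while (0)

        /* e.g. in ftrace_free_rec(): */
        FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);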

    >
    > > + /* kprobes was not the fault */
    > > + ftrace_kill_atomic();

    >
    > while at it, ftrace_kill_atomic() is a misnomer.
    >
    > Please use something more understandable and less ambiguous, like
    > "ftrace_turn_off()". Both 'kill' and 'atomic' are heavily laden phrases
    > used for many other things in the kernel.
    >
    > And any such facility must work from any context, because we might call
    > it from crash paths, etc. So don't name it _atomic() - it must obviously
    > be atomic.


    The reason for the naming was that ftrace_kill() was used when I knew
    something was wrong, but not so seriously wrong that we could not still
    disable ftrace via kstop_machine. But fine, I'll fix it.

    -- Steve


  7. Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust


    On Wed, 22 Oct 2008, Steven Rostedt wrote:
    > >
    > > > + /* kprobes was not the fault */
    > > > + ftrace_kill_atomic();

    > >
    > > while at it, ftrace_kill_atomic() is a misnomer.
    > >
    > > Please use something more understandable and less ambigious, like
    > > "ftrace_turn_off()". Both 'kill' and 'atomic' are heavily laden phrases
    > > used for many other things in the kernel.


    I agree with the 'atomic' part, but not the 'kill' part. Yes, it is
    unfortunate that Unix used 'kill' to send signals, but the Unix name is
    the misnomer. The problem with "ftrace_turn_off" or anything similar is
    that people will likely use it to temporarily disable ftrace when they
    need to run some sensitive code that they cannot allow tracing on.
    Then they will be confused when they cannot find a "ftrace_turn_on()".

    We already use "disable" to shut down ftrace and put it back into the
    "NOP" state. We have "stop" and "start" to stop function tracing quickly
    (just a bit is set, no conversion of code).

    I figured "kill" is self explanatory. You use it when you want to kill
    ftrace and do not want it to come back. We have no "ftrace_resurrect"
    (well, not yet ;-)

    I think most developers know the "kill" meaning. If you do not like the
    name, you can change it.
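
    In other words, a caller who needs tracing off temporarily should reach
    for stop/start rather than kill (a sketch; do_sensitive_work() is a
    made-up placeholder):

        ftrace_stop();          /* just a bit is set; no call sites are converted */
        do_sensitive_work();    /* hypothetical section that must not be traced */
        ftrace_start();         /* clear the bit; tracing resumes */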

    -- Steve

  8. Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust


    * Steven Rostedt wrote:

    > > i'd suggest to name it FTRACE_CODE_MODIFIED_OK here, to make it
    > > stand out from the failure codes.
    > >
    > > > + FTRACE_CODE_FAILED_READ,
    > > > + FTRACE_CODE_FAILED_CMP,
    > > > + FTRACE_CODE_FAILED_WRITE,

    > >
    > > but maybe we should just use the standard kernel return codes. 0 for
    > > success, -EINVAL for the rest. Is there any real value to know
    > > exactly why it failed? We just know the modification was fishy (this
    > > is an exception situation), and want to stop ftrace ASAP and then
    > > print a warning so a kernel developer can debug it.

    >
    > Yes, it is important to know the reason for the failure, since it
    > helps with diagnosing the issue.


    we have everything we need: a warning message. We only add "reason
    debugging" _if and only if_ problems are so frequent in an area of code
    that it's absolutely needed. Otherwise we just fix the bugs, whenever
    they happen.

    > > Complicating error handling by introducing similar-looking return
    > > code names just makes it easier to mess up accidentally, hence it
    > > _reduces_ robustness.

    >
    > I had in mind for 2.6.29 to let an arch add another
    > non-error code that says "fail nicely". [...]


    no ... you are really thinking about robustness in the wrong way.

    This code runs in the deepest guts of the kernel and hence is playing
    with fire and it must be absolutely robust. Not 'nicely diagnosable',
    not 'fail nicely'. But utterly robust in stopping whatever it does early
    enough to make that problem reportable, should it trigger. (which it
    really should not)

    > > > /* Used for MEMORY_HOTPLUG */
    > > > -#define __meminit __section(.meminit.text) __cold
    > > > +#define __meminit __section(.meminit.text) __cold notrace
    > > > #define __meminitdata __section(.meminit.data)
    > > > #define __meminitconst __section(.meminit.rodata)
    > > > #define __memexit __section(.memexit.text) __exitused __cold

    > >
    > > there's no justification given for this in the changelog and the change
    > > looks fishy.

    >
    > Sorry, I missed writing this. I had it in other patches, but forgot to
    > add the change log here. These are areas, just like the __init section,
    > where I have no way of finding out, in an arch-independent way, what to
    > remove from the ftrace records. So by not adding these notrace
    > annotations, we are guaranteed to hit the warnings above!


    this is utterly fragile and might miss places that insert symbols into
    some of these sections manually.

    the robust approach is to make sure these things are never in an ftrace
    record to begin with. scripts/recordmcount.pl should be taught to only
    record places that it is _100% sure are traceable_. Not "everything
    and we'll sort out the stuff that we think is not okay".

    if that needs arch dependent smarts then so be it - ftrace has to be
    enabled per arch anyway.
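
    As an illustration of that whitelist idea (a hypothetical C helper for
    the sake of example; the real logic would live in scripts/recordmcount.pl,
    and the section list shown here is made up):

        /* hypothetical: record only sections known to be safe to trace */
        static const char * const traceable_sections[] = {
                ".text",
                ".sched.text",
                ".spinlock.text",
        };

        static int section_is_traceable(const char *name)
        {
                int i;

                for (i = 0; i < ARRAY_SIZE(traceable_sections); i++)
                        if (strcmp(name, traceable_sections[i]) == 0)
                                return 1;
                /* anything unknown is skipped, not "sorted out later" */
                return 0;
        }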

    Ingo

  9. Re: [PATCH 1/2] ftrace: make dynamic ftrace more robust


    On Wed, 22 Oct 2008, Ingo Molnar wrote:
    >
    > > > > /* Used for MEMORY_HOTPLUG */
    > > > > -#define __meminit __section(.meminit.text) __cold
    > > > > +#define __meminit __section(.meminit.text) __cold notrace
    > > > > #define __meminitdata __section(.meminit.data)
    > > > > #define __meminitconst __section(.meminit.rodata)
    > > > > #define __memexit __section(.memexit.text) __exitused __cold
    > > >
    > > > there's no justification given for this in the changelog and the change
    > > > looks fishy.

    > >
    > > Sorry, I missed writing this. I had it in other patches, but forgot
    > > to add the change log here. These are areas, just like the __init
    > > section, where I have no way of finding out, in an arch-independent
    > > way, what to remove from the ftrace records. So by not adding these
    > > notrace annotations, we are guaranteed to hit the warnings above!

    >
    > this is utterly fragile and might miss places that insert symbols into
    > some of these sections manually.
    >
    > the robust approach is to make sure these things are never in an ftrace
    > record to begin with. scripts/recordmcount.pl should be taught to only
    > record places that it is _100% sure are traceable_. Not "everything
    > and we'll sort out the stuff that we think is not okay".
    >
    > if that needs arch dependent smarts then so be it - ftrace has to be
    > enabled per arch anyway.


    In that case, the only reasonable patch, until we do the above, is this.

    Signed-off-by: Steven Rostedt
    ---
    kernel/trace/Kconfig | 3 ++-
    1 file changed, 2 insertions(+), 1 deletion(-)

    Index: linux-compile.git/kernel/trace/Kconfig
    ===================================================================
    --- linux-compile.git.orig/kernel/trace/Kconfig 2008-10-21 17:07:47.000000000 -0400
    +++ linux-compile.git/kernel/trace/Kconfig 2008-10-22 08:05:40.000000000 -0400
    @@ -159,10 +159,11 @@ config STACK_TRACER
    Say N if unsure.

    config DYNAMIC_FTRACE
    - bool "enable/disable ftrace tracepoints dynamically"
    + bool "enable/disable ftrace tracepoints dynamically (BROKEN)"
    depends on FTRACE
    depends on HAVE_DYNAMIC_FTRACE
    depends on DEBUG_KERNEL
    + depends on BROKEN
    default y
    help
    This option will modify all the calls to ftrace dynamically
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/
