Thread: [RFC patch 00/27] Jump-based NMI-safe immediate values and markers for sched-devel.git

  1. [RFC patch 27/27] Markers use imv jump

    Let markers use the heavily optimized imv_cond() version of immediate values.
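
    For context, here is a minimal sketch of the call pattern this change produces
    at a marker site. It is illustrative only: DEFINE_IMV() and imv_cond() come
    from the immediate values patches earlier in this series, and do_trace() is a
    made-up probe function.

    /* Illustrative sketch, not part of the patch. */
    DEFINE_IMV(char, trace_enabled);

    static void my_instrumented_site(int value)
    {
            /*
             * imv_cond() compiles to a movb/test/jcc sequence that patch 26
             * ("Immediate Values - Jump") can later rewrite into nops plus a
             * direct jump.
             */
            if (unlikely(imv_cond(trace_enabled)))
                    do_trace(value);        /* hypothetical slow path */
    }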

    Signed-off-by: Mathieu Desnoyers
    ---
    include/linux/marker.h | 2 +-
    1 file changed, 1 insertion(+), 1 deletion(-)

    Index: linux-2.6-lttng/include/linux/marker.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/marker.h 2008-04-16 00:16:52.000000000 -0400
    +++ linux-2.6-lttng/include/linux/marker.h 2008-04-16 00:17:12.000000000 -0400
    @@ -76,7 +76,7 @@ struct marker {
    { __mark_empty_function, NULL}, NULL }; \
    __mark_check_format(format, ## args); \
    if (!generic) { \
    - if (unlikely(imv_read(__mark_##name.state))) \
    + if (unlikely(imv_cond(__mark_##name.state))) \
    (*__mark_##name.call) \
    (&__mark_##name, call_private, \
    ## args); \

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  2. [RFC patch 24/27] Immediate Values Use Arch NMI and MCE Support

    Remove the architecture-agnostic update code, which is now replaced by the
    architecture-specific, atomic instruction updates.
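
    After this change, the core update path reduces to roughly the following
    (a sketch condensed from the imv_update_range() hunk below; variable
    declarations and the full warning message are trimmed):

    /* Sketch of the imv_update_range() loop body after this patch. */
    for (iter = begin; iter < end; iter++) {
            mutex_lock(&imv_mutex);
            if (iter->imv) {        /* skip removed __init immediate values */
                    kernel_text_lock();
                    ret = arch_imv_update(iter, !imv_early_boot_complete);
                    kernel_text_unlock();
                    if (imv_early_boot_complete && ret)
                            printk(KERN_WARNING "Invalid immediate value.\n");
            }
            mutex_unlock(&imv_mutex);
    }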

    Signed-off-by: Mathieu Desnoyers
    ---
    include/linux/immediate.h | 11 ------
    kernel/immediate.c | 73 +---------------------------------------------
    2 files changed, 3 insertions(+), 81 deletions(-)

    Index: linux-2.6-lttng/kernel/immediate.c
    ===================================================================
    --- linux-2.6-lttng.orig/kernel/immediate.c 2008-04-11 09:41:33.000000000 -0400
    +++ linux-2.6-lttng/kernel/immediate.c 2008-04-14 18:48:05.000000000 -0400
    @@ -19,92 +19,23 @@
    #include
    #include
    #include
    -#include
    -#include

    #include
    -#include

    /*
    * Kernel ready to execute the SMP update that may depend on trap and ipi.
    */
    static int imv_early_boot_complete;
    -static int wrote_text;

    extern struct __imv __start___imv[];
    extern struct __imv __stop___imv[];

    -static int stop_machine_imv_update(void *imv_ptr)
    -{
    - struct __imv *imv = imv_ptr;
    -
    - if (!wrote_text) {
    - text_poke((void *)imv->imv, (void *)imv->var, imv->size);
    - wrote_text = 1;
    - smp_wmb(); /* make sure other cpus see that this has run */
    - } else
    - sync_core();
    -
    - flush_icache_range(imv->imv, imv->imv + imv->size);
    -
    - return 0;
    -}
    -
    /*
    * imv_mutex nests inside module_mutex. imv_mutex protects builtin
    * immediates and module immediates.
    */
    static DEFINE_MUTEX(imv_mutex);

    -
    -/**
    - * apply_imv_update - update one immediate value
    - * @imv: pointer of type const struct __imv to update
    - *
    - * Update one immediate value. Must be called with imv_mutex held.
    - * It makes sure all CPUs are not executing the modified code by having them
    - * busy looping with interrupts disabled.
    - * It does _not_ protect against NMI and MCE (could be a problem with Intel's
    - * errata if we use immediate values in their code path).
    - */
    -static int apply_imv_update(const struct __imv *imv)
    -{
    - /*
    - * If the variable and the instruction have the same value, there is
    - * nothing to do.
    - */
    - switch (imv->size) {
    - case 1: if (*(uint8_t *)imv->imv
    - == *(uint8_t *)imv->var)
    - return 0;
    - break;
    - case 2: if (*(uint16_t *)imv->imv
    - == *(uint16_t *)imv->var)
    - return 0;
    - break;
    - case 4: if (*(uint32_t *)imv->imv
    - == *(uint32_t *)imv->var)
    - return 0;
    - break;
    - case 8: if (*(uint64_t *)imv->imv
    - == *(uint64_t *)imv->var)
    - return 0;
    - break;
    - default:return -EINVAL;
    - }
    -
    - if (imv_early_boot_complete) {
    - kernel_text_lock();
    - wrote_text = 0;
    - stop_machine_run(stop_machine_imv_update, (void *)imv,
    - ALL_CPUS);
    - kernel_text_unlock();
    - } else
    - text_poke_early((void *)imv->imv, (void *)imv->var,
    - imv->size);
    - return 0;
    -}
    -
    /**
    * imv_update_range - Update immediate values in a range
    * @begin: pointer to the beginning of the range
    @@ -121,7 +52,9 @@ void imv_update_range(const struct __imv
    mutex_lock(&imv_mutex);
    if (!iter->imv) /* Skip removed __init immediate values */
    goto skip;
    - ret = apply_imv_update(iter);
    + kernel_text_lock();
    + ret = arch_imv_update(iter, !imv_early_boot_complete);
    + kernel_text_unlock();
    if (imv_early_boot_complete && ret)
    printk(KERN_WARNING
    "Invalid immediate value. "
    Index: linux-2.6-lttng/include/linux/immediate.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/immediate.h 2008-04-11 09:36:58.000000000 -0400
    +++ linux-2.6-lttng/include/linux/immediate.h 2008-04-14 18:46:47.000000000 -0400
    @@ -12,17 +12,6 @@

    #ifdef CONFIG_IMMEDIATE

    -struct __imv {
    - unsigned long var; /* Pointer to the identifier variable of the
    - * immediate value
    - */
    - unsigned long imv; /*
    - * Pointer to the memory location of the
    - * immediate value within the instruction.
    - */
    - unsigned char size; /* Type size. */
    -} __attribute__ ((packed));
    -
    #include

    /**

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  3. [RFC patch 18/27] Markers - remove extra format argument


    Denys Vlasenko:

    > Not in this patch, but I noticed:
    >
    > #define __trace_mark(name, call_private, format, args...) \
    > do { \
    > static const char __mstrtab_##name[] \
    > __attribute__((section("__markers_strings"))) \
    > = #name "\0" format; \
    > static struct marker __mark_##name \
    > __attribute__((section("__markers"), aligned(8))) = \
    > { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
    > 0, 0, marker_probe_cb, \
    > { __mark_empty_function, NULL}, NULL }; \
    > __mark_check_format(format, ## args); \
    > if (unlikely(__mark_##name.state)) { \
    > (*__mark_##name.call) \
    > (&__mark_##name, call_private, \
    > format, ## args); \
    > } \
    > } while (0)
    >
    > In this call:
    >
    > (*__mark_##name.call) \
    > (&__mark_##name, call_private, \
    > format, ## args); \
    >
    > you make gcc allocate duplicate format string. You can use
    > &__mstrtab_##name[sizeof(#name)] instead since it holds the same string,
    > or drop ", format," above and "const char *fmt" from here:
    >
    > void (*call)(const struct marker *mdata, /* Probe wrapper */
    > void *call_private, const char *fmt, ...);
    >
    > since mdata->format is the same and all callees which need it can take it there.


    Very good point. I actually thought about dropping it, since it would
    remove an unnecessary argument from the stack. And actually, since I now
    have the marker_probe_cb sitting between the marker site and the
    callbacks, there is no API change required. Thanks

    Mathieu
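
    For clarity, the resulting calling convention looks roughly like this
    (a condensed sketch of the hunks below; the single/multi probe handling and
    the RCU barriers are omitted, and "func" stands for the registered probe,
    mdata->single.func in the real code):

    /* Probe wrapper after the change: no fmt argument on the stack. */
    void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
    {
            va_list args;

            va_start(args, call_private);
            /* The format string is now taken from the marker metadata. */
            func(mdata->single.probe_private, call_private, mdata->format, &args);
            va_end(args);
    }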

    Signed-off-by: Mathieu Desnoyers
    CC: Denys Vlasenko
    ---
    include/linux/marker.h | 11 +++++------
    kernel/marker.c | 30 ++++++++++++++----------------
    2 files changed, 19 insertions(+), 22 deletions(-)

    Index: linux-2.6-lttng/include/linux/marker.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/marker.h 2008-03-27 20:51:34.000000000 -0400
    +++ linux-2.6-lttng/include/linux/marker.h 2008-03-27 20:54:55.000000000 -0400
    @@ -44,8 +44,8 @@ struct marker {
    */
    char state; /* Marker state. */
    char ptype; /* probe type : 0 : single, 1 : multi */
    - void (*call)(const struct marker *mdata, /* Probe wrapper */
    - void *call_private, const char *fmt, ...);
    + /* Probe wrapper */
    + void (*call)(const struct marker *mdata, void *call_private, ...);
    struct marker_probe_closure single;
    struct marker_probe_closure *multi;
    } __attribute__((aligned(8)));
    @@ -72,8 +72,7 @@ struct marker {
    __mark_check_format(format, ## args); \
    if (unlikely(__mark_##name.state)) { \
    (*__mark_##name.call) \
    - (&__mark_##name, call_private, \
    - format, ## args); \
    + (&__mark_##name, call_private, ## args);\
    } \
    } while (0)

    @@ -117,9 +116,9 @@ static inline void __printf(1, 2) ___mar
    extern marker_probe_func __mark_empty_function;

    extern void marker_probe_cb(const struct marker *mdata,
    - void *call_private, const char *fmt, ...);
    + void *call_private, ...);
    extern void marker_probe_cb_noarg(const struct marker *mdata,
    - void *call_private, const char *fmt, ...);
    + void *call_private, ...);

    /*
    * Connect a probe to a marker.
    Index: linux-2.6-lttng/kernel/marker.c
    ===================================================================
    --- linux-2.6-lttng.orig/kernel/marker.c 2008-03-27 20:52:09.000000000 -0400
    +++ linux-2.6-lttng/kernel/marker.c 2008-03-27 20:56:13.000000000 -0400
    @@ -54,8 +54,8 @@ static DEFINE_MUTEX(markers_mutex);
    struct marker_entry {
    struct hlist_node hlist;
    char *format;
    - void (*call)(const struct marker *mdata, /* Probe wrapper */
    - void *call_private, const char *fmt, ...);
    + /* Probe wrapper */
    + void (*call)(const struct marker *mdata, void *call_private, ...);
    struct marker_probe_closure single;
    struct marker_probe_closure *multi;
    int refcount; /* Number of times armed. 0 if disarmed. */
    @@ -90,15 +90,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function)
    * marker_probe_cb Callback that prepares the variable argument list for probes.
    * @mdata: pointer of type struct marker
    * @call_private: caller site private data
    - * @fmt: format string
    * @...: Variable argument list.
    *
    * Since we do not use "typical" pointer based RCU in the 1 argument case, we
    * need to put a full smp_rmb() in this branch. This is why we do not use
    * rcu_dereference() for the pointer read.
    */
    -void marker_probe_cb(const struct marker *mdata, void *call_private,
    - const char *fmt, ...)
    +void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
    {
    va_list args;
    char ptype;
    @@ -119,8 +117,9 @@ void marker_probe_cb(const struct marker
    /* Must read the ptr before private data. They are not data
    * dependant, so we put an explicit smp_rmb() here. */
    smp_rmb();
    - va_start(args, fmt);
    - func(mdata->single.probe_private, call_private, fmt, &args);
    + va_start(args, call_private);
    + func(mdata->single.probe_private, call_private, mdata->format,
    + &args);
    va_end(args);
    } else {
    struct marker_probe_closure *multi;
    @@ -135,9 +134,9 @@ void marker_probe_cb(const struct marker
    smp_read_barrier_depends();
    multi = mdata->multi;
    for (i = 0; multi[i].func; i++) {
    - va_start(args, fmt);
    - multi[i].func(multi[i].probe_private, call_private, fmt,
    - &args);
    + va_start(args, call_private);
    + multi[i].func(multi[i].probe_private, call_private,
    + mdata->format, &args);
    va_end(args);
    }
    }
    @@ -149,13 +148,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
    * marker_probe_cb Callback that does not prepare the variable argument list.
    * @mdata: pointer of type struct marker
    * @call_private: caller site private data
    - * @fmt: format string
    * @...: Variable argument list.
    *
    * Should be connected to markers "MARK_NOARGS".
    */
    -void marker_probe_cb_noarg(const struct marker *mdata,
    - void *call_private, const char *fmt, ...)
    +void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
    {
    va_list args; /* not initialized */
    char ptype;
    @@ -171,7 +168,8 @@ void marker_probe_cb_noarg(const struct
    /* Must read the ptr before private data. They are not data
    * dependant, so we put an explicit smp_rmb() here. */
    smp_rmb();
    - func(mdata->single.probe_private, call_private, fmt, &args);
    + func(mdata->single.probe_private, call_private, mdata->format,
    + &args);
    } else {
    struct marker_probe_closure *multi;
    int i;
    @@ -185,8 +183,8 @@ void marker_probe_cb_noarg(const struct
    smp_read_barrier_depends();
    multi = mdata->multi;
    for (i = 0; multi[i].func; i++)
    - multi[i].func(multi[i].probe_private, call_private, fmt,
    - &args);
    + multi[i].func(multi[i].probe_private, call_private,
    + mdata->format, &args);
    }
    preempt_enable();
    }

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  4. [RFC patch 00/27] Jump-based NMI-safe immediate values and markers for sched-devel.git

    Hi Ingo,

    Here is the patchset you requested. I did not port the marker reintegration
    to your sched-devel tree though, because many changes have happened since you
    did the original work.

    It applies on top of sched-devel.git latest.

    You will notice that I implemented what we discussed yesterday: using nops and
    a jump for the heavily optimized version of markers. Comments are welcome.
    Running this with my ~120 LTTng markers on x86_32 detects 97% of the sites;
    4 out of 120 had to fall back on the standard immediate values because the
    generated code at those sites had been rearranged by gcc optimizations. The
    sched-devel.git port has been tested on x86_32. Patches before the port are
    tested on x86_32 and x86_64.

    Note that some folding of the immediate values patches could eventually be
    required. At that point, add-all-cpus-option-to-stop-machine-run.patch would
    become useless.

    The series order is the following:

    make-marker_debug-static.patch # in -mm
    x86-nmi-safe-int3-and-page-fault.patch
    check-for-breakpoint-in-text-poke-to-eliminate-bug-on.patch
    #Kprobes mutex cleanup
    kprobes-use-mutex-for-insn-pages.patch
    kprobes-dont-use-kprobes-mutex-in-arch-code.patch
    kprobes-declare-kprobes-mutex-static.patch
    #Text Edit Lock (depends on Enhance DEBUG_RODATA and kprobes mutex cleanup)
    text-edit-lock-architecture-independent-code.patch
    text-edit-lock-kprobes-architecture-independent-support.patch
    #
    #Immediate Values
    add-all-cpus-option-to-stop-machine-run.patch
    immediate-values-architecture-independent-code.patch
    immediate-values-kconfig-menu-in-embedded.patch
    immediate-values-x86-optimization.patch
    add-text-poke-and-sync-core-to-powerpc.patch
    immediate-values-powerpc-optimization.patch
    immediate-values-documentation.patch
    immediate-values-support-init.patch
    #
    scheduler-profiling-use-immediate-values.patch
    #
    markers-remove-extra-format-argument.patch
    markers-define-non-optimized-marker.patch
    #
    immediate-values-move-kprobes-x86-restore-interrupt-to-kdebug-h.patch
    add-discard-section-to-x86.patch
    immediate-values-x86-optimization-nmi-mce-support.patch
    immediate-values-powerpc-optimization-nmi-mce-support.patch
    immediate-values-use-arch-nmi-mce-support.patch
    linux-kernel-markers-immediate-values.patch
    #
    immediate-values-jump.patch
    markers-use-imv-jump.patch


    Mathieu

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  5. [RFC patch 19/27] Markers - define non optimized marker

    To support the forthcoming "immediate values" marker optimization, we must have
    a way to declare markers in a few code paths that do not use instruction-
    modification-based enabling. This will be the case for printk(), some traps and
    eventually lockdep instrumentation.
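
    A hypothetical usage sketch (marker names invented for illustration):

    static void example_sites(int pid, const char *fmt)
    {
            /* Optimized site: enabled through code patching (imv_read()). */
            trace_mark(example_wakeup, "pid %d", pid);

            /* Generic site: plain memory read, usable from __init/__exit code,
             * lockdep, some traps and printk(). */
            _trace_mark(example_printk, "fmt %s", fmt);
    }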

    Changelog:
    - Fix reversed boolean logic of "generic".

    Signed-off-by: Mathieu Desnoyers
    ---
    include/linux/marker.h | 29 ++++++++++++++++++++++++-----
    1 file changed, 24 insertions(+), 5 deletions(-)

    Index: linux-2.6-lttng/include/linux/marker.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/marker.h 2008-03-27 20:47:44.000000000 -0400
    +++ linux-2.6-lttng/include/linux/marker.h 2008-03-27 20:49:04.000000000 -0400
    @@ -58,8 +58,12 @@ struct marker {
    * Make sure the alignment of the structure in the __markers section will
    * not add unwanted padding between the beginning of the section and the
    * structure. Force alignment to the same alignment as the section start.
    + *
    + * The "generic" argument controls which marker enabling mechanism must be used.
    + * If generic is true, a variable read is used.
    + * If generic is false, immediate values are used.
    */
    -#define __trace_mark(name, call_private, format, args...) \
    +#define __trace_mark(generic, name, call_private, format, args...) \
    do { \
    static const char __mstrtab_##name[] \
    __attribute__((section("__markers_strings"))) \
    @@ -79,7 +83,7 @@ struct marker {
    extern void marker_update_probe_range(struct marker *begin,
    struct marker *end);
    #else /* !CONFIG_MARKERS */
    -#define __trace_mark(name, call_private, format, args...) \
    +#define __trace_mark(generic, name, call_private, format, args...) \
    __mark_check_format(format, ## args)
    static inline void marker_update_probe_range(struct marker *begin,
    struct marker *end)
    @@ -87,15 +91,30 @@ static inline void marker_update_probe_r
    #endif /* CONFIG_MARKERS */

    /**
    - * trace_mark - Marker
    + * trace_mark - Marker using code patching
    * @name: marker name, not quoted.
    * @format: format string
    * @args...: variable argument list
    *
    - * Places a marker.
    + * Places a marker using optimized code patching technique (imv_read())
    + * to be enabled when immediate values are present.
    */
    #define trace_mark(name, format, args...) \
    - __trace_mark(name, NULL, format, ## args)
    + __trace_mark(0, name, NULL, format, ## args)
    +
    +/**
    + * _trace_mark - Marker using variable read
    + * @name: marker name, not quoted.
    + * @format: format string
    + * @args...: variable argument list
    + *
    + * Places a marker using a standard memory read (_imv_read()) to be
    + * enabled. Should be used for markers in code paths where instruction
    + * modification based enabling is not welcome. (__init and __exit functions,
    + * lockdep, some traps, printk).
    + */
    +#define _trace_mark(name, format, args...) \
    + __trace_mark(1, name, NULL, format, ## args)

    /**
    * MARK_NOARGS - Format string for a marker with no argument.

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  6. [RFC patch 07/27] Text Edit Lock - Architecture Independent Code

    This is an architecture-independent synchronization around kernel text
    modifications, implemented with a global mutex.

    A mutex has been chosen so that kprobes, the main user of this, can sleep
    during memory allocation between the memory read of the instructions it must
    replace and the memory write of the breakpoint.

    The other user of this interface is immediate values.

    Paravirt and alternatives are always applied while SMP is inactive, so they
    do not need to take this lock.
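
    A minimal usage sketch, assuming a caller that patches live kernel text
    (patch_one_byte() is made up for illustration; text_poke() is the helper
    modified in patch 03 of this series):

    #include <linux/memory.h>       /* kernel_text_lock()/kernel_text_unlock() */

    /* Serialize one live code patch against other text patchers. */
    static void patch_one_byte(void *addr, unsigned char newbyte)
    {
            kernel_text_lock();     /* may sleep */
            text_poke(addr, &newbyte, 1);
            kernel_text_unlock();
    }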

    Signed-off-by: Mathieu Desnoyers
    CC: Andi Kleen
    CC: Ingo Molnar
    ---
    include/linux/memory.h | 7 +++++++
    mm/memory.c | 34 ++++++++++++++++++++++++++++++++++
    2 files changed, 41 insertions(+)

    Index: linux-2.6-lttng/include/linux/memory.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/memory.h 2008-04-08 12:01:44.000000000 -0400
    +++ linux-2.6-lttng/include/linux/memory.h 2008-04-08 12:01:56.000000000 -0400
    @@ -93,4 +93,11 @@ extern int memory_notify(unsigned long v
    #define hotplug_memory_notifier(fn, pri) do { } while (0)
    #endif

    +/*
    + * Take and release the kernel text modification lock, used for code patching.
    + * Users of this lock can sleep.
    + */
    +extern void kernel_text_lock(void);
    +extern void kernel_text_unlock(void);
    +
    #endif /* _LINUX_MEMORY_H_ */
    Index: linux-2.6-lttng/mm/memory.c
    ===================================================================
    --- linux-2.6-lttng.orig/mm/memory.c 2008-04-08 12:01:44.000000000 -0400
    +++ linux-2.6-lttng/mm/memory.c 2008-04-08 12:01:56.000000000 -0400
    @@ -51,6 +51,8 @@
    #include
    #include
    #include
    +#include
    +#include

    #include
    #include
    @@ -96,6 +98,12 @@ int randomize_va_space __read_mostly =
    2;
    #endif

    +/*
    + * mutex protecting text section modification (dynamic code patching).
    + * some users need to sleep (allocating memory...) while they hold this lock.
    + */
    +static DEFINE_MUTEX(text_mutex);
    +
    static int __init disable_randmaps(char *s)
    {
    randomize_va_space = 0;
    @@ -2737,3 +2745,29 @@ void print_vma_addr(char *prefix, unsign
    }
    up_read(&current->mm->mmap_sem);
    }
    +
    +/**
    + * kernel_text_lock - Take the kernel text modification lock
    + *
    + * Insures mutual write exclusion of kernel and modules text live text
    + * modification. Should be used for code patching.
    + * Users of this lock can sleep.
    + */
    +void __kprobes kernel_text_lock(void)
    +{
    + mutex_lock(&text_mutex);
    +}
    +EXPORT_SYMBOL_GPL(kernel_text_lock);
    +
    +/**
    + * kernel_text_unlock - Release the kernel text modification lock
    + *
    + * Insures mutual write exclusion of kernel and modules text live text
    + * modification. Should be used for code patching.
    + * Users of this lock can sleep.
    + */
    +void __kprobes kernel_text_unlock(void)
    +{
    + mutex_unlock(&text_mutex);
    +}
    +EXPORT_SYMBOL_GPL(kernel_text_unlock);

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  7. [RFC patch 26/27] Immediate Values - Jump

    Add a new imv_cond() macro to declare a byte read that is meant to be embedded
    in unlikely(imv_cond(var)), so the kernel can dynamically detect patterns such
    as mov/test/jne or mov/test/je and patch them with nops and a jump.
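
    To summarize the transformation performed below (byte values taken from the
    patch; the 2-byte nop 0x89 0xf6 is INSN_NOP2), for a 1-byte immediate used as
    a condition:

    /*
     * Before patching (as emitted by imv_cond() + unlikely()):
     *
     *   b0 XX        movb $imm,%al     <- imv_cond() site
     *   84 c0        test %al,%al
     *   75 cb        jne  rel8         (or 0f 85 cd, jne rel32)
     *
     * After patch_jump_target() with a non-zero variable:
     *
     *   89 f6        mov  %esi,%esi    (2-byte nop replacing the movb)
     *   89 f6        mov  %esi,%esi    (2-byte nop replacing the test)
     *   eb cb        jmp  rel8         (the jne becomes an unconditional jump)
     *
     * With a zero variable the same nops are written and the jump offset is 0,
     * so execution simply falls through.
     */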

    Signed-off-by: Mathieu Desnoyers
    ---
    arch/x86/kernel/immediate.c | 381 ++++++++++++++++++++++++++++++++--------
    include/asm-powerpc/immediate.h | 2
    include/asm-x86/immediate.h | 34 +++
    include/linux/immediate.h | 11 -
    kernel/immediate.c | 6
    5 files changed, 359 insertions(+), 75 deletions(-)

    Index: linux-2.6-lttng/include/asm-x86/immediate.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/asm-x86/immediate.h 2008-04-16 14:04:47.000000000 -0400
    +++ linux-2.6-lttng/include/asm-x86/immediate.h 2008-04-16 14:19:13.000000000 -0400
    @@ -20,6 +20,7 @@ struct __imv {
    * Pointer to the memory location of the
    * immediate value within the instruction.
    */
    + int jmp_off; /* offset for jump target */
    unsigned char size; /* Type size. */
    unsigned char insn_size;/* Instruction size. */
    } __attribute__ ((packed));
    @@ -57,6 +58,7 @@ struct __imv {
    ".previous\n\t" \
    ".section __imv,\"aw\",@progbits\n\t" \
    _ASM_PTR "%c1, (3f)-%c2\n\t" \
    + ".int 0\n\t" \
    ".byte %c2, (2b-1b)\n\t" \
    ".previous\n\t" \
    "mov $0,%0\n\t" \
    @@ -74,6 +76,7 @@ struct __imv {
    ".previous\n\t" \
    ".section __imv,\"aw\",@progbits\n\t" \
    _ASM_PTR "%c1, (3f)-%c2\n\t" \
    + ".int 0\n\t" \
    ".byte %c2, (2b-1b)\n\t" \
    ".previous\n\t" \
    ".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \
    @@ -95,6 +98,7 @@ struct __imv {
    ".previous\n\t" \
    ".section __imv,\"aw\",@progbits\n\t" \
    _ASM_PTR "%c1, (3f)-%c2\n\t" \
    + ".int 0\n\t" \
    ".byte %c2, (2b-1b)\n\t" \
    ".previous\n\t" \
    ".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \
    @@ -108,6 +112,34 @@ struct __imv {
    value; \
    })

    -extern int arch_imv_update(const struct __imv *imv, int early);
    +/*
    + * Uses %al.
    + * size is 0.
    + * Use in if (unlikely(imv_cond(var)))
    + * Given a char as argument.
    + */
    +#define imv_cond(name) \
    + ({ \
    + __typeof__(name##__imv) value; \
    + BUILD_BUG_ON(sizeof(value) > 1); \
    + asm (".section __discard,\"\",@progbits\n\t" \
    + "1:\n\t" \
    + "mov $0,%0\n\t" \
    + "2:\n\t" \
    + ".previous\n\t" \
    + ".section __imv,\"aw\",@progbits\n\t" \
    + _ASM_PTR "%c1, (3f)-1\n\t" \
    + ".int 0\n\t" \
    + ".byte %c2, (2b-1b)\n\t" \
    + ".previous\n\t" \
    + "mov $0,%0\n\t" \
    + "3:\n\t" \
    + : "=a" (value) \
    + : "i" (&name##__imv), \
    + "i" (0)); \
    + value; \
    + })
    +
    +extern int arch_imv_update(struct __imv *imv, int early);

    #endif /* _ASM_X86_IMMEDIATE_H */
    Index: linux-2.6-lttng/arch/x86/kernel/immediate.c
    ===================================================================
    --- linux-2.6-lttng.orig/arch/x86/kernel/immediate.c 2008-04-16 14:04:47.000000000 -0400
    +++ linux-2.6-lttng/arch/x86/kernel/immediate.c 2008-04-16 14:06:17.000000000 -0400
    @@ -80,13 +80,19 @@
    #include

    #define BREAKPOINT_INSTRUCTION 0xcc
    +#define JMP_REL8 0xeb
    +#define JMP_REL32 0xe9
    +#define INSN_NOP1 0x90
    +#define INSN_NOP2 0x89, 0xf6
    #define BREAKPOINT_INS_LEN 1
    #define NR_NOPS 10

    +/*#define DEBUG_IMMEDIATE 1*/
    +
    static unsigned long target_after_int3; /* EIP of the target after the int3 */
    static unsigned long bypass_eip; /* EIP of the bypass. */
    static unsigned long bypass_after_int3; /* EIP after the end-of-bypass int3 */
    -static unsigned long after_imv; /*
    +static unsigned long after_imv; /*
    * EIP where to resume after the
    * single-stepping.
    */
    @@ -142,6 +148,25 @@ static int imv_notifier(struct notifier_

    if (die_val == DIE_INT3) {
    if (args->regs->ip == target_after_int3) {
    + /* deal with non-relocatable jmp instructions */
    + switch (*(uint8_t *)bypass_eip) {
    + case JMP_REL8: /* eb cb jmp rel8 */
    + args->regs->ip +=
    + *(signed char *)(bypass_eip + 1) + 1;
    + return NOTIFY_STOP;
    + case JMP_REL32: /* e9 cw jmp rel16 (valid on ia32) */
    + /* e9 cd jmp rel32 */
    + args->regs->ip +=
    + *(int *)(bypass_eip + 1) + 4;
    + return NOTIFY_STOP;
    + case INSN_NOP1:
    + /* deal with insertion of nop + jmp_rel32 */
    + if (*((uint8_t *)bypass_eip + 1) == JMP_REL32) {
    + args->regs->ip +=
    + *(int *)(bypass_eip + 2) + 5;
    + return NOTIFY_STOP;
    + }
    + }
    preempt_disable();
    args->regs->ip = bypass_eip;
    return NOTIFY_STOP;
    @@ -159,71 +184,107 @@ static struct notifier_block imv_notify
    .priority = 0x7fffffff, /* we need to be notified first */
    };

    -/**
    - * arch_imv_update - update one immediate value
    - * @imv: pointer of type const struct __imv to update
    - * @early: early boot (1) or normal (0)
    - *
    - * Update one immediate value. Must be called with imv_mutex held.
    +/*
    + * returns -1 if not found
    + * return 0 if found.
    */
    -__kprobes int arch_imv_update(const struct __imv *imv, int early)
    +static inline int detect_mov_test_jne(uint8_t *addr, uint8_t **opcode,
    + uint8_t **jmp_offset, int *offset_len)
    {
    - int ret;
    - unsigned char opcode_size = imv->insn_size - imv->size;
    - unsigned long insn = imv->imv - opcode_size;
    - unsigned long len;
    - char *vaddr;
    - struct page *pages[1];
    -
    -#ifdef CONFIG_KPROBES
    - /*
    - * Fail if a kprobe has been set on this instruction.
    - * (TODO: we could eventually do better and modify all the (possibly
    - * nested) kprobes for this site if kprobes had an API for this.
    - */
    - if (unlikely(!early
    - && *(unsigned char *)insn == BREAKPOINT_INSTRUCTION)) {
    - printk(KERN_WARNING "Immediate value in conflict with kprobe. "
    - "Variable at %p, "
    - "instruction at %p, size %hu\n",
    - (void *)imv->imv,
    - (void *)imv->var, imv->size);
    - return -EBUSY;
    - }
    -#endif
    -
    - /*
    - * If the variable and the instruction have the same value, there is
    - * nothing to do.
    - */
    - switch (imv->size) {
    - case 1: if (*(uint8_t *)imv->imv
    - == *(uint8_t *)imv->var)
    - return 0;
    - break;
    - case 2: if (*(uint16_t *)imv->imv
    - == *(uint16_t *)imv->var)
    - return 0;
    - break;
    - case 4: if (*(uint32_t *)imv->imv
    - == *(uint32_t *)imv->var)
    + printk(KERN_DEBUG "Trying at %p %hx %hx %hx %hx %hx %hx\n",
    + addr, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
    + /* b0 cb movb cb,%al */
    + if (addr[0] != 0xb0)
    + return -1;
    + /* 84 c0 test %al,%al */
    + if (addr[2] != 0x84 || addr[3] != 0xc0)
    + return -1;
    + printk(KERN_DEBUG "Found test %%al,%%al at %p\n", addr + 2);
    + switch (addr[4]) {
    + case 0x75: /* 75 cb jne rel8 */
    + printk(KERN_DEBUG "Found jne rel8 at %p\n", addr + 4);
    + *opcode = addr + 4;
    + *jmp_offset = addr + 5;
    + *offset_len = 1;
    + return 0;
    + case 0x0f:
    + switch (addr[5]) {
    + case 0x85: /* 0F 85 cw jne rel16 (valid on ia32) */
    + /* 0F 85 cd jne rel32 */
    + printk(KERN_DEBUG "Found jne rel16/32 at %p\n",
    + addr + 5);
    + *opcode = addr + 4;
    + *jmp_offset = addr + 6;
    + *offset_len = 4;
    return 0;
    + default:
    + return -1;
    + }
    break;
    -#ifdef CONFIG_X86_64
    - case 8: if (*(uint64_t *)imv->imv
    - == *(uint64_t *)imv->var)
    + default: return -1;
    + }
    +}
    +
    +/*
    + * returns -1 if not found
    + * return 0 if found.
    + */
    +static inline int detect_mov_test_je(uint8_t *addr, uint8_t **opcode,
    + uint8_t **jmp_offset, int *offset_len)
    +{
    + /* b0 cb movb cb,%al */
    + if (addr[0] != 0xb0)
    + return -1;
    + /* 84 c0 test %al,%al */
    + if (addr[2] != 0x84 || addr[3] != 0xc0)
    + return -1;
    + printk(KERN_DEBUG "Found test %%al,%%al at %p\n", addr + 2);
    + switch (addr[4]) {
    + case 0x74: /* 74 cb je rel8 */
    + printk(KERN_DEBUG "Found je rel8 at %p\n", addr + 4);
    + *opcode = addr + 4;
    + *jmp_offset = addr + 5;
    + *offset_len = 1;
    + return 0;
    + case 0x0f:
    + switch (addr[5]) {
    + case 0x84: /* 0F 84 cw je rel16 (valid on ia32) */
    + /* 0F 84 cd je rel32 */
    + printk(KERN_DEBUG "Found je rel16/32 at %p\n",
    + addr + 5);
    + *opcode = addr + 4;
    + *jmp_offset = addr + 6;
    + *offset_len = 4;
    return 0;
    + default:
    + return -1;
    + }
    break;
    -#endif
    - default:return -EINVAL;
    + default: return -1;
    }
    +}
    +
    +static int static_early;

    - if (!early) {
    - /* bypass is 10 bytes long for x86_64 long */
    - WARN_ON(imv->insn_size > 10);
    - _imv_bypass(&bypass_eip, &bypass_after_int3);
    +/*
    + * Marked noinline because we prefer to have only one _imv_bypass. Not that it
    + * is required, but there is no need to edit two bypasses.
    + */
    +static noinline int replace_instruction_safe(uint8_t *addr, uint8_t *newcode,
    + int size)
    +{
    + char *vaddr;
    + struct page *pages[1];
    + int len;
    + int ret;
    +
    + /* bypass is 10 bytes long for x86_64 long */
    + WARN_ON(size > 10);
    +
    + _imv_bypass(&bypass_eip, &bypass_after_int3);

    - after_imv = imv->imv + imv->size;
    + if (!static_early) {
    + after_imv = (unsigned long)addr + size;

    /*
    * Using the _early variants because nobody is executing the
    @@ -238,22 +299,23 @@ __kprobes int arch_imv_update(const stru
    vaddr = vmap(pages, 1, VM_MAP, PAGE_KERNEL);
    BUG_ON(!vaddr);
    text_poke_early(&vaddr[bypass_eip & ~PAGE_MASK],
    - (void *)insn, imv->insn_size);
    + (void *)addr, size);
    /*
    * Fill the rest with nops.
    */
    - len = NR_NOPS - imv->insn_size;
    + len = NR_NOPS - size;
    add_nops((void *)
    - &vaddr[(bypass_eip & ~PAGE_MASK) + imv->insn_size],
    + &vaddr[(bypass_eip & ~PAGE_MASK) + size],
    len);
    vunmap(vaddr);

    - target_after_int3 = insn + BREAKPOINT_INS_LEN;
    + target_after_int3 = (unsigned long)addr + BREAKPOINT_INS_LEN;
    /* register_die_notifier has memory barriers */
    register_die_notifier(&imv_notify);
    - /* The breakpoint will single-step the bypass */
    - text_poke((void *)insn,
    - ((unsigned char[]){BREAKPOINT_INSTRUCTION}), 1);
    + /* The breakpoint will execute the bypass */
    + text_poke((void *)addr,
    + ((unsigned char[]){BREAKPOINT_INSTRUCTION}),
    + BREAKPOINT_INS_LEN);
    /*
    * Make sure the breakpoint is set before we continue (visible
    * to other CPUs and interrupts).
    @@ -265,14 +327,18 @@ __kprobes int arch_imv_update(const stru
    ret = on_each_cpu(imv_synchronize_core, NULL, 1, 1);
    BUG_ON(ret != 0);

    - text_poke((void *)(insn + opcode_size), (void *)imv->var,
    - imv->size);
    + text_poke((void *)(addr + BREAKPOINT_INS_LEN),
    + &newcode[BREAKPOINT_INS_LEN],
    + size - BREAKPOINT_INS_LEN);
    /*
    * Make sure the value can be seen from other CPUs and
    * interrupts.
    */
    wmb();
    - text_poke((void *)insn, (unsigned char *)bypass_eip, 1);
    +#ifdef DEBUG_IMMEDIATE
    + mdelay(10); /* lets the breakpoint for a while */
    +#endif
    + text_poke(addr, newcode, BREAKPOINT_INS_LEN);
    /*
    * Wait for all int3 handlers to end (interrupts are disabled in
    * int3). This CPU is clearly not in a int3 handler, because
    @@ -285,7 +351,184 @@ __kprobes int arch_imv_update(const stru
    unregister_die_notifier(&imv_notify);
    /* unregister_die_notifier has memory barriers */
    } else
    - text_poke_early((void *)imv->imv, (void *)imv->var,
    - imv->size);
    + text_poke_early(addr, newcode, size);
    + return 0;
    +}
    +
    +static int patch_jump_target(struct __imv *imv)
    +{
    + uint8_t *opcode, *jmp_offset;
    + int offset_len;
    + int mov_test_j_found = 0;
    +
    + if(!detect_mov_test_jne((uint8_t *)imv->imv - 1,
    + &opcode, &jmp_offset, &offset_len)) {
    + imv->insn_size = 1; /* positive logic */
    + mov_test_j_found = 1;
    + } else if(!detect_mov_test_je((uint8_t *)imv->imv - 1,
    + &opcode, &jmp_offset, &offset_len)) {
    + imv->insn_size = 0; /* negative logic */
    + mov_test_j_found = 1;
    + }
    +
    + if (mov_test_j_found) {
    + int logicvar = imv->insn_size ? imv->var : !imv->var;
    + int newoff;
    +
    + if (offset_len == 1) {
    + imv->jmp_off = *(signed char *)jmp_offset;
    + /* replace with JMP_REL8 opcode. */
    + replace_instruction_safe(opcode,
    + ((unsigned char[]){ JMP_REL8,
    + (logicvar ? (signed char)imv->jmp_off : 0) }),
    + 2);
    + } else {
    + /* replace with nop and JMP_REL16/32 opcode.
    + * It's ok to shrink an instruction, never ok to
    + * grow it afterward. */
    + imv->jmp_off = *(int *)jmp_offset;
    + newoff = logicvar ? (int)imv->jmp_off : 0;
    + replace_instruction_safe(opcode,
    + ((unsigned char[]){ INSN_NOP1, JMP_REL32,
    + ((unsigned char *)&newoff)[0],
    + ((unsigned char *)&newoff)[1],
    + ((unsigned char *)&newoff)[2],
    + ((unsigned char *)&newoff)[3] }),
    + 6);
    + }
    + /* now we can get rid of the movb */
    + replace_instruction_safe((uint8_t *)imv->imv - 1,
    + ((unsigned char[]){ INSN_NOP2 }),
    + 2);
    + /* now we can get rid of the testb */
    + replace_instruction_safe((uint8_t *)imv->imv + 1,
    + ((unsigned char[]){ INSN_NOP2 }),
    + 2);
    + /* remember opcode + 1 to enable the JMP_REL patching */
    + if (offset_len == 1)
    + imv->imv = (unsigned long)opcode + 1;
    + else
    + imv->imv = (unsigned long)opcode + 2; /* skip nop */
    + return 0;
    +
    + }
    +
    + if (*((uint8_t *)imv->imv - 1) == JMP_REL8) {
    + int logicvar = imv->insn_size ? imv->var : !imv->var;
    +
    + printk(KERN_DEBUG "Found JMP_REL8 at %p\n",
    + ((uint8_t *)imv->imv - 1));
    + replace_instruction_safe((uint8_t *)imv->imv - 1,
    + ((unsigned char[]){ JMP_REL8,
    + (logicvar ? (signed char)imv->jmp_off : 0) }),
    + 2);
    + return 0;
    + }
    +
    + if (*((uint8_t *)imv->imv - 1) == JMP_REL32) {
    + int logicvar = imv->insn_size ? imv->var : !imv->var;
    + int newoff = logicvar ? (int)imv->jmp_off : 0;
    +
    + printk(KERN_DEBUG "Found JMP_REL32 at %p, update with %x\n",
    + ((uint8_t *)imv->imv - 1), newoff);
    + replace_instruction_safe((uint8_t *)imv->imv - 1,
    + ((unsigned char[]){ JMP_REL32,
    + ((unsigned char *)&newoff)[0],
    + ((unsigned char *)&newoff)[1],
    + ((unsigned char *)&newoff)[2],
    + ((unsigned char *)&newoff)[3] }),
    + 5);
    + return 0;
    + }
    +
    + /* Nothing known found. */
    + return -1;
    +}
    +
    +/**
    + * arch_imv_update - update one immediate value
    + * @imv: pointer of type const struct __imv to update
    + * @early: early boot (1) or normal (0)
    + *
    + * Update one immediate value. Must be called with imv_mutex held.
    + */
    +__kprobes int arch_imv_update(struct __imv *imv, int early)
    +{
    + int ret;
    + uint8_t buf[10];
    + unsigned long insn, opcode_size;
    +
    + static_early = early;
    +
    + /*
    + * If imv_cond is encountered, try to patch it with
    + * patch_jump_target. Continue with normal immediate values if the area
    + * surrounding the instruction is not as expected.
    + */
    + if (imv->size == 0) {
    + ret = patch_jump_target(imv);
    + if (ret) {
    +#ifdef DEBUG_IMMEDIATE
    + static int nr_fail;
    + printk("Jump target fallback at %lX, nr fail %d\n",
    + imv->imv, ++nr_fail);
    +#endif
    + imv->size = 1;
    + } else {
    +#ifdef DEBUG_IMMEDIATE
    + static int nr_success;
    + printk("Jump target at %lX, nr success %d\n",
    + imv->imv, ++nr_success);
    +#endif
    + return 0;
    + }
    + }
    +
    + opcode_size = imv->insn_size - imv->size;
    + insn = imv->imv - opcode_size;
    +
    +#ifdef CONFIG_KPROBES
    + /*
    + * Fail if a kprobe has been set on this instruction.
    + * (TODO: we could eventually do better and modify all the (possibly
    + * nested) kprobes for this site if kprobes had an API for this.
    + */
    + if (unlikely(!early
    + && *(unsigned char *)insn == BREAKPOINT_INSTRUCTION)) {
    + printk(KERN_WARNING "Immediate value in conflict with kprobe. "
    + "Variable at %p, "
    + "instruction at %p, size %hu\n",
    + (void *)imv->var,
    + (void *)imv->imv, imv->size);
    + return -EBUSY;
    + }
    +#endif
    +
    + /*
    + * If the variable and the instruction have the same value, there is
    + * nothing to do.
    + */
    + switch (imv->size) {
    + case 1: if (*(uint8_t *)imv->imv == *(uint8_t *)imv->var)
    + return 0;
    + break;
    + case 2: if (*(uint16_t *)imv->imv == *(uint16_t *)imv->var)
    + return 0;
    + break;
    + case 4: if (*(uint32_t *)imv->imv == *(uint32_t *)imv->var)
    + return 0;
    + break;
    +#ifdef CONFIG_X86_64
    + case 8: if (*(uint64_t *)imv->imv == *(uint64_t *)imv->var)
    + return 0;
    + break;
    +#endif
    + default:return -EINVAL;
    + }
    +
    + memcpy(buf, (uint8_t *)insn, opcode_size);
    + memcpy(&buf[opcode_size], (void *)imv->var, imv->size);
    + replace_instruction_safe((uint8_t *)insn, buf, imv->insn_size);
    +
    return 0;
    }
    Index: linux-2.6-lttng/include/linux/immediate.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/immediate.h 2008-04-16 14:04:47.000000000 -0400
    +++ linux-2.6-lttng/include/linux/immediate.h 2008-04-16 14:04:48.000000000 -0400
    @@ -33,8 +33,7 @@
    * Internal update functions.
    */
    extern void core_imv_update(void);
    -extern void imv_update_range(const struct __imv *begin,
    - const struct __imv *end);
    +extern void imv_update_range(struct __imv *begin, struct __imv *end);
    extern void imv_unref_core_init(void);
    extern void imv_unref(struct __imv *begin, struct __imv *end, void *start,
    unsigned long size);
    @@ -54,6 +53,14 @@ extern void imv_unref(struct __imv *begi
    #define imv_read(name) _imv_read(name)

    /**
    + * imv_cond - read immediate variable use as condition for if()
    + * @name: immediate value name
    + *
    + * Reads the value of @name.
    + */
    +#define imv_cond(name) _imv_read(name)
    +
    +/**
    * imv_set - set immediate variable (with locking)
    * @name: immediate value name
    * @i: required value
    Index: linux-2.6-lttng/kernel/immediate.c
    ===================================================================
    --- linux-2.6-lttng.orig/kernel/immediate.c 2008-04-16 14:04:47.000000000 -0400
    +++ linux-2.6-lttng/kernel/immediate.c 2008-04-16 14:04:48.000000000 -0400
    @@ -43,10 +43,10 @@ static DEFINE_MUTEX(imv_mutex);
    *
    * Updates a range of immediates.
    */
    -void imv_update_range(const struct __imv *begin,
    - const struct __imv *end)
    +void imv_update_range(struct __imv *begin,
    + struct __imv *end)
    {
    - const struct __imv *iter;
    + struct __imv *iter;
    int ret;
    for (iter = begin; iter < end; iter++) {
    mutex_lock(&imv_mutex);
    Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/asm-powerpc/immediate.h 2008-04-16 14:04:47.000000000 -0400
    +++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2008-04-16 14:04:48.000000000 -0400
    @@ -68,6 +68,8 @@ struct __imv {
    value; \
    })

    +#define imv_cond(name) imv_read(name)
    +
    extern int arch_imv_update(const struct __imv *imv, int early);

    #endif /* _ASM_POWERPC_IMMEDIATE_H */

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  8. [RFC patch 03/27] Check for breakpoint in text_poke to eliminate bug_on

    It's OK to modify an instruction non-atomically (multiple memory accesses to a
    large and/or non-aligned instruction) *if and only if* we have inserted a
    breakpoint at the beginning of that instruction.
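
    A simplified sketch of the sequence that relies on this relaxed check (the
    real user is replace_instruction_safe() in patch 26; the die-notifier and
    bypass handling are omitted here):

    #include <linux/types.h>

    /* Patch a multi-byte instruction while an int3 covers its first byte. */
    static void patch_insn_with_breakpoint(u8 *addr, const u8 *newcode, size_t len)
    {
            static const u8 int3 = 0xcc;

            text_poke(addr, &int3, 1);                 /* 1. arm the breakpoint */
            /* 2. sync all CPUs so every one of them sees the int3 ... */
            text_poke(addr + 1, newcode + 1, len - 1); /* 3. write the tail */
            text_poke(addr, newcode, 1);               /* 4. write the first byte */
    }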

    Signed-off-by: Mathieu Desnoyers
    ---
    arch/x86/kernel/alternative.c | 49 ++++++++++++++++++++++++------------------
    1 file changed, 29 insertions(+), 20 deletions(-)

    Index: linux-2.6-sched-devel/arch/x86/kernel/alternative.c
    ===================================================================
    --- linux-2.6-sched-devel.orig/arch/x86/kernel/alternative.c 2008-04-16 17:17:59.000000000 -0400
    +++ linux-2.6-sched-devel/arch/x86/kernel/alternative.c 2008-04-16 17:19:53.000000000 -0400
    @@ -15,6 +15,7 @@
    #include

    #define MAX_PATCH_LEN (255-1)
    +#define BREAKPOINT_INSTRUCTION 0xcc

    #ifdef CONFIG_HOTPLUG_CPU
    static int smp_alt_once;
    @@ -505,37 +506,45 @@ void *text_poke_early(void *addr, const
    * It means the size must be writable atomically and the address must be aligned
    * in a way that permits an atomic write. It also makes sure we fit on a single
    * page.
    + *
    + * It's ok to modify an instruction non-atomically (multiple memory accesses to
    + * a large and/or non aligned instruction) *if and only if* we have inserted a
    + * breakpoint at the beginning of the instruction and we are modifying the rest
    + * of the instruction.
    */
    void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
    {
    unsigned long flags;
    char *vaddr;
    int nr_pages = 2;
    + struct page *pages[2];
    + int i;

    - BUG_ON(len > sizeof(long));
    - BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
    - - ((long)addr & ~(sizeof(long) - 1)));
    - if (kernel_text_address((unsigned long)addr)) {
    - struct page *pages[2] = { virt_to_page(addr),
    - virt_to_page(addr + PAGE_SIZE) };
    - if (!pages[1])
    - nr_pages = 1;
    - vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
    - BUG_ON(!vaddr);
    - local_irq_save(flags);
    - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
    - local_irq_restore(flags);
    - vunmap(vaddr);
    + if (*((uint8_t *)addr - 1) != BREAKPOINT_INSTRUCTION) {
    + BUG_ON(len > sizeof(long));
    + BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
    + - ((long)addr & ~(sizeof(long) - 1)));
    + }
    + if (!core_kernel_text((unsigned long)addr)) {
    + pages[0] = vmalloc_to_page(addr);
    + pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
    } else {
    - /*
    - * modules are in vmalloc'ed memory, always writable.
    - */
    - local_irq_save(flags);
    - memcpy(addr, opcode, len);
    - local_irq_restore(flags);
    + pages[0] = virt_to_page(addr);
    + pages[1] = virt_to_page(addr + PAGE_SIZE);
    }
    + BUG_ON(!pages[0]);
    + if (!pages[1])
    + nr_pages = 1;
    + vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
    + BUG_ON(!vaddr);
    + local_irq_save(flags);
    + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
    + local_irq_restore(flags);
    + vunmap(vaddr);
    sync_core();
    /* Could also do a CLFLUSH here to speed up CPU recovery; but
    that causes hangs on some VIA CPUs. */
    + for (i = 0; i < len; i++)
    + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
    return addr;
    }

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  9. [RFC patch 21/27] Add __discard section to x86

    Add a __discard section to the linker script. Code produced in this section
    will not be put in the vmlinux file. This is useful when we have to calculate
    the size of an instruction before actually emitting it (for alignment
    purposes, for instance). This is used by the immediate values.
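
    A sketch of the trick this enables (condensed and simplified from the x86
    imv_read() macros; GCC inline assembly on x86 assumed):

    static void measure_insn_size(void)
    {
            /*
             * Assemble a throw-away copy of the instruction into __discard so
             * the assembler can compute its size as (2b - 1b); the linker never
             * emits these bytes.  The real imv macros record that size in their
             * __imv table entry and use it to align the live instruction.
             */
            asm(".section __discard,\"\",@progbits\n\t"
                "1:\n\t"
                "mov $0,%%eax\n\t"      /* measured, never executed */
                "2:\n\t"
                ".previous"
                : : : "eax");
    }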

    Signed-off-by: Mathieu Desnoyers
    Acked-by: H. Peter Anvin
    CC: Andi Kleen
    CC: Chuck Ebbert
    CC: Christoph Hellwig
    CC: Jeremy Fitzhardinge
    CC: Thomas Gleixner
    CC: Ingo Molnar
    ---
    arch/x86/kernel/vmlinux_32.lds.S | 1 +
    arch/x86/kernel/vmlinux_64.lds.S | 1 +
    2 files changed, 2 insertions(+)

    Index: linux-2.6-sched-devel/arch/x86/kernel/vmlinux_32.lds.S
    ===================================================================
    --- linux-2.6-sched-devel.orig/arch/x86/kernel/vmlinux_32.lds.S 2008-04-16 11:07:19.000000000 -0400
    +++ linux-2.6-sched-devel/arch/x86/kernel/vmlinux_32.lds.S 2008-04-16 11:17:04.000000000 -0400
    @@ -213,6 +213,7 @@ SECTIONS
    /* Sections to be discarded */
    /DISCARD/ : {
    *(.exitcall.exit)
    + *(__discard)
    }

    STABS_DEBUG
    Index: linux-2.6-sched-devel/arch/x86/kernel/vmlinux_64.lds.S
    ===================================================================
    --- linux-2.6-sched-devel.orig/arch/x86/kernel/vmlinux_64.lds.S 2008-04-16 11:07:19.000000000 -0400
    +++ linux-2.6-sched-devel/arch/x86/kernel/vmlinux_64.lds.S 2008-04-16 11:17:04.000000000 -0400
    @@ -246,6 +246,7 @@ SECTIONS
    /DISCARD/ : {
    *(.exitcall.exit)
    *(.eh_frame)
    + *(__discard)
    }

    STABS_DEBUG

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  10. [RFC patch 16/27] Immediate Values Support init

    Support placing immediate values in init code.

    We need to put the immediate value table in an RW data section so we can edit
    it before the init sections are unloaded.

    This code puts NULL pointers in lieu of the original pointers referencing init
    code before the init sections are freed, both in the core kernel and in
    modules.

    TODO: support the __exit section.
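
    The invalidation step itself is small; a sketch that mirrors the imv_unref()
    hunk below:

    /* Clear every __imv entry whose patch site lies in a region about to be
     * freed, so imv_update_range() skips it instead of writing into memory
     * that may have been reused. */
    static void forget_init_sites(struct __imv *begin, struct __imv *end,
                                  void *start, unsigned long size)
    {
            struct __imv *iter;

            for (iter = begin; iter < end; iter++)
                    if (iter->imv >= (unsigned long)start
                        && iter->imv < (unsigned long)start + size)
                            iter->imv = 0UL;
    }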

    Signed-off-by: Mathieu Desnoyers
    CC: Rusty Russell
    CC: "Frank Ch. Eigler"
    ---
    Documentation/immediate.txt | 8 ++++----
    include/asm-generic/vmlinux.lds.h | 8 ++++----
    include/asm-powerpc/immediate.h | 4 ++--
    include/asm-x86/immediate.h | 6 +++---
    include/linux/immediate.h | 7 ++++++-
    include/linux/module.h | 2 +-
    init/main.c | 1 +
    kernel/immediate.c | 31 +++++++++++++++++++++++++++++--
    kernel/module.c | 2 ++
    9 files changed, 52 insertions(+), 17 deletions(-)

    Index: linux-2.6-lttng/kernel/immediate.c
    ===================================================================
    --- linux-2.6-lttng.orig/kernel/immediate.c 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/kernel/immediate.c 2008-04-16 11:24:25.000000000 -0400
    @@ -22,6 +22,7 @@
    #include
    #include

    +#include
    #include

    /*
    @@ -30,8 +31,8 @@
    static int imv_early_boot_complete;
    static int wrote_text;

    -extern const struct __imv __start___imv[];
    -extern const struct __imv __stop___imv[];
    +extern struct __imv __start___imv[];
    +extern struct __imv __stop___imv[];

    static int stop_machine_imv_update(void *imv_ptr)
    {
    @@ -118,6 +119,8 @@ void imv_update_range(const struct __imv
    int ret;
    for (iter = begin; iter < end; iter++) {
    mutex_lock(&imv_mutex);
    + if (!iter->imv) /* Skip removed __init immediate values */
    + goto skip;
    ret = apply_imv_update(iter);
    if (imv_early_boot_complete && ret)
    printk(KERN_WARNING
    @@ -126,6 +129,7 @@ void imv_update_range(const struct __imv
    "instruction at %p, size %hu\n",
    (void *)iter->imv,
    (void *)iter->var, iter->size);
    +skip:
    mutex_unlock(&imv_mutex);
    }
    }
    @@ -143,6 +147,29 @@ void core_imv_update(void)
    }
    EXPORT_SYMBOL_GPL(core_imv_update);

    +/**
    + * imv_unref
    + *
    + * Deactivate any immediate value reference pointing into the code region in the
    + * range start to start + size.
    + */
    +void imv_unref(struct __imv *begin, struct __imv *end, void *start,
    + unsigned long size)
    +{
    + struct __imv *iter;
    +
    + for (iter = begin; iter < end; iter++)
    + if (iter->imv >= (unsigned long)start
    + && iter->imv < (unsigned long)start + size)
    + iter->imv = 0UL;
    +}
    +
    +void imv_unref_core_init(void)
    +{
    + imv_unref(__start___imv, __stop___imv, __init_begin,
    + (unsigned long)__init_end - (unsigned long)__init_begin);
    +}
    +
    void __init imv_init_complete(void)
    {
    imv_early_boot_complete = 1;
    Index: linux-2.6-lttng/kernel/module.c
    ===================================================================
    --- linux-2.6-lttng.orig/kernel/module.c 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/kernel/module.c 2008-04-16 11:24:25.000000000 -0400
    @@ -2208,6 +2208,8 @@ sys_init_module(void __user *umod,
    /* Drop initial reference. */
    module_put(mod);
    unwind_remove_table(mod->unwind_info, 1);
    + imv_unref(mod->immediate, mod->immediate + mod->num_immediate,
    + mod->module_init, mod->init_size);
    module_free(mod, mod->module_init);
    mod->module_init = NULL;
    mod->init_size = 0;
    Index: linux-2.6-lttng/include/linux/module.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/module.h 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/include/linux/module.h 2008-04-16 11:24:25.000000000 -0400
    @@ -357,7 +357,7 @@ struct module
    keeping pointers to this stuff */
    char *args;
    #ifdef CONFIG_IMMEDIATE
    - const struct __imv *immediate;
    + struct __imv *immediate;
    unsigned int num_immediate;
    #endif
    #ifdef CONFIG_MARKERS
    Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h 2008-04-16 11:24:25.000000000 -0400
    @@ -52,7 +52,10 @@
    . = ALIGN(8); \
    VMLINUX_SYMBOL(__start___markers) = .; \
    *(__markers) \
    - VMLINUX_SYMBOL(__stop___markers) = .;
    + VMLINUX_SYMBOL(__stop___markers) = .; \
    + VMLINUX_SYMBOL(__start___imv) = .; \
    + *(__imv) /* Immediate values: pointers */ \
    + VMLINUX_SYMBOL(__stop___imv) = .;

    #define RO_DATA(align) \
    . = ALIGN((align)); \
    @@ -61,9 +64,6 @@
    *(.rodata) *(.rodata.*) \
    *(__vermagic) /* Kernel version magic */ \
    *(__markers_strings) /* Markers: strings */ \
    - VMLINUX_SYMBOL(__start___imv) = .; \
    - *(__imv) /* Immediate values: pointers */ \
    - VMLINUX_SYMBOL(__stop___imv) = .; \
    } \
    \
    .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
    Index: linux-2.6-lttng/include/linux/immediate.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/immediate.h 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/include/linux/immediate.h 2008-04-16 11:24:25.000000000 -0400
    @@ -46,6 +46,9 @@ struct __imv {
    extern void core_imv_update(void);
    extern void imv_update_range(const struct __imv *begin,
    const struct __imv *end);
    +extern void imv_unref_core_init(void);
    +extern void imv_unref(struct __imv *begin, struct __imv *end, void *start,
    + unsigned long size);

    #else

    @@ -73,7 +76,9 @@ extern void imv_update_range(const struc

    static inline void core_imv_update(void) { }
    static inline void module_imv_update(void) { }
    -
    +static inline void imv_unref_core_init(void) { }
    +static inline void imv_unref_init(struct __imv *begin, struct __imv *end,
    + void *init, unsigned long init_size) { }
    #endif

    #define DECLARE_IMV(type, name) extern __typeof__(type) name##__imv
    Index: linux-2.6-lttng/init/main.c
    ================================================== =================
    --- linux-2.6-lttng.orig/init/main.c 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/init/main.c 2008-04-16 11:24:25.000000000 -0400
    @@ -776,6 +776,7 @@ static void run_init_process(char *init_
    */
    static int noinline init_post(void)
    {
    + imv_unref_core_init();
    free_initmem();
    unlock_kernel();
    mark_rodata_ro();
    Index: linux-2.6-lttng/include/asm-x86/immediate.h
    ================================================== =================
    --- linux-2.6-lttng.orig/include/asm-x86/immediate.h 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/include/asm-x86/immediate.h 2008-04-16 11:24:25.000000000 -0400
    @@ -33,7 +33,7 @@
    BUILD_BUG_ON(sizeof(value) > 8); \
    switch (sizeof(value)) { \
    case 1: \
    - asm(".section __imv,\"a\",@progbits\n\t" \
    + asm(".section __imv,\"aw\",@progbits\n\t" \
    _ASM_PTR "%c1, (3f)-%c2\n\t" \
    ".byte %c2\n\t" \
    ".previous\n\t" \
    @@ -45,7 +45,7 @@
    break; \
    case 2: \
    case 4: \
    - asm(".section __imv,\"a\",@progbits\n\t" \
    + asm(".section __imv,\"aw\",@progbits\n\t" \
    _ASM_PTR "%c1, (3f)-%c2\n\t" \
    ".byte %c2\n\t" \
    ".previous\n\t" \
    @@ -60,7 +60,7 @@
    value = name##__imv; \
    break; \
    } \
    - asm(".section __imv,\"a\",@progbits\n\t" \
    + asm(".section __imv,\"aw\",@progbits\n\t" \
    _ASM_PTR "%c1, (3f)-%c2\n\t" \
    ".byte %c2\n\t" \
    ".previous\n\t" \
    Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
    ================================================== =================
    --- linux-2.6-lttng.orig/include/asm-powerpc/immediate.h 2008-04-16 11:24:03.000000000 -0400
    +++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2008-04-16 11:24:25.000000000 -0400
    @@ -26,7 +26,7 @@
    BUILD_BUG_ON(sizeof(value) > 8); \
    switch (sizeof(value)) { \
    case 1: \
    - asm(".section __imv,\"a\",@progbits\n\t" \
    + asm(".section __imv,\"aw\",@progbits\n\t" \
    PPC_LONG "%c1, ((1f)-1)\n\t" \
    ".byte 1\n\t" \
    ".previous\n\t" \
    @@ -36,7 +36,7 @@
    : "i" (&name##__imv)); \
    break; \
    case 2: \
    - asm(".section __imv,\"a\",@progbits\n\t" \
    + asm(".section __imv,\"aw\",@progbits\n\t" \
    PPC_LONG "%c1, ((1f)-2)\n\t" \
    ".byte 2\n\t" \
    ".previous\n\t" \
    Index: linux-2.6-lttng/Documentation/immediate.txt
    ================================================== =================
    --- linux-2.6-lttng.orig/Documentation/immediate.txt 2008-04-16 11:24:30.000000000 -0400
    +++ linux-2.6-lttng/Documentation/immediate.txt 2008-04-16 11:24:45.000000000 -0400
    @@ -42,10 +42,10 @@ The immediate mechanism supports inserti
    immediate. Immediate values can be put in inline functions, inlined static
    functions, and unrolled loops.

    -If you have to read the immediate values from a function declared as __init or
    -__exit, you should explicitly use _imv_read(), which will fall back on a
    -global variable read. Failing to do so will leave a reference to the __init
    -section after it is freed (it would generate a modpost warning).
    +If you have to read the immediate values from a function declared as __exit, you
    +should explicitly use _imv_read(), which will fall back on a global variable
    +read. Failing to do so will leave a reference to the __exit section in kernel
    +without module unload support. imv_read() in the __init section is supported.

    You can choose to set an initial static value to the immediate by using, for
    instance:

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. [RFC patch 10/27] Immediate Values - Architecture Independent Code

    Immediate values are used as read-mostly variables that are rarely updated. They
    use code patching to modify the values inscribed in the instruction stream. This
    provides a way to save precious cache lines that would otherwise have to be used
    by these variables.

    There is a generic _imv_read() version, which uses standard global
    variables, and optimized per-architecture imv_read() implementations,
    which use a load immediate to remove a data cache hit. When the immediate values
    functionality is disabled in the kernel, it falls back to global variables.

    It adds a new rodata section "__imv" to place the pointers to the enable
    values. The immediate value activation functions sit in kernel/immediate.c.

    Immediate values refer to the memory address of a previously declared integer.
    This integer holds the information about the state of the associated immediate
    values, and must be accessed through the API found in linux/immediate.h.

    At module load time, each immediate value is checked to see if it must be
    enabled. This is the case if the variable it refers to is exported from
    another module and already enabled.

    In the early stages of start_kernel(), the immediate values are updated to
    reflect the state of the variables they refer to.
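
    For readers new to the API, here is a minimal usage sketch based on the
    interface introduced below (the variable name "my_flag" and the helper
    do_rare_slow_work() are illustrative only, not part of this patch):

        #include <linux/immediate.h>

        /* Backing variable; with CONFIG_IMMEDIATE, hot-path readers do not
         * touch its cache line. */
        DEFINE_IMV(char, my_flag) = 0;
        EXPORT_IMV_SYMBOL_GPL(my_flag);

        void hot_path(void)
        {
                /* Reads the value patched into the instruction stream. */
                if (unlikely(imv_read(my_flag)))
                        do_rare_slow_work();    /* hypothetical helper */
        }

        void control_path(void)
        {
                /* Updates the variable and patches every imv_read() site. */
                imv_set(my_flag, 1);
        }

    A header shared with other files would carry the matching
    DECLARE_IMV(char, my_flag); __exit code would read the plain backing
    variable through _imv_read(my_flag) instead.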

    * Why should this be merged *

    It improves performance on heavy memory I/O workloads.

    An interesting result illustrates the potential of this infrastructure: the
    slowdown a simple system call such as getppid() suffers when it is run under
    heavy user-space cache thrashing:

    Random walk L1 and L2 thrashing surrounding a getppid() call:
    (note: in this test, do_syscall_trace was taken at each system call, see
    Documentation/immediate.txt in these patches for details)
    - No memory pressure : getppid() takes 1573 cycles
    - With memory pressure : getppid() takes 15589 cycles

    We therefore have a slowdown of 10 times just to get the kernel variables from
    memory. Another test on the same architecture (Intel P4) measured the memory
    latency to be 559 cycles. Therefore, each cache line removed from the hot path
    would improve the syscall time by roughly 3.5% (559 / 15589 cycles) in these
    conditions.

    Changelog:

    - section __imv is already SHF_ALLOC
    - Because of the wonders of ELF, section 0 has sh_addr and sh_size 0. So
    the if (immediateindex) is unnecessary here.
    - Remove module_mutex usage: depend on functions implemented in module.c for
    that.
    - Does not update tainted module's immediate values.
    - remove imv_*_t types, add DECLARE_IMV() and DEFINE_IMV().
    - imv_read(&var) becomes imv_read(var) because of this.
    - Adding a new EXPORT_IMV_SYMBOL(_GPL).
    - remove imv_if(). Should use if (unlikely(imv_read(var))) instead.
    - Wait until we have gcc support before we add the imv_if macro, since
    its form may have to change.
    - Don't declare the __imv section in vmlinux.lds.h, just put the content
    in the rodata section.
    - Simplify interface : remove imv_set_early, keep track of kernel boot
    status internally.
    - Remove the ALIGN(8) before the __imv section. It is packed now.
    - Uses an IPI busy-loop on each CPU with interrupts disabled as a simple,
    architecture-agnostic update mechanism.
    - Use imv_* instead of immediate_*.
    - Updating immediate values cannot rely on smp_call_function() because
    synchronizing CPUs with IPIs leads to deadlocks: process A holds a read lock
    on tasklist_lock, then process B calls apply_imv_update(). Process A receives
    the IPI and starts executing ipi_busy_loop(). Process C then takes a write
    lock irq on tasklist_lock before receiving the IPI. Thus, process A
    holds up process C, and C can't get the IPI because interrupts are disabled.
    Solve this by adding a new 'ALL_CPUS' parameter to stop_machine_run(), which
    runs a function on all CPUs after they are busy looping and have disabled
    irqs. Since this is done in a new process context, we don't have to worry
    about interrupted spinlocks. Also, fewer lines of code. Has survived 24+
    hours of testing...

    Signed-off-by: Mathieu Desnoyers
    Signed-off-by: Jason Baron
    CC: Rusty Russell
    CC: Adrian Bunk
    CC: Andi Kleen
    CC: Christoph Hellwig
    CC: mingo@elte.hu
    CC: akpm@osdl.org
    ---
    include/asm-generic/vmlinux.lds.h | 3
    include/linux/immediate.h | 94 +++++++++++++++++++++++
    include/linux/module.h | 16 ++++
    init/main.c | 8 ++
    kernel/Makefile | 1
    kernel/immediate.c | 149 ++++++++++++++++++++++++++++++++++++++
    kernel/module.c | 50 ++++++++++++
    7 files changed, 320 insertions(+), 1 deletion(-)

    Index: linux-2.6-sched-devel/include/linux/immediate.h
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-sched-devel/include/linux/immediate.h 2008-04-16 11:14:29.000000000 -0400
    @@ -0,0 +1,94 @@
    +#ifndef _LINUX_IMMEDIATE_H
    +#define _LINUX_IMMEDIATE_H
    +
    +/*
    + * Immediate values, can be updated at runtime and save cache lines.
    + *
    + * (C) Copyright 2007 Mathieu Desnoyers
    + *
    + * This file is released under the GPLv2.
    + * See the file COPYING for more details.
    + */
    +
    +#ifdef CONFIG_IMMEDIATE
    +
    +struct __imv {
    + unsigned long var; /* Pointer to the identifier variable of the
    + * immediate value
    + */
    + unsigned long imv; /*
    + * Pointer to the memory location of the
    + * immediate value within the instruction.
    + */
    + unsigned char size; /* Type size. */
    +} __attribute__ ((packed));
    +
    +#include
    +
    +/**
    + * imv_set - set immediate variable (with locking)
    + * @name: immediate value name
    + * @i: required value
    + *
    + * Sets the value of @name, taking the module_mutex if required by
    + * the architecture.
    + */
    +#define imv_set(name, i) \
    + do { \
    + name##__imv = (i); \
    + core_imv_update(); \
    + module_imv_update(); \
    + } while (0)
    +
    +/*
    + * Internal update functions.
    + */
    +extern void core_imv_update(void);
    +extern void imv_update_range(const struct __imv *begin,
    + const struct __imv *end);
    +
    +#else
    +
    +/*
    + * Generic immediate values: a simple, standard, memory load.
    + */
    +
    +/**
    + * imv_read - read immediate variable
    + * @name: immediate value name
    + *
    + * Reads the value of @name.
    + */
    +#define imv_read(name) _imv_read(name)
    +
    +/**
    + * imv_set - set immediate variable (with locking)
    + * @name: immediate value name
    + * @i: required value
    + *
    + * Sets the value of @name, taking the module_mutex if required by
    + * the architecture.
    + */
    +#define imv_set(name, i) (name##__imv = (i))
    +
    +static inline void core_imv_update(void) { }
    +static inline void module_imv_update(void) { }
    +
    +#endif
    +
    +#define DECLARE_IMV(type, name) extern __typeof__(type) name##__imv
    +#define DEFINE_IMV(type, name) __typeof__(type) name##__imv
    +
    +#define EXPORT_IMV_SYMBOL(name) EXPORT_SYMBOL(name##__imv)
    +#define EXPORT_IMV_SYMBOL_GPL(name) EXPORT_SYMBOL_GPL(name##__imv)
    +
    +/**
    + * _imv_read - Read immediate value with standard memory load.
    + * @name: immediate value name
    + *
    + * Force a data read of the immediate value instead of the immediate value
    + * based mechanism. Useful for __init and __exit section data read.
    + */
    +#define _imv_read(name) (name##__imv)
    +
    +#endif
    Index: linux-2.6-sched-devel/include/linux/module.h
    ================================================== =================
    --- linux-2.6-sched-devel.orig/include/linux/module.h 2008-04-16 11:07:24.000000000 -0400
    +++ linux-2.6-sched-devel/include/linux/module.h 2008-04-16 11:14:29.000000000 -0400
    @@ -15,6 +15,7 @@
    #include
    #include
    #include
    +#include
    #include
    #include

    @@ -355,6 +356,10 @@ struct module
    /* The command line arguments (may be mangled). People like
    keeping pointers to this stuff */
    char *args;
    +#ifdef CONFIG_IMMEDIATE
    + const struct __imv *immediate;
    + unsigned int num_immediate;
    +#endif
    #ifdef CONFIG_MARKERS
    struct marker *markers;
    unsigned int num_markers;
    @@ -467,6 +472,9 @@ extern void print_modules(void);

    extern void module_update_markers(void);

    +extern void _module_imv_update(void);
    +extern void module_imv_update(void);
    +
    #else /* !CONFIG_MODULES... */
    #define EXPORT_SYMBOL(sym)
    #define EXPORT_SYMBOL_GPL(sym)
    @@ -571,6 +579,14 @@ static inline void module_update_markers
    {
    }

    +static inline void _module_imv_update(void)
    +{
    +}
    +
    +static inline void module_imv_update(void)
    +{
    +}
    +
    #endif /* CONFIG_MODULES */

    struct device_driver;
    Index: linux-2.6-sched-devel/kernel/module.c
    ================================================== =================
    --- linux-2.6-sched-devel.orig/kernel/module.c 2008-04-16 11:10:44.000000000 -0400
    +++ linux-2.6-sched-devel/kernel/module.c 2008-04-16 11:15:32.000000000 -0400
    @@ -33,6 +33,7 @@
    #include
    #include
    #include
    +#include
    #include
    #include
    #include
    @@ -1716,6 +1717,7 @@ static struct module *load_module(void _
    unsigned int unusedcrcindex;
    unsigned int unusedgplindex;
    unsigned int unusedgplcrcindex;
    + unsigned int immediateindex;
    unsigned int markersindex;
    unsigned int markersstringsindex;
    struct module *mod;
    @@ -1814,6 +1816,7 @@ static struct module *load_module(void _
    #ifdef ARCH_UNWIND_SECTION_NAME
    unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
    #endif
    + immediateindex = find_sec(hdr, sechdrs, secstrings, "__imv");

    /* Don't keep modinfo section */
    sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
    @@ -1972,6 +1975,11 @@ static struct module *load_module(void _
    mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
    if (gplfuturecrcindex)
    mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
    +#ifdef CONFIG_IMMEDIATE
    + mod->immediate = (void *)sechdrs[immediateindex].sh_addr;
    + mod->num_immediate =
    + sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
    +#endif

    mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
    if (unusedcrcindex)
    @@ -2039,11 +2047,16 @@ static struct module *load_module(void _

    add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);

    + if (!mod->taints) {
    #ifdef CONFIG_MARKERS
    - if (!mod->taints)
    marker_update_probe_range(mod->markers,
    mod->markers + mod->num_markers);
    #endif
    +#ifdef CONFIG_IMMEDIATE
    + imv_update_range(mod->immediate,
    + mod->immediate + mod->num_immediate);
    +#endif
    + }
    err = module_finalize(hdr, sechdrs, mod);
    if (err < 0)
    goto cleanup;
    @@ -2589,3 +2602,38 @@ void module_update_markers(void)
    mutex_unlock(&module_mutex);
    }
    #endif
    +
    +#ifdef CONFIG_IMMEDIATE
    +/**
    + * _module_imv_update - update all immediate values in the kernel
    + *
    + * Iterate on the kernel core and modules to update the immediate values.
    + * Module_mutex must be held be the caller.
    + */
    +void _module_imv_update(void)
    +{
    + struct module *mod;
    +
    + list_for_each_entry(mod, &modules, list) {
    + if (mod->taints)
    + continue;
    + imv_update_range(mod->immediate,
    + mod->immediate + mod->num_immediate);
    + }
    +}
    +EXPORT_SYMBOL_GPL(_module_imv_update);
    +
    +/**
    + * module_imv_update - update all immediate values in the kernel
    + *
    + * Iterate on the kernel core and modules to update the immediate values.
    + * Takes module_mutex.
    + */
    +void module_imv_update(void)
    +{
    + mutex_lock(&module_mutex);
    + _module_imv_update();
    + mutex_unlock(&module_mutex);
    +}
    +EXPORT_SYMBOL_GPL(module_imv_update);
    +#endif
    Index: linux-2.6-sched-devel/kernel/immediate.c
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-sched-devel/kernel/immediate.c 2008-04-16 11:14:29.000000000 -0400
    @@ -0,0 +1,149 @@
    +/*
    + * Copyright (C) 2007 Mathieu Desnoyers
    + *
    + * This program is free software; you can redistribute it and/or modify
    + * it under the terms of the GNU General Public License as published by
    + * the Free Software Foundation; either version 2 of the License, or
    + * (at your option) any later version.
    + *
    + * This program is distributed in the hope that it will be useful,
    + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    + * GNU General Public License for more details.
    + *
    + * You should have received a copy of the GNU General Public License
    + * along with this program; if not, write to the Free Software
    + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
    + */
    +#include
    +#include
    +#include
    +#include
    +#include
    +#include
    +
    +#include
    +
    +/*
    + * Kernel ready to execute the SMP update that may depend on trap and ipi.
    + */
    +static int imv_early_boot_complete;
    +static int wrote_text;
    +
    +extern const struct __imv __start___imv[];
    +extern const struct __imv __stop___imv[];
    +
    +static int stop_machine_imv_update(void *imv_ptr)
    +{
    + struct __imv *imv = imv_ptr;
    +
    + if (!wrote_text) {
    + text_poke((void *)imv->imv, (void *)imv->var, imv->size);
    + wrote_text = 1;
    + smp_wmb(); /* make sure other cpus see that this has run */
    + } else
    + sync_core();
    +
    + flush_icache_range(imv->imv, imv->imv + imv->size);
    +
    + return 0;
    +}
    +
    +/*
    + * imv_mutex nests inside module_mutex. imv_mutex protects builtin
    + * immediates and module immediates.
    + */
    +static DEFINE_MUTEX(imv_mutex);
    +
    +
    +/**
    + * apply_imv_update - update one immediate value
    + * @imv: pointer of type const struct __imv to update
    + *
    + * Update one immediate value. Must be called with imv_mutex held.
    + * It makes sure all CPUs are not executing the modified code by having them
    + * busy looping with interrupts disabled.
    + * It does _not_ protect against NMI and MCE (could be a problem with Intel's
    + * errata if we use immediate values in their code path).
    + */
    +static int apply_imv_update(const struct __imv *imv)
    +{
    + /*
    + * If the variable and the instruction have the same value, there is
    + * nothing to do.
    + */
    + switch (imv->size) {
    + case 1: if (*(uint8_t *)imv->imv
    + == *(uint8_t *)imv->var)
    + return 0;
    + break;
    + case 2: if (*(uint16_t *)imv->imv
    + == *(uint16_t *)imv->var)
    + return 0;
    + break;
    + case 4: if (*(uint32_t *)imv->imv
    + == *(uint32_t *)imv->var)
    + return 0;
    + break;
    + case 8: if (*(uint64_t *)imv->imv
    + == *(uint64_t *)imv->var)
    + return 0;
    + break;
    + default:return -EINVAL;
    + }
    +
    + if (imv_early_boot_complete) {
    + kernel_text_lock();
    + wrote_text = 0;
    + stop_machine_run(stop_machine_imv_update, (void *)imv,
    + ALL_CPUS);
    + kernel_text_unlock();
    + } else
    + text_poke_early((void *)imv->imv, (void *)imv->var,
    + imv->size);
    + return 0;
    +}
    +
    +/**
    + * imv_update_range - Update immediate values in a range
    + * @begin: pointer to the beginning of the range
    + * @end: pointer to the end of the range
    + *
    + * Updates a range of immediates.
    + */
    +void imv_update_range(const struct __imv *begin,
    + const struct __imv *end)
    +{
    + const struct __imv *iter;
    + int ret;
    + for (iter = begin; iter < end; iter++) {
    + mutex_lock(&imv_mutex);
    + ret = apply_imv_update(iter);
    + if (imv_early_boot_complete && ret)
    + printk(KERN_WARNING
    + "Invalid immediate value. "
    + "Variable at %p, "
    + "instruction at %p, size %hu\n",
    + (void *)iter->imv,
    + (void *)iter->var, iter->size);
    + mutex_unlock(&imv_mutex);
    + }
    +}
    +EXPORT_SYMBOL_GPL(imv_update_range);
    +
    +/**
    + * imv_update - update all immediate values in the kernel
    + *
    + * Iterate on the kernel core and modules to update the immediate values.
    + */
    +void core_imv_update(void)
    +{
    + /* Core kernel imvs */
    + imv_update_range(__start___imv, __stop___imv);
    +}
    +EXPORT_SYMBOL_GPL(core_imv_update);
    +
    +void __init imv_init_complete(void)
    +{
    + imv_early_boot_complete = 1;
    +}
    Index: linux-2.6-sched-devel/init/main.c
    ================================================== =================
    --- linux-2.6-sched-devel.orig/init/main.c 2008-04-16 11:07:24.000000000 -0400
    +++ linux-2.6-sched-devel/init/main.c 2008-04-16 11:15:51.000000000 -0400
    @@ -60,6 +60,7 @@
    #include
    #include
    #include
    +#include

    #include
    #include
    @@ -103,6 +104,11 @@ static inline void mark_rodata_ro(void)
    #ifdef CONFIG_TC
    extern void tc_init(void);
    #endif
    +#ifdef CONFIG_IMMEDIATE
    +extern void imv_init_complete(void);
    +#else
    +static inline void imv_init_complete(void) { }
    +#endif

    enum system_states system_state;
    EXPORT_SYMBOL(system_state);
    @@ -547,6 +553,7 @@ asmlinkage void __init start_kernel(void
    boot_init_stack_canary();

    cgroup_init_early();
    + core_imv_update();

    local_irq_disable();
    early_boot_irqs_off();
    @@ -671,6 +678,7 @@ asmlinkage void __init start_kernel(void
    cpuset_init();
    taskstats_init_early();
    delayacct_init();
    + imv_init_complete();

    check_bugs();

    Index: linux-2.6-sched-devel/kernel/Makefile
    ================================================== =================
    --- linux-2.6-sched-devel.orig/kernel/Makefile 2008-04-16 11:07:24.000000000 -0400
    +++ linux-2.6-sched-devel/kernel/Makefile 2008-04-16 11:14:29.000000000 -0400
    @@ -75,6 +75,7 @@ obj-$(CONFIG_RELAY) += relay.o
    obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
    obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
    obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
    +obj-$(CONFIG_IMMEDIATE) += immediate.o
    obj-$(CONFIG_MARKERS) += marker.o
    obj-$(CONFIG_LATENCYTOP) += latencytop.o
    obj-$(CONFIG_FTRACE) += trace/
    Index: linux-2.6-sched-devel/include/asm-generic/vmlinux.lds.h
    ================================================== =================
    --- linux-2.6-sched-devel.orig/include/asm-generic/vmlinux.lds.h 2008-04-16 11:07:23.000000000 -0400
    +++ linux-2.6-sched-devel/include/asm-generic/vmlinux.lds.h 2008-04-16 11:14:29.000000000 -0400
    @@ -61,6 +61,9 @@
    *(.rodata) *(.rodata.*) \
    *(__vermagic) /* Kernel version magic */ \
    *(__markers_strings) /* Markers: strings */ \
    + VMLINUX_SYMBOL(__start___imv) = .; \
    + *(__imv) /* Immediate values: pointers */ \
    + VMLINUX_SYMBOL(__stop___imv) = .; \
    } \
    \
    .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. [RFC patch 04/27] Kprobes - use a mutex to protect the instruction pages list.

    Protect the instruction pages list with a specific insn pages mutex, taken in
    get_insn_slot() and free_insn_slot(). This makes sure that architectures that do
    not need to call arch_remove_kprobe() do not take an unneeded kprobes mutex.

    Signed-off-by: Mathieu Desnoyers
    Acked-by: Ananth N Mavinakayanahalli
    Acked-by: Masami Hiramatsu
    CC: hch@infradead.org
    CC: anil.s.keshavamurthy@intel.com
    CC: davem@davemloft.net
    ---
    kernel/kprobes.c | 27 +++++++++++++++++++++------
    1 file changed, 21 insertions(+), 6 deletions(-)

    Index: linux-2.6-lttng/kernel/kprobes.c
    ================================================== =================
    --- linux-2.6-lttng.orig/kernel/kprobes.c 2007-08-27 11:48:56.000000000 -0400
    +++ linux-2.6-lttng/kernel/kprobes.c 2007-08-27 11:48:58.000000000 -0400
    @@ -95,6 +95,10 @@ enum kprobe_slot_state {
    SLOT_USED = 2,
    };

    +/*
    + * Protects the kprobe_insn_pages list. Can nest into kprobe_mutex.
    + */
    +static DEFINE_MUTEX(kprobe_insn_mutex);
    static struct hlist_head kprobe_insn_pages;
    static int kprobe_garbage_slots;
    static int collect_garbage_slots(void);
    @@ -131,7 +135,9 @@ kprobe_opcode_t __kprobes *get_insn_slot
    {
    struct kprobe_insn_page *kip;
    struct hlist_node *pos;
    + kprobe_opcode_t *ret;

    + mutex_lock(&kprobe_insn_mutex);
    retry:
    hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
    if (kip->nused < INSNS_PER_PAGE) {
    @@ -140,7 +146,8 @@ kprobe_opcode_t __kprobes *get_insn_slot
    if (kip->slot_used[i] == SLOT_CLEAN) {
    kip->slot_used[i] = SLOT_USED;
    kip->nused++;
    - return kip->insns + (i * MAX_INSN_SIZE);
    + ret = kip->insns + (i * MAX_INSN_SIZE);
    + goto end;
    }
    }
    /* Surprise! No unused slots. Fix kip->nused. */
    @@ -154,8 +161,10 @@ kprobe_opcode_t __kprobes *get_insn_slot
    }
    /* All out of space. Need to allocate a new page. Use slot 0. */
    kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
    - if (!kip)
    - return NULL;
    + if (!kip) {
    + ret = NULL;
    + goto end;
    + }

    /*
    * Use module_alloc so this page is within +/- 2GB of where the
    @@ -165,7 +174,8 @@ kprobe_opcode_t __kprobes *get_insn_slot
    kip->insns = module_alloc(PAGE_SIZE);
    if (!kip->insns) {
    kfree(kip);
    - return NULL;
    + ret = NULL;
    + goto end;
    }
    INIT_HLIST_NODE(&kip->hlist);
    hlist_add_head(&kip->hlist, &kprobe_insn_pages);
    @@ -173,7 +183,10 @@ kprobe_opcode_t __kprobes *get_insn_slot
    kip->slot_used[0] = SLOT_USED;
    kip->nused = 1;
    kip->ngarbage = 0;
    - return kip->insns;
    + ret = kip->insns;
    +end:
    + mutex_unlock(&kprobe_insn_mutex);
    + return ret;
    }

    /* Return 1 if all garbages are collected, otherwise 0. */
    @@ -207,7 +220,7 @@ static int __kprobes collect_garbage_slo
    struct kprobe_insn_page *kip;
    struct hlist_node *pos, *next;

    - /* Ensure no-one is preepmted on the garbages */
    + /* Ensure no-one is preempted on the garbages */
    if (check_safety() != 0)
    return -EAGAIN;

    @@ -231,6 +244,7 @@ void __kprobes free_insn_slot(kprobe_opc
    struct kprobe_insn_page *kip;
    struct hlist_node *pos;

    + mutex_lock(&kprobe_insn_mutex);
    hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
    if (kip->insns <= slot &&
    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
    @@ -247,6 +261,7 @@ void __kprobes free_insn_slot(kprobe_opc

    if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
    collect_garbage_slots();
    + mutex_unlock(&kprobe_insn_mutex);
    }
    #endif


    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  13. [RFC patch 12/27] Immediate Values - x86 Optimization

    x86 optimization of the immediate values, which uses a movl with code patching
    to set/unset the value used to populate the register used as the variable source.

    Note: a movb needs to get its value from a "=q" constraint.

    Quoting "H. Peter Anvin"

    Using =r for single-byte values is incorrect for 32-bit code -- that would
    permit %spl, %bpl, %sil, %dil which are illegal in 32-bit mode.
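
    For illustration only (this snippet is not part of the patch): with a
    byte-sized asm output on 32-bit x86, the "q" register class restricts gcc
    to %eax/%ebx/%ecx/%edx, the only registers with byte sub-registers there:

        unsigned char state;

        /* OK: "=q" can only pick %al, %bl, %cl or %dl. */
        asm("movb $1, %0" : "=q" (state));

        /*
         * Broken on 32-bit: "=r" could pick e.g. %esi, whose byte form
         * %sil only exists in 64-bit mode, so the assembler rejects it.
         */
        /* asm("movb $1, %0" : "=r" (state)); */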

    Changelog:
    - Use text_poke_early with cr0 WP save/restore to patch the bypass. We are doing
    non-atomic writes to a code region only touched by us (nobody can execute it
    since we are protected by the imv_mutex).
    - Put imv_set and _imv_set in the architecture independent header.
    - Use $0 instead of %2 with (0) operand.
    - Add x86_64 support, ready for i386+x86_64 -> x86 merge.
    - Use asm-x86/asm.h.
    - Bugfix: the 8-byte (64-bit) immediate value was declared as "4 bytes" in the
    immediate structure.
    - Change the immediate.c update code to support variable length opcodes.
    - Vastly simplified, using a busy-looping IPI with interrupts disabled.
    Does not protect against NMI or MCE.
    - Pack the __imv section. Use smallest types required for size (char).
    - Use imv_* instead of immediate_*.

    Signed-off-by: Mathieu Desnoyers
    CC: Andi Kleen
    CC: "H. Peter Anvin"
    CC: Chuck Ebbert
    CC: Christoph Hellwig
    CC: Jeremy Fitzhardinge
    CC: Thomas Gleixner
    CC: Ingo Molnar
    CC: Rusty Russell
    CC: Adrian Bunk
    CC: akpm@osdl.org
    ---
    arch/x86/Kconfig | 1
    include/asm-x86/immediate.h | 77 ++++++++++++++++++++++++++++++++++++++++++++
    2 files changed, 78 insertions(+)

    Index: linux-2.6-sched-devel/include/asm-x86/immediate.h
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-sched-devel/include/asm-x86/immediate.h 2008-04-16 11:16:32.000000000 -0400
    @@ -0,0 +1,77 @@
    +#ifndef _ASM_X86_IMMEDIATE_H
    +#define _ASM_X86_IMMEDIATE_H
    +
    +/*
    + * Immediate values. x86 architecture optimizations.
    + *
    + * (C) Copyright 2006 Mathieu Desnoyers
    + *
    + * This file is released under the GPLv2.
    + * See the file COPYING for more details.
    + */
    +
    +#include
    +
    +/**
    + * imv_read - read immediate variable
    + * @name: immediate value name
    + *
    + * Reads the value of @name.
    + * Optimized version of the immediate.
    + * Do not use in __init and __exit functions. Use _imv_read() instead.
    + * If size is bigger than the architecture long size, fall back on a memory
    + * read.
    + *
    + * Make sure to populate the initial static 64 bits opcode with a value
    + * what will generate an instruction with 8 bytes immediate value (not the REX.W
    + * prefixed one that loads a sign extended 32 bits immediate value in a r64
    + * register).
    + */
    +#define imv_read(name) \
    + ({ \
    + __typeof__(name##__imv) value; \
    + BUILD_BUG_ON(sizeof(value) > 8); \
    + switch (sizeof(value)) { \
    + case 1: \
    + asm(".section __imv,\"a\",@progbits\n\t" \
    + _ASM_PTR "%c1, (3f)-%c2\n\t" \
    + ".byte %c2\n\t" \
    + ".previous\n\t" \
    + "mov $0,%0\n\t" \
    + "3:\n\t" \
    + : "=q" (value) \
    + : "i" (&name##__imv), \
    + "i" (sizeof(value))); \
    + break; \
    + case 2: \
    + case 4: \
    + asm(".section __imv,\"a\",@progbits\n\t" \
    + _ASM_PTR "%c1, (3f)-%c2\n\t" \
    + ".byte %c2\n\t" \
    + ".previous\n\t" \
    + "mov $0,%0\n\t" \
    + "3:\n\t" \
    + : "=r" (value) \
    + : "i" (&name##__imv), \
    + "i" (sizeof(value))); \
    + break; \
    + case 8: \
    + if (sizeof(long) < 8) { \
    + value = name##__imv; \
    + break; \
    + } \
    + asm(".section __imv,\"a\",@progbits\n\t" \
    + _ASM_PTR "%c1, (3f)-%c2\n\t" \
    + ".byte %c2\n\t" \
    + ".previous\n\t" \
    + "mov $0xFEFEFEFE01010101,%0\n\t" \
    + "3:\n\t" \
    + : "=r" (value) \
    + : "i" (&name##__imv), \
    + "i" (sizeof(value))); \
    + break; \
    + }; \
    + value; \
    + })
    +
    +#endif /* _ASM_X86_IMMEDIATE_H */
    Index: linux-2.6-sched-devel/arch/x86/Kconfig
    ================================================== =================
    --- linux-2.6-sched-devel.orig/arch/x86/Kconfig 2008-04-16 11:07:19.000000000 -0400
    +++ linux-2.6-sched-devel/arch/x86/Kconfig 2008-04-16 11:16:50.000000000 -0400
    @@ -25,6 +25,7 @@ config X86
    select HAVE_KRETPROBES
    select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
    select HAVE_ARCH_KGDB
    + select HAVE_IMMEDIATE


    config GENERIC_LOCKBREAK

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  14. [RFC patch 14/27] Immediate Values - Powerpc Optimization

    PowerPC optimization of the immediate values, which uses a li instruction whose
    immediate operand is patched with the new value.

    Changelog:
    - Put imv_set and _imv_set in the architecture independent header.
    - Pack the __imv section. Use smallest types required for size (char).
    - Remove architecture specific update code : now handled by architecture
    agnostic code.
    - Use imv_* instead of immediate_*.

    Signed-off-by: Mathieu Desnoyers
    CC: Rusty Russell
    CC: Christoph Hellwig
    CC: Paul Mackerras
    CC: Adrian Bunk
    CC: Andi Kleen
    CC: mingo@elte.hu
    CC: akpm@osdl.org
    ---
    arch/powerpc/Kconfig | 1
    include/asm-powerpc/immediate.h | 55 ++++++++++++++++++++++++++++++++++++++++
    2 files changed, 56 insertions(+)

    Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2008-03-06 08:55:54.000000000 -0500
    @@ -0,0 +1,55 @@
    +#ifndef _ASM_POWERPC_IMMEDIATE_H
    +#define _ASM_POWERPC_IMMEDIATE_H
    +
    +/*
    + * Immediate values. PowerPC architecture optimizations.
    + *
    + * (C) Copyright 2006 Mathieu Desnoyers
    + *
    + * This file is released under the GPLv2.
    + * See the file COPYING for more details.
    + */
    +
    +#include
    +
    +/**
    + * imv_read - read immediate variable
    + * @name: immediate value name
    + *
    + * Reads the value of @name.
    + * Optimized version of the immediate.
    + * Do not use in __init and __exit functions. Use _imv_read() instead.
    + */
    +#define imv_read(name) \
    + ({ \
    + __typeof__(name##__imv) value; \
    + BUILD_BUG_ON(sizeof(value) > 8); \
    + switch (sizeof(value)) { \
    + case 1: \
    + asm(".section __imv,\"a\",@progbits\n\t" \
    + PPC_LONG "%c1, ((1f)-1)\n\t" \
    + ".byte 1\n\t" \
    + ".previous\n\t" \
    + "li %0,0\n\t" \
    + "1:\n\t" \
    + : "=r" (value) \
    + : "i" (&name##__imv)); \
    + break; \
    + case 2: \
    + asm(".section __imv,\"a\",@progbits\n\t" \
    + PPC_LONG "%c1, ((1f)-2)\n\t" \
    + ".byte 2\n\t" \
    + ".previous\n\t" \
    + "li %0,0\n\t" \
    + "1:\n\t" \
    + : "=r" (value) \
    + : "i" (&name##__imv)); \
    + break; \
    + case 4: \
    + case 8: value = name##__imv; \
    + break; \
    + }; \
    + value; \
    + })
    +
    +#endif /* _ASM_POWERPC_IMMEDIATE_H */
    Index: linux-2.6-lttng/arch/powerpc/Kconfig
    ================================================== =================
    --- linux-2.6-lttng.orig/arch/powerpc/Kconfig 2008-03-06 08:45:31.000000000 -0500
    +++ linux-2.6-lttng/arch/powerpc/Kconfig 2008-03-06 08:56:14.000000000 -0500
    @@ -91,6 +91,7 @@ config PPC
    select HAVE_OPROFILE
    select HAVE_KPROBES
    select HAVE_KRETPROBES
    + select HAVE_IMMEDIATE

    config EARLY_PRINTK
    bool

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  15. [RFC patch 08/27] Text Edit Lock - kprobes architecture independent support

    Use the mutual exclusion provided by the text edit lock in the kprobes code. It
    allows coherent manipulation of the kernel code by other subsystems.

    Changelog:

    Move the kernel_text_lock/unlock out of the for loops.

    Signed-off-by: Mathieu Desnoyers
    Acked-by: Ananth N Mavinakayanahalli
    CC: ananth@in.ibm.com
    CC: anil.s.keshavamurthy@intel.com
    CC: davem@davemloft.net
    CC: Roel Kluin <12o3l@tiscali.nl>
    ---
    kernel/kprobes.c | 19 +++++++++++++------
    1 file changed, 13 insertions(+), 6 deletions(-)

    Index: linux-2.6-lttng/kernel/kprobes.c
    ================================================== =================
    --- linux-2.6-lttng.orig/kernel/kprobes.c 2008-04-09 10:52:51.000000000 -0400
    +++ linux-2.6-lttng/kernel/kprobes.c 2008-04-09 10:52:57.000000000 -0400
    @@ -43,6 +43,7 @@
    #include
    #include
    #include
    +#include

    #include
    #include
    @@ -577,9 +578,10 @@ static int __kprobes __register_kprobe(s
    goto out;
    }

    + kernel_text_lock();
    ret = arch_prepare_kprobe(p);
    if (ret)
    - goto out;
    + goto out_unlock_text;

    INIT_HLIST_NODE(&p->hlist);
    hlist_add_head_rcu(&p->hlist,
    @@ -587,7 +589,8 @@ static int __kprobes __register_kprobe(s

    if (kprobe_enabled)
    arch_arm_kprobe(p);
    -
    +out_unlock_text:
    + kernel_text_unlock();
    out:
    mutex_unlock(&kprobe_mutex);

    @@ -630,8 +633,11 @@ valid_p:
    * enabled - otherwise, the breakpoint would already have
    * been removed. We save on flushing icache.
    */
    - if (kprobe_enabled)
    + if (kprobe_enabled) {
    + kernel_text_lock();
    arch_disarm_kprobe(p);
    + kernel_text_unlock();
    + }
    hlist_del_rcu(&old_p->hlist);
    cleanup_p = 1;
    } else {
    @@ -729,7 +735,6 @@ static int __kprobes pre_handler_kretpro
    }

    arch_prepare_kretprobe(ri, regs);
    -
    /* XXX(hch): why is there no hlist_move_head? */
    hlist_del(&ri->uflist);
    hlist_add_head(&ri->uflist, &ri->rp->used_instances);
    @@ -951,11 +956,13 @@ static void __kprobes enable_all_kprobes
    if (kprobe_enabled)
    goto already_enabled;

    + kernel_text_lock();
    for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
    head = &kprobe_table[i];
    hlist_for_each_entry_rcu(p, node, head, hlist)
    arch_arm_kprobe(p);
    }
    + kernel_text_unlock();

    kprobe_enabled = true;
    printk(KERN_INFO "Kprobes globally enabled\n");
    @@ -980,6 +987,7 @@ static void __kprobes disable_all_kprobe

    kprobe_enabled = false;
    printk(KERN_INFO "Kprobes globally disabled\n");
    + kernel_text_lock();
    for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
    head = &kprobe_table[i];
    hlist_for_each_entry_rcu(p, node, head, hlist) {
    @@ -987,6 +995,7 @@ static void __kprobes disable_all_kprobe
    arch_disarm_kprobe(p);
    }
    }
    + kernel_text_unlock();

    mutex_unlock(&kprobe_mutex);
    /* Allow all currently running kprobes to complete */

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  16. [RFC patch 23/27] Immediate Values - Powerpc Optimization NMI MCE support

    Use an atomic update for immediate values.

    Signed-off-by: Mathieu Desnoyers
    CC: Rusty Russell
    CC: Christoph Hellwig
    CC: Paul Mackerras
    ---
    arch/powerpc/kernel/Makefile | 1
    arch/powerpc/kernel/immediate.c | 73 ++++++++++++++++++++++++++++++++++++++++
    include/asm-powerpc/immediate.h | 18 +++++++++
    3 files changed, 92 insertions(+)

    Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2008-03-03 10:23:54.000000000 -0500
    @@ -0,0 +1,73 @@
    +/*
    + * Powerpc optimized immediate values enabling/disabling.
    + *
    + * Mathieu Desnoyers
    + */
    +
    +#include
    +#include
    +#include
    +#include
    +#include
    +#include
    +
    +#define LI_OPCODE_LEN 2
    +
    +/**
    + * arch_imv_update - update one immediate value
    + * @imv: pointer of type const struct __imv to update
    + * @early: early boot (1), normal (0)
    + *
    + * Update one immediate value. Must be called with imv_mutex held.
    + */
    +int arch_imv_update(const struct __imv *imv, int early)
    +{
    +#ifdef CONFIG_KPROBES
    + kprobe_opcode_t *insn;
    + /*
    + * Fail if a kprobe has been set on this instruction.
    + * (TODO: we could eventually do better and modify all the (possibly
    + * nested) kprobes for this site if kprobes had an API for this.
    + */
    + switch (imv->size) {
    + case 1: /* The uint8_t points to the 3rd byte of the
    + * instruction */
    + insn = (void *)(imv->imv - 1 - LI_OPCODE_LEN);
    + break;
    + case 2: insn = (void *)(imv->imv - LI_OPCODE_LEN);
    + break;
    + default:
    + return -EINVAL;
    + }
    +
    + if (unlikely(!early && *insn == BREAKPOINT_INSTRUCTION)) {
    + printk(KERN_WARNING "Immediate value in conflict with kprobe. "
    + "Variable at %p, "
    + "instruction at %p, size %lu\n",
    + (void *)imv->imv,
    + (void *)imv->var, imv->size);
    + return -EBUSY;
    + }
    +#endif
    +
    + /*
    + * If the variable and the instruction have the same value, there is
    + * nothing to do.
    + */
    + switch (imv->size) {
    + case 1: if (*(uint8_t *)imv->imv
    + == *(uint8_t *)imv->var)
    + return 0;
    + break;
    + case 2: if (*(uint16_t *)imv->imv
    + == *(uint16_t *)imv->var)
    + return 0;
    + break;
    + default:return -EINVAL;
    + }
    + memcpy((void *)imv->imv, (void *)imv->var,
    + imv->size);
    + flush_icache_range(imv->imv,
    + imv->imv + imv->size);
    + return 0;
    +}
    Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
    ================================================== =================
    --- linux-2.6-lttng.orig/include/asm-powerpc/immediate.h 2008-03-03 10:23:54.000000000 -0500
    +++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2008-03-03 10:23:54.000000000 -0500
    @@ -12,6 +12,16 @@

    #include

    +struct __imv {
    + unsigned long var; /* Identifier variable of the immediate value */
    + unsigned long imv; /*
    + * Pointer to the memory location that holds
    + * the immediate value within the load immediate
    + * instruction.
    + */
    + unsigned char size; /* Type size. */
    +} __attribute__ ((packed));
    +
    /**
    * imv_read - read immediate variable
    * @name: immediate value name
    @@ -19,6 +29,11 @@
    * Reads the value of @name.
    * Optimized version of the immediate.
    * Do not use in __init and __exit functions. Use _imv_read() instead.
    + * Makes sure the 2 bytes update will be atomic by aligning the immediate
    + * value. Use a normal memory read for the 4 bytes immediate because there is no
    + * way to atomically update it without using a seqlock read side, which would
    + * cost more in term of total i-cache and d-cache space than a simple memory
    + * read.
    */
    #define imv_read(name) \
    ({ \
    @@ -40,6 +55,7 @@
    PPC_LONG "%c1, ((1f)-2)\n\t" \
    ".byte 2\n\t" \
    ".previous\n\t" \
    + ".align 2\n\t" \
    "li %0,0\n\t" \
    "1:\n\t" \
    : "=r" (value) \
    @@ -52,4 +68,6 @@
    value; \
    })

    +extern int arch_imv_update(const struct __imv *imv, int early);
    +
    #endif /* _ASM_POWERPC_IMMEDIATE_H */
    Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
    ================================================== =================
    --- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2008-03-03 09:51:27.000000000 -0500
    +++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2008-03-03 10:23:54.000000000 -0500
    @@ -45,6 +45,7 @@ obj-$(CONFIG_HIBERNATION) += swsusp.o su
    obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
    obj-$(CONFIG_MODULES) += module_$(CONFIG_WORD_SIZE).o
    obj-$(CONFIG_44x) += cpu_setup_44x.o
    +obj-$(CONFIG_IMMEDIATE) += immediate.o

    ifeq ($(CONFIG_PPC_MERGE),y)


    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  17. [RFC patch 01/27] From: Adrian Bunk <bunk@kernel.org>

    With the needlessly global marker_debug being static gcc can optimize the
    unused code away.

    Signed-off-by: Adrian Bunk
    Acked-by: Mathieu Desnoyers
    Signed-off-by: Andrew Morton
    ---

    kernel/marker.c | 2 +-
    1 file changed, 1 insertion(+), 1 deletion(-)

    diff -puN kernel/marker.c~make-marker_debug-static kernel/marker.c
    --- a/kernel/marker.c~make-marker_debug-static
    +++ a/kernel/marker.c
    @@ -28,7 +28,7 @@ extern struct marker __start___markers[]
    extern struct marker __stop___markers[];

    /* Set to 1 to enable marker debug output */
    -const int marker_debug;
    +static const int marker_debug;

    /*
    * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
    _

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  18. [RFC patch 17/27] Scheduler Profiling - Use Immediate Values

    Use immediate values, whose optimized version has a lower d-cache footprint, as
    the condition for the scheduler profiling call.

    Changelog :
    - Use imv_* instead of immediate_*.
    - Follow the white rabbit : kvm_main.c which becomes x86.c.

    Signed-off-by: Mathieu Desnoyers
    CC: Rusty Russell
    CC: Adrian Bunk
    CC: Andi Kleen
    CC: Christoph Hellwig
    CC: mingo@elte.hu
    CC: akpm@osdl.org
    ---
    arch/x86/kvm/x86.c | 2 +-
    include/linux/profile.h | 5 +++--
    kernel/profile.c | 22 +++++++++++-----------
    kernel/sched_fair.c | 5 +----
    4 files changed, 16 insertions(+), 18 deletions(-)

    Index: linux-2.6-sched-devel/kernel/profile.c
    ================================================== =================
    --- linux-2.6-sched-devel.orig/kernel/profile.c 2008-04-16 11:07:24.000000000 -0400
    +++ linux-2.6-sched-devel/kernel/profile.c 2008-04-16 11:17:00.000000000 -0400
    @@ -41,8 +41,8 @@ static int (*timer_hook)(struct pt_regs
    static atomic_t *prof_buffer;
    static unsigned long prof_len, prof_shift;

    -int prof_on __read_mostly;
    -EXPORT_SYMBOL_GPL(prof_on);
    +DEFINE_IMV(char, prof_on) __read_mostly;
    +EXPORT_IMV_SYMBOL_GPL(prof_on);

    static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
    #ifdef CONFIG_SMP
    @@ -60,7 +60,7 @@ static int __init profile_setup(char *st

    if (!strncmp(str, sleepstr, strlen(sleepstr))) {
    #ifdef CONFIG_SCHEDSTATS
    - prof_on = SLEEP_PROFILING;
    + imv_set(prof_on, SLEEP_PROFILING);
    if (str[strlen(sleepstr)] == ',')
    str += strlen(sleepstr) + 1;
    if (get_option(&str, &par))
    @@ -73,7 +73,7 @@ static int __init profile_setup(char *st
    "kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
    #endif /* CONFIG_SCHEDSTATS */
    } else if (!strncmp(str, schedstr, strlen(schedstr))) {
    - prof_on = SCHED_PROFILING;
    + imv_set(prof_on, SCHED_PROFILING);
    if (str[strlen(schedstr)] == ',')
    str += strlen(schedstr) + 1;
    if (get_option(&str, &par))
    @@ -82,7 +82,7 @@ static int __init profile_setup(char *st
    "kernel schedule profiling enabled (shift: %ld)\n",
    prof_shift);
    } else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
    - prof_on = KVM_PROFILING;
    + imv_set(prof_on, KVM_PROFILING);
    if (str[strlen(kvmstr)] == ',')
    str += strlen(kvmstr) + 1;
    if (get_option(&str, &par))
    @@ -92,7 +92,7 @@ static int __init profile_setup(char *st
    prof_shift);
    } else if (get_option(&str, &par)) {
    prof_shift = par;
    - prof_on = CPU_PROFILING;
    + imv_set(prof_on, CPU_PROFILING);
    printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
    prof_shift);
    }
    @@ -103,7 +103,7 @@ __setup("profile=", profile_setup);

    void __init profile_init(void)
    {
    - if (!prof_on)
    + if (!_imv_read(prof_on))
    return;

    /* only text is profiled */
    @@ -290,7 +290,7 @@ void profile_hits(int type, void *__pc,
    int i, j, cpu;
    struct profile_hit *hits;

    - if (prof_on != type || !prof_buffer)
    + if (!prof_buffer)
    return;
    pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
    i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
    @@ -400,7 +400,7 @@ void profile_hits(int type, void *__pc,
    {
    unsigned long pc;

    - if (prof_on != type || !prof_buffer)
    + if (!prof_buffer)
    return;
    pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
    atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
    @@ -557,7 +557,7 @@ static int __init create_hash_tables(voi
    }
    return 0;
    out_cleanup:
    - prof_on = 0;
    + imv_set(prof_on, 0);
    smp_mb();
    on_each_cpu(profile_nop, NULL, 0, 1);
    for_each_online_cpu(cpu) {
    @@ -584,7 +584,7 @@ static int __init create_proc_profile(vo
    {
    struct proc_dir_entry *entry;

    - if (!prof_on)
    + if (!_imv_read(prof_on))
    return 0;
    if (create_hash_tables())
    return -1;
    Index: linux-2.6-sched-devel/include/linux/profile.h
    ================================================== =================
    --- linux-2.6-sched-devel.orig/include/linux/profile.h 2008-04-16 11:07:24.000000000 -0400
    +++ linux-2.6-sched-devel/include/linux/profile.h 2008-04-16 11:17:00.000000000 -0400
    @@ -7,10 +7,11 @@
    #include
    #include
    #include
    +#include

    #include

    -extern int prof_on __read_mostly;
    +DECLARE_IMV(char, prof_on) __read_mostly;

    #define CPU_PROFILING 1
    #define SCHED_PROFILING 2
    @@ -38,7 +39,7 @@ static inline void profile_hit(int type,
    /*
    * Speedup for the common (no profiling enabled) case:
    */
    - if (unlikely(prof_on == type))
    + if (unlikely(imv_read(prof_on) == type))
    profile_hits(type, ip, 1);
    }

    Index: linux-2.6-sched-devel/kernel/sched_fair.c
    ================================================== =================
    --- linux-2.6-sched-devel.orig/kernel/sched_fair.c 2008-04-16 11:07:24.000000000 -0400
    +++ linux-2.6-sched-devel/kernel/sched_fair.c 2008-04-16 11:17:00.000000000 -0400
    @@ -455,11 +455,8 @@ static void enqueue_sleeper(struct cfs_r
    * get a milliseconds-range estimation of the amount of
    * time that the task spent sleeping:
    */
    - if (unlikely(prof_on == SLEEP_PROFILING)) {
    -
    - profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
    + profile_hits(SLEEP_PROFILING, (void *)get_wchan(task_of(se)),
    delta >> 20);
    - }
    account_scheduler_latency(tsk, delta >> 10, 0);
    }
    #endif
    Index: linux-2.6-sched-devel/arch/x86/kvm/x86.c
    ================================================== =================
    --- linux-2.6-sched-devel.orig/arch/x86/kvm/x86.c 2008-04-16 11:07:19.000000000 -0400
    +++ linux-2.6-sched-devel/arch/x86/kvm/x86.c 2008-04-16 11:17:00.000000000 -0400
    @@ -2604,7 +2604,7 @@ again:
    /*
    * Profile KVM exit RIPs:
    */
    - if (unlikely(prof_on == KVM_PROFILING)) {
    + if (unlikely(imv_read(prof_on) == KVM_PROFILING)) {
    kvm_x86_ops->cache_regs(vcpu);
    profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
    }

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  19. [RFC patch 11/27] Immediate Values - Kconfig menu in EMBEDDED

    Immediate values provide a way to use dynamic code patching to update variables
    sitting within the instruction stream. This saves cache lines normally used by
    static read-mostly variables. Enable it by default, but let users disable it
    through the EMBEDDED menu ("Immediate value optimization" entry).

    Note: Since I think embedded systems developers using RO memory really should
    have the option to disable the immediate values, I chose to leave this menu
    option there, in the EMBEDDED menu. Also, "CONFIG_IMMEDIATE" makes sense
    because we want to compile out all the immediate code when we decide not to use
    optimized immediate values at all (it removes otherwise unused code).

    Changelog:
    - Change ARCH_SUPPORTS_IMMEDIATE for HAS_IMMEDIATE
    - Turn DISABLE_IMMEDIATE into positive logic

    Signed-off-by: Mathieu Desnoyers
    CC: Rusty Russell
    CC: Adrian Bunk
    CC: Andi Kleen
    CC: Christoph Hellwig
    CC: mingo@elte.hu
    CC: akpm@osdl.org
    ---
    init/Kconfig | 18 ++++++++++++++++++
    1 file changed, 18 insertions(+)

    Index: linux-2.6-lttng/init/Kconfig
    ================================================== =================
    --- linux-2.6-lttng.orig/init/Kconfig 2008-04-10 15:59:46.000000000 -0400
    +++ linux-2.6-lttng/init/Kconfig 2008-04-14 19:51:54.000000000 -0400
    @@ -758,6 +758,24 @@ config PROC_PAGE_MONITOR
    /proc/kpagecount, and /proc/kpageflags. Disabling these
    interfaces will reduce the size of the kernel by approximately 4kb.

    +config HAVE_IMMEDIATE
    + def_bool n
    +
    +config IMMEDIATE
    + default y
    + depends on HAVE_IMMEDIATE
    + bool "Immediate value optimization" if EMBEDDED
    + help
    + Immediate values are used as read-mostly variables that are rarely
    + updated. They use code patching to modify the values inscribed in the
    + instruction stream. It provides a way to save precious cache lines
    + that would otherwise have to be used by these variables. They can be
    + disabled through the EMBEDDED menu.
    +
    + It consumes slightly more memory and modifies the instruction stream
    + each time any specially-marked variable is updated. Should really be
    + disabled for embedded systems with read-only text.
    +
    endmenu # General setup

    config SLABINFO

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  20. [RFC patch 05/27] Kprobes - do not use kprobes mutex in arch code

    Remove the kprobes mutex from kprobes.h, since it does not belong there.
    Also remove all uses of this mutex in the architecture-specific code,
    replacing them with a proper mutex lock/unlock in the
    architecture-agnostic code.
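
    The generic side of the change (the two added lines in kernel/kprobes.c,
    see the diffstat below) is not quoted here; the intent is roughly the
    following, with the surrounding function shown only for illustration:

    /* kernel/kprobes.c (sketch -- only the lock/unlock pair is what the
     * patch actually adds; kprobe_mutex already lives in this file) */
    static void remove_one_kprobe(struct kprobe *p)  /* illustrative name */
    {
            mutex_lock(&kprobe_mutex);
            arch_remove_kprobe(p);  /* arch code no longer locks internally */
            mutex_unlock(&kprobe_mutex);
    }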

    Changelog:
    - remove unnecessary kprobe_mutex around arch_remove_kprobe()

    Signed-off-by: Mathieu Desnoyers
    Acked-by: Ananth N Mavinakayanahalli
    Acked-by: Masami Hiramatsu
    CC: anil.s.keshavamurthy@intel.com
    CC: davem@davemloft.net
    ---
    arch/ia64/kernel/kprobes.c | 2 --
    arch/powerpc/kernel/kprobes.c | 2 --
    arch/s390/kernel/kprobes.c | 2 --
    arch/x86/kernel/kprobes.c | 2 --
    include/linux/kprobes.h | 2 --
    kernel/kprobes.c | 2 ++
    6 files changed, 2 insertions(+), 10 deletions(-)

    Index: linux-2.6-lttng/include/linux/kprobes.h
    ===================================================================
    --- linux-2.6-lttng.orig/include/linux/kprobes.h 2008-04-08 11:59:57.000000000 -0400
    +++ linux-2.6-lttng/include/linux/kprobes.h 2008-04-08 12:01:39.000000000 -0400
    @@ -35,7 +35,6 @@
    #include
    #include
    #include
    -#include

    #ifdef CONFIG_KPROBES
    #include
    @@ -195,7 +194,6 @@ static inline int init_test_probes(void)
    #endif /* CONFIG_KPROBES_SANITY_TEST */

    extern spinlock_t kretprobe_lock;
    -extern struct mutex kprobe_mutex;
    extern int arch_prepare_kprobe(struct kprobe *p);
    extern void arch_arm_kprobe(struct kprobe *p);
    extern void arch_disarm_kprobe(struct kprobe *p);
    Index: linux-2.6-lttng/arch/x86/kernel/kprobes.c
    ===================================================================
    --- linux-2.6-lttng.orig/arch/x86/kernel/kprobes.c 2008-04-08 11:59:57.000000000 -0400
    +++ linux-2.6-lttng/arch/x86/kernel/kprobes.c 2008-04-08 12:01:39.000000000 -0400
    @@ -376,9 +376,7 @@ void __kprobes arch_disarm_kprobe(struct

    void __kprobes arch_remove_kprobe(struct kprobe *p)
    {
    - mutex_lock(&kprobe_mutex);
    free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
    - mutex_unlock(&kprobe_mutex);
    }

    static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
    Index: linux-2.6-lttng/arch/ia64/kernel/kprobes.c
    ===================================================================
    --- linux-2.6-lttng.orig/arch/ia64/kernel/kprobes.c 2008-04-08 11:59:57.000000000 -0400
    +++ linux-2.6-lttng/arch/ia64/kernel/kprobes.c 2008-04-08 12:01:39.000000000 -0400
    @@ -583,9 +583,7 @@ void __kprobes arch_disarm_kprobe(struct

    void __kprobes arch_remove_kprobe(struct kprobe *p)
    {
    - mutex_lock(&kprobe_mutex);
    free_insn_slot(p->ainsn.insn, 0);
    - mutex_unlock(&kprobe_mutex);
    }
    /*
    * We are resuming execution after a single step fault, so the pt_regs
    Index: linux-2.6-lttng/arch/powerpc/kernel/kprobes.c
    ===================================================================
    --- linux-2.6-lttng.orig/arch/powerpc/kernel/kprobes.c 2008-04-08 11:59:57.000000000 -0400
    +++ linux-2.6-lttng/arch/powerpc/kernel/kprobes.c 2008-04-08 12:01:39.000000000 -0400
    @@ -88,9 +88,7 @@ void __kprobes arch_disarm_kprobe(struct

    void __kprobes arch_remove_kprobe(struct kprobe *p)
    {
    - mutex_lock(&kprobe_mutex);
    free_insn_slot(p->ainsn.insn, 0);
    - mutex_unlock(&kprobe_mutex);
    }

    static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
    Index: linux-2.6-lttng/arch/s390/kernel/kprobes.c
    ===================================================================
    --- linux-2.6-lttng.orig/arch/s390/kernel/kprobes.c 2008-04-08 11:59:57.000000000 -0400
    +++ linux-2.6-lttng/arch/s390/kernel/kprobes.c 2008-04-08 12:01:39.000000000 -0400
    @@ -220,9 +220,7 @@ void __kprobes arch_disarm_kprobe(struct

    void __kprobes arch_remove_kprobe(struct kprobe *p)
    {
    - mutex_lock(&kprobe_mutex);
    free_insn_slot(p->ainsn.insn, 0);
    - mutex_unlock(&kprobe_mutex);
    }

    static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)

    --
    Mathieu Desnoyers
    Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
