Thread: [PATCH 0/3] stop_machine enhancements and simplifications

  1. [PATCH 0/3] stop_machine enhancements and simplifications

    Hi all,

    Here are the three stop_machine patches I've queued for the next merge
    window. The first two are pretty well tested via linux-next, the last is
    recent but fairly straightforward.

    I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
    but it can be removed by the last of these patches (left in to reduce
    transition breakage).

    Feedback welcome as always,
    Rusty.

  2. [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers

    Instead of a "cpu" arg with magic values NR_CPUS (any cpu) and ~0 (all
    cpus), pass a cpumask_t. Allow NULL for the common case (where we
    don't care which CPU the function is run on): temporary cpumask_t's
    are usually considered bad for stack space.

    Signed-off-by: Rusty Russell

    diff -r 277c5fb41d25 arch/s390/kernel/kprobes.c
    --- a/arch/s390/kernel/kprobes.c Tue Jul 08 12:57:33 2008 +1000
    +++ b/arch/s390/kernel/kprobes.c Tue Jul 08 17:31:41 2008 +1000
    @@ -199,7 +199,7 @@ void __kprobes arch_arm_kprobe(struct kp
    args.new = BREAKPOINT_INSTRUCTION;

    kcb->kprobe_status = KPROBE_SWAP_INST;
    - stop_machine_run(swap_instruction, &args, NR_CPUS);
    + stop_machine_run(swap_instruction, &args, NULL);
    kcb->kprobe_status = status;
    }

    @@ -214,7 +214,7 @@ void __kprobes arch_disarm_kprobe(struct
    args.new = p->opcode;

    kcb->kprobe_status = KPROBE_SWAP_INST;
    - stop_machine_run(swap_instruction, &args, NR_CPUS);
    + stop_machine_run(swap_instruction, &args, NULL);
    kcb->kprobe_status = status;
    }

    diff -r 277c5fb41d25 drivers/char/hw_random/intel-rng.c
    --- a/drivers/char/hw_random/intel-rng.c Tue Jul 08 12:57:33 2008 +1000
    +++ b/drivers/char/hw_random/intel-rng.c Tue Jul 08 17:31:41 2008 +1000
    @@ -368,7 +368,7 @@ static int __init mod_init(void)
    * Use stop_machine_run because IPIs can be blocked by disabling
    * interrupts.
    */
    - err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NR_CPUS);
    + err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NULL);
    pci_dev_put(dev);
    iounmap(intel_rng_hw->mem);
    kfree(intel_rng_hw);
    diff -r 277c5fb41d25 include/linux/stop_machine.h
    --- a/include/linux/stop_machine.h Tue Jul 08 12:57:33 2008 +1000
    +++ b/include/linux/stop_machine.h Tue Jul 08 17:31:41 2008 +1000
    @@ -5,19 +5,19 @@
    (and more). So the "read" side to such a lock is anything which
    diables preeempt. */
    #include <linux/cpu.h>
    +#include <linux/cpumask.h>
    #include <asm/system.h>

    #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)

    -#define ALL_CPUS ~0U
    +/* Deprecated, but useful for transition. */
    +#define ALL_CPUS CPU_MASK_ALL_PTR

    /**
    * stop_machine_run: freeze the machine on all CPUs and run this function
    * @fn: the function to run
    * @data: the data ptr for the @fn()
    - * @cpu: if @cpu == n, run @fn() on cpu n
    - * if @cpu == NR_CPUS, run @fn() on any cpu
    - * if @cpu == ALL_CPUS, run @fn() on every online CPU.
    + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
    *
    * Description: This causes a thread to be scheduled on every cpu,
    * each of which disables interrupts. The result is that noone is
    @@ -26,22 +26,22 @@
    *
    * This can be thought of as a very heavy write lock, equivalent to
    * grabbing every spinlock in the kernel. */
    -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
    +int stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus);

    /**
    * __stop_machine_run: freeze the machine on all CPUs and run this function
    * @fn: the function to run
    * @data: the data ptr for the @fn
    - * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
    + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
    *
    * Description: This is a special version of the above, which assumes cpus
    * won't come or go while it's being called. Used by hotplug cpu.
    */
    -int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
    +int __stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus);
    #else

    static inline int stop_machine_run(int (*fn)(void *), void *data,
    - unsigned int cpu)
    + const cpumask_t *cpus)
    {
    int ret;
    local_irq_disable();
    diff -r 277c5fb41d25 kernel/cpu.c
    --- a/kernel/cpu.c Tue Jul 08 12:57:33 2008 +1000
    +++ b/kernel/cpu.c Tue Jul 08 17:31:41 2008 +1000
    @@ -224,8 +224,9 @@ static int __ref _cpu_down(unsigned int
    cpus_setall(tmp);
    cpu_clear(cpu, tmp);
    set_cpus_allowed_ptr(current, &tmp);
    + tmp = cpumask_of_cpu(cpu);

    - err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    + err = __stop_machine_run(take_cpu_down, &tcd_param, &tmp);

    if (err || cpu_online(cpu)) {
    /* CPU didn't die: tell everyone. Can't complain. */
    diff -r 277c5fb41d25 kernel/module.c
    --- a/kernel/module.c Tue Jul 08 12:57:33 2008 +1000
    +++ b/kernel/module.c Tue Jul 08 17:31:41 2008 +1000
    @@ -689,7 +689,7 @@ static int try_stop_module(struct module
    {
    struct stopref sref = { mod, flags, forced };

    - return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
    + return stop_machine_run(__try_stop_module, &sref, NULL);
    }

    unsigned int module_refcount(struct module *mod)
    @@ -1421,7 +1421,7 @@ static void free_module(struct module *m
    static void free_module(struct module *mod)
    {
    /* Delete from various lists */
    - stop_machine_run(__unlink_module, mod, NR_CPUS);
    + stop_machine_run(__unlink_module, mod, NULL);
    remove_notes_attrs(mod);
    remove_sect_attrs(mod);
    mod_kobject_remove(mod);
    @@ -2189,7 +2189,7 @@ static struct module *load_module(void _
    /* Now sew it into the lists so we can get lockdep and oops
    * info during argument parsing. Noone should access us, since
    * strong_try_module_get() will fail. */
    - stop_machine_run(__link_module, mod, NR_CPUS);
    + stop_machine_run(__link_module, mod, NULL);

    /* Size of section 0 is 0, so this works well if no params */
    err = parse_args(mod->name, mod->args,
    @@ -2223,7 +2223,7 @@ static struct module *load_module(void _
    return mod;

    unlink:
    - stop_machine_run(__unlink_module, mod, NR_CPUS);
    + stop_machine_run(__unlink_module, mod, NULL);
    module_arch_cleanup(mod);
    cleanup:
    kobject_del(&mod->mkobj.kobj);
    diff -r 277c5fb41d25 kernel/stop_machine.c
    --- a/kernel/stop_machine.c Tue Jul 08 12:57:33 2008 +1000
    +++ b/kernel/stop_machine.c Tue Jul 08 17:31:41 2008 +1000
    @@ -100,7 +100,7 @@ static int chill(void *unused)
    return 0;
    }

    -int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
    +int __stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus)
    {
    int i, err;
    struct stop_machine_data active, idle;
    @@ -111,10 +111,6 @@ int __stop_machine_run(int (*fn)(void *)
    active.fnret = 0;
    idle.fn = chill;
    idle.data = NULL;
    -
    - /* If they don't care which cpu fn runs on, just pick one. */
    - if (cpu == NR_CPUS)
    - cpu = any_online_cpu(cpu_online_map);

    /* This could be too big for stack on large machines. */
    threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
    @@ -128,13 +124,16 @@ int __stop_machine_run(int (*fn)(void *)
    set_state(STOPMACHINE_PREPARE);

    for_each_online_cpu(i) {
    - struct stop_machine_data *smdata;
    + struct stop_machine_data *smdata = &idle;
    struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

    - if (cpu == ALL_CPUS || i == cpu)
    - smdata = &active;
    - else
    - smdata = &idle;
    + if (!cpus) {
    + if (i == first_cpu(cpu_online_map))
    + smdata = &active;
    + } else {
    + if (cpu_isset(i, *cpus))
    + smdata = &active;
    + }

    threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
    if (IS_ERR(threads[i])) {
    @@ -153,7 +152,7 @@ int __stop_machine_run(int (*fn)(void *)

    /* We've created all the threads. Wake them all: hold this CPU so one
    * doesn't hit this CPU until we're ready. */
    - cpu = get_cpu();
    + get_cpu();
    for_each_online_cpu(i)
    wake_up_process(threads[i]);

    @@ -176,13 +175,13 @@ kill_threads:
    return err;
    }

    -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
    +int stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus)
    {
    int ret;

    /* No CPUs can come up or down during this. */
    get_online_cpus();
    - ret = __stop_machine_run(fn, data, cpu);
    + ret = __stop_machine_run(fn, data, cpus);
    put_online_cpus();

    return ret;
    diff -r 277c5fb41d25 mm/page_alloc.c
    --- a/mm/page_alloc.c Tue Jul 08 12:57:33 2008 +1000
    +++ b/mm/page_alloc.c Tue Jul 08 17:31:41 2008 +1000
    @@ -2356,7 +2356,7 @@ void build_all_zonelists(void)
    } else {
    /* we have to stop all cpus to guarantee there is no user
    of zonelist */
    - stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
    + stop_machine_run(__build_all_zonelists, NULL, NULL);
    /* cpuset refresh routine should be here */
    }
    vm_total_pages = nr_free_pagecache_pages();
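
    To make the new calling conventions concrete, here is a hedged usage
    sketch of the three cases the patch describes (my_fn, my_data and
    target_cpu are illustrative names, not from the patch):

    static int my_fn(void *data)
    {
            /* Runs while every online CPU spins with interrupts disabled. */
            return 0;
    }

    static int example(void *my_data, unsigned int target_cpu)
    {
            cpumask_t mask;
            int err;

            /* Don't care which CPU runs my_fn(): was NR_CPUS, now NULL. */
            err = stop_machine_run(my_fn, my_data, NULL);

            /* Run my_fn() on one particular CPU: was a bare cpu number. */
            mask = cpumask_of_cpu(target_cpu);
            err = stop_machine_run(my_fn, my_data, &mask);

            /* Run my_fn() on every online CPU: was the magic ALL_CPUS. */
            err = stop_machine_run(my_fn, my_data, CPU_MASK_ALL_PTR);

            return err;
    }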

  3. Re: [PATCH 2/3] stop_machine: simplify

    I found a small possible cleanup in this patch.

    > diff --git a/kernel/cpu.c b/kernel/cpu.c
    > --- a/kernel/cpu.c
    > +++ b/kernel/cpu.c
    > @@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
    > static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
    > {
    > int err, nr_calls = 0;
    > - struct task_struct *p;
    > cpumask_t old_allowed, tmp;
    > void *hcpu = (void *)(long)cpu;
    > unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
    > @@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
    > cpu_clear(cpu, tmp);
    > set_cpus_allowed_ptr(current, &tmp);
    >
    > - p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    > + err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    >
    > - if (IS_ERR(p) || cpu_online(cpu)) {
    > + if (err || cpu_online(cpu)) {


    I think the cpu_online() check is now unnecessary given the
    __stop_machine_run() change in this patch, i.e. this line can simply be:

    if (err) {

    Because if __stop_machine_run(take_cpu_down, ...) returned zero, it means
    take_cpu_down() has already finished and succeeded (returned zero).
    It also means __cpu_disable() in take_cpu_down() has succeeded.
    So you can remove the cpu_online() check.

    > /* CPU didn't die: tell everyone. Can't complain. */
    > if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
    > hcpu) == NOTIFY_BAD)
    > BUG();
    >
    > - if (IS_ERR(p)) {
    > - err = PTR_ERR(p);
    > - goto out_allowed;
    > - }
    > - goto out_thread;
    > + goto out_allowed;
    > }
    >
    > /* Wait for it to sleep (leaving idle task). */


  4. Re: [PATCH 2/3] stop_machine: simplify

    On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
    > I found a small possible cleanup in this patch.


    Well spotted! I think this cleanup is actually orthogonal to my patch,
    so best served as a separate patch, how's this?

    ===
    Hotplug CPU: don't check cpu_online after take_cpu_down

    Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline
    (otherwise we're in deep trouble anyway).

    Remove the cpu_online() check, and put a BUG_ON().

    Signed-off-by: Rusty Russell
    Cc: "Akinobu Mita"

    diff -r 805a2e5e68dd kernel/cpu.c
    --- a/kernel/cpu.c Tue Jul 08 23:04:48 2008 +1000
    +++ b/kernel/cpu.c Tue Jul 08 23:07:43 2008 +1000
    @@ -226,8 +226,7 @@ static int __ref _cpu_down(unsigned int
    set_cpus_allowed_ptr(current, &tmp);

    err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    -
    - if (err || cpu_online(cpu)) {
    + if (err) {
    /* CPU didn't die: tell everyone. Can't complain. */
    if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
    hcpu) == NOTIFY_BAD)
    @@ -235,6 +234,7 @@ static int __ref _cpu_down(unsigned int

    goto out_allowed;
    }
    + BUG_ON(cpu_online(cpu));

    /* Wait for it to sleep (leaving idle task). */
    while (!idle_cpu(cpu))

  5. Re: [PATCH 2/3] stop_machine: simplify

    Hi Rusty,

    * Rusty Russell (rusty@rustcorp.com.au) wrote:
    > stop_machine creates a kthread which creates kernel threads. We can
    > create those threads directly and simplify things a little. Some care
    > must be taken with CPU hotunplug, which has special needs, but that code
    > seems more robust than it was in the past.
    >
    > Signed-off-by: Rusty Russell
    > ---
    > include/linux/stop_machine.h | 12 -
    > kernel/cpu.c | 13 -
    > kernel/stop_machine.c | 299 ++++++++++++++++++-------------------------
    > 3 files changed, 135 insertions(+), 189 deletions(-)
    >
    > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
    > --- a/include/linux/stop_machine.h
    > +++ b/include/linux/stop_machine.h
    > @@ -17,13 +17,12 @@
    > * @data: the data ptr for the @fn()
    > * @cpu: if @cpu == n, run @fn() on cpu n
    > * if @cpu == NR_CPUS, run @fn() on any cpu
    > - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
    > - * concurrently on all the other cpus
    > + * if @cpu == ALL_CPUS, run @fn() on every online CPU.
    > *


    I agree with this change if it makes things simpler. However, callers
    must be aware of this important change :

    "run @fn() first on the calling cpu, and then concurrently on all the
    other cpus" becomes "run @fn() on every online CPU".

    There were assumptions done in @fn() where a simple non atomic increment
    was used on a static variable to detect that it was the first thread to
    execute. It will have to be changed into an atomic inc/dec and test.
    Given that the other threads have tasks to perform _after_ the first
    thread has executed, they will have to busy-wait (spin) there waiting
    for the first thread to finish its execution.

    Mathieu
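
    A minimal sketch of the pattern described above (illustrative names only;
    Mathieu's actual immediate-values callback appears later in this thread):
    under the new symmetric semantics the callback itself must elect a
    "first" CPU and make the other CPUs spin until it has finished:

    static atomic_t first = ATOMIC_INIT(1);
    static int done;

    static int my_callback(void *data) /* runs on every online CPU */
    {
            if (atomic_dec_and_test(&first)) {
                    do_the_privileged_work(data); /* hypothetical helper */
                    smp_wmb(); /* publish the work before setting the flag */
                    done = 1;
            } else {
                    while (!done) /* busy-wait: interrupts are disabled */
                            smp_rmb();
            }
            return 0;
    }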

    > - * Description: This causes a thread to be scheduled on every other cpu,
    > - * each of which disables interrupts, and finally interrupts are disabled
    > - * on the current CPU. The result is that noone is holding a spinlock
    > - * or inside any other preempt-disabled region when @fn() runs.
    > + * Description: This causes a thread to be scheduled on every cpu,
    > + * each of which disables interrupts. The result is that noone is
    > + * holding a spinlock or inside any other preempt-disabled region when
    > + * @fn() runs.
    > *
    > * This can be thought of as a very heavy write lock, equivalent to
    > * grabbing every spinlock in the kernel. */
    > @@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *),
    > * @data: the data ptr for the @fn
    > * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
    > *
    > - * Description: This is a special version of the above, which returns the
    > - * thread which has run @fn(): kthread_stop will return the return value
    > - * of @fn(). Used by hotplug cpu.
    > + * Description: This is a special version of the above, which assumes cpus
    > + * won't come or go while it's being called. Used by hotplug cpu.
    > */
    > -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
    > - unsigned int cpu);
    > -
    > +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
    > #else
    >
    > static inline int stop_machine_run(int (*fn)(void *), void *data,
    > diff --git a/kernel/cpu.c b/kernel/cpu.c
    > --- a/kernel/cpu.c
    > +++ b/kernel/cpu.c
    > @@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
    > static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
    > {
    > int err, nr_calls = 0;
    > - struct task_struct *p;
    > cpumask_t old_allowed, tmp;
    > void *hcpu = (void *)(long)cpu;
    > unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
    > @@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
    > cpu_clear(cpu, tmp);
    > set_cpus_allowed_ptr(current, &tmp);
    >
    > - p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    > + err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    >
    > - if (IS_ERR(p) || cpu_online(cpu)) {
    > + if (err || cpu_online(cpu)) {
    > /* CPU didn't die: tell everyone. Can't complain. */
    > if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
    > hcpu) == NOTIFY_BAD)
    > BUG();
    >
    > - if (IS_ERR(p)) {
    > - err = PTR_ERR(p);
    > - goto out_allowed;
    > - }
    > - goto out_thread;
    > + goto out_allowed;
    > }
    >
    > /* Wait for it to sleep (leaving idle task). */
    > @@ -255,8 +250,6 @@ static int __ref _cpu_down(unsigned int
    >
    > check_for_tasks(cpu);
    >
    > -out_thread:
    > - err = kthread_stop(p);
    > out_allowed:
    > set_cpus_allowed_ptr(current, &old_allowed);
    > out_release:
    > diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
    > --- a/kernel/stop_machine.c
    > +++ b/kernel/stop_machine.c
    > @@ -1,4 +1,4 @@
    > -/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
    > +/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
    > * GPL v2 and any later version.
    > */
    > #include <linux/cpu.h>
    > @@ -13,219 +13,176 @@
    > #include <asm/atomic.h>
    > #include <asm/uaccess.h>
    >
    > -/* Since we effect priority and affinity (both of which are visible
    > - * to, and settable by outside processes) we do indirection via a
    > - * kthread. */
    > -
    > -/* Thread to stop each CPU in user context. */
    > +/* This controls the threads on each CPU. */
    > enum stopmachine_state {
    > - STOPMACHINE_WAIT,
    > + /* Dummy starting state for thread. */
    > + STOPMACHINE_NONE,
    > + /* Awaiting everyone to be scheduled. */
    > STOPMACHINE_PREPARE,
    > + /* Disable interrupts. */
    > STOPMACHINE_DISABLE_IRQ,
    > + /* Run the function */
    > STOPMACHINE_RUN,
    > + /* Exit */
    > STOPMACHINE_EXIT,
    > };
    > +static enum stopmachine_state state;
    >
    > struct stop_machine_data {
    > int (*fn)(void *);
    > void *data;
    > - struct completion done;
    > - int run_all;
    > -} smdata;
    > + int fnret;
    > +};
    >
    > -static enum stopmachine_state stopmachine_state;
    > -static unsigned int stopmachine_num_threads;
    > -static atomic_t stopmachine_thread_ack;
    > +/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
    > +static unsigned int num_threads;
    > +static atomic_t thread_ack;
    > +static struct completion finished;
    > +static DEFINE_MUTEX(lock);
    >
    > -static int stopmachine(void *cpu)
    > +static void set_state(enum stopmachine_state newstate)
    > {
    > - int irqs_disabled = 0;
    > - int prepared = 0;
    > - int ran = 0;
    > + /* Reset ack counter. */
    > + atomic_set(&thread_ack, num_threads);
    > + smp_wmb();
    > + state = newstate;
    > +}
    >
    > - set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
    > +/* Last one to ack a state moves to the next state. */
    > +static void ack_state(void)
    > +{
    > + if (atomic_dec_and_test(&thread_ack)) {
    > + /* If we're the last one to ack the EXIT, we're finished. */
    > + if (state == STOPMACHINE_EXIT)
    > + complete(&finished);
    > + else
    > + set_state(state + 1);
    > + }
    > +}
    >
    > - /* Ack: we are alive */
    > - smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
    > - atomic_inc(&stopmachine_thread_ack);
    > +/* This is the actual thread which stops the CPU. It exits by itself rather
    > + * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
    > +static int stop_cpu(struct stop_machine_data *smdata)
    > +{
    > + enum stopmachine_state curstate = STOPMACHINE_NONE;
    > + int uninitialized_var(ret);
    >
    > /* Simple state machine */
    > - while (stopmachine_state != STOPMACHINE_EXIT) {
    > - if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
    > - && !irqs_disabled) {
    > - local_irq_disable();
    > - hard_irq_disable();
    > - irqs_disabled = 1;
    > - /* Ack: irqs disabled. */
    > - smp_mb(); /* Must read state first. */
    > - atomic_inc(&stopmachine_thread_ack);
    > - } else if (stopmachine_state == STOPMACHINE_PREPARE
    > - && !prepared) {
    > - /* Everyone is in place, hold CPU. */
    > - preempt_disable();
    > - prepared = 1;
    > - smp_mb(); /* Must read state first. */
    > - atomic_inc(&stopmachine_thread_ack);
    > - } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
    > - smdata.fn(smdata.data);
    > - ran = 1;
    > - smp_mb(); /* Must read state first. */
    > - atomic_inc(&stopmachine_thread_ack);
    > + do {
    > + /* Chill out and ensure we re-read stopmachine_state. */
    > + cpu_relax();
    > + if (state != curstate) {
    > + curstate = state;
    > + switch (curstate) {
    > + case STOPMACHINE_DISABLE_IRQ:
    > + local_irq_disable();
    > + hard_irq_disable();
    > + break;
    > + case STOPMACHINE_RUN:
    > + /* |= allows error detection if functions on
    > + * multiple CPUs. */
    > + smdata->fnret |= smdata->fn(smdata->data);
    > + break;
    > + default:
    > + break;
    > + }
    > + ack_state();
    > }
    > - /* Yield in first stage: migration threads need to
    > - * help our sisters onto their CPUs. */
    > - if (!prepared && !irqs_disabled)
    > - yield();
    > - cpu_relax();
    > - }
    > + } while (curstate != STOPMACHINE_EXIT);
    >
    > - /* Ack: we are exiting. */
    > - smp_mb(); /* Must read state first. */
    > - atomic_inc(&stopmachine_thread_ack);
    > + local_irq_enable();
    > + do_exit(0);
    > +}
    >
    > - if (irqs_disabled)
    > - local_irq_enable();
    > - if (prepared)
    > - preempt_enable();
    > -
    > +/* Callback for CPUs which aren't supposed to do anything. */
    > +static int chill(void *unused)
    > +{
    > return 0;
    > }
    >
    > -/* Change the thread state */
    > -static void stopmachine_set_state(enum stopmachine_state state)
    > +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
    > {
    > - atomic_set(&stopmachine_thread_ack, 0);
    > - smp_wmb();
    > - stopmachine_state = state;
    > - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
    > - cpu_relax();
    > -}
    > + int i, err;
    > + struct stop_machine_data active, idle;
    > + struct task_struct **threads;
    >
    > -static int stop_machine(void)
    > -{
    > - int i, ret = 0;
    > + active.fn = fn;
    > + active.data = data;
    > + active.fnret = 0;
    > + idle.fn = chill;
    > + idle.data = NULL;
    >
    > - atomic_set(&stopmachine_thread_ack, 0);
    > - stopmachine_num_threads = 0;
    > - stopmachine_state = STOPMACHINE_WAIT;
    > + /* If they don't care which cpu fn runs on, just pick one. */
    > + if (cpu == NR_CPUS)
    > + cpu = any_online_cpu(cpu_online_map);
    > +
    > + /* This could be too big for stack on large machines. */
    > + threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
    > + if (!threads)
    > + return -ENOMEM;
    > +
    > + /* Set up initial state. */
    > + mutex_lock(&lock);
    > + init_completion(&finished);
    > + num_threads = num_online_cpus();
    > + set_state(STOPMACHINE_PREPARE);
    >
    > for_each_online_cpu(i) {
    > - if (i == raw_smp_processor_id())
    > - continue;
    > - ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
    > - if (ret < 0)
    > - break;
    > - stopmachine_num_threads++;
    > + struct stop_machine_data *smdata;
    > + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
    > +
    > + if (cpu == ALL_CPUS || i == cpu)
    > + smdata = &active;
    > + else
    > + smdata = &idle;
    > +
    > + threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
    > + if (IS_ERR(threads[i])) {
    > + err = PTR_ERR(threads[i]);
    > + threads[i] = NULL;
    > + goto kill_threads;
    > + }
    > +
    > + /* Place it onto correct cpu. */
    > + kthread_bind(threads[i], i);
    > +
    > + /* Make it highest prio. */
    > + if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
    > + BUG();
    > }
    >
    > - /* Wait for them all to come to life. */
    > - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
    > - yield();
    > - cpu_relax();
    > - }
    > + /* We've created all the threads. Wake them all: hold this CPU so one
    > + * doesn't hit this CPU until we're ready. */
    > + cpu = get_cpu();
    > + for_each_online_cpu(i)
    > + wake_up_process(threads[i]);
    >
    > - /* If some failed, kill them all. */
    > - if (ret < 0) {
    > - stopmachine_set_state(STOPMACHINE_EXIT);
    > - return ret;
    > - }
    > + /* This will release the thread on our CPU. */
    > + put_cpu();
    > + wait_for_completion(&finished);
    > + mutex_unlock(&lock);
    >
    > - /* Now they are all started, make them hold the CPUs, ready. */
    > - preempt_disable();
    > - stopmachine_set_state(STOPMACHINE_PREPARE);
    > + kfree(threads);
    >
    > - /* Make them disable irqs. */
    > - local_irq_disable();
    > - hard_irq_disable();
    > - stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
    > + return active.fnret;
    >
    > - return 0;
    > -}
    > +kill_threads:
    > + for_each_online_cpu(i)
    > + if (threads[i])
    > + kthread_stop(threads[i]);
    > + mutex_unlock(&lock);
    >
    > -static void restart_machine(void)
    > -{
    > - stopmachine_set_state(STOPMACHINE_EXIT);
    > - local_irq_enable();
    > - preempt_enable_no_resched();
    > -}
    > -
    > -static void run_other_cpus(void)
    > -{
    > - stopmachine_set_state(STOPMACHINE_RUN);
    > -}
    > -
    > -static int do_stop(void *_smdata)
    > -{
    > - struct stop_machine_data *smdata = _smdata;
    > - int ret;
    > -
    > - ret = stop_machine();
    > - if (ret == 0) {
    > - ret = smdata->fn(smdata->data);
    > - if (smdata->run_all)
    > - run_other_cpus();
    > - restart_machine();
    > - }
    > -
    > - /* We're done: you can kthread_stop us now */
    > - complete(&smdata->done);
    > -
    > - /* Wait for kthread_stop */
    > - set_current_state(TASK_INTERRUPTIBLE);
    > - while (!kthread_should_stop()) {
    > - schedule();
    > - set_current_state(TASK_INTERRUPTIBLE);
    > - }
    > - __set_current_state(TASK_RUNNING);
    > - return ret;
    > -}
    > -
    > -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
    > - unsigned int cpu)
    > -{
    > - static DEFINE_MUTEX(stopmachine_mutex);
    > - struct stop_machine_data smdata;
    > - struct task_struct *p;
    > -
    > - mutex_lock(&stopmachine_mutex);
    > -
    > - smdata.fn = fn;
    > - smdata.data = data;
    > - smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
    > - init_completion(&smdata.done);
    > -
    > - smp_wmb(); /* make sure other cpus see smdata updates */
    > -
    > - /* If they don't care which CPU fn runs on, bind to any online one. */
    > - if (cpu == NR_CPUS || cpu == ALL_CPUS)
    > - cpu = raw_smp_processor_id();
    > -
    > - p = kthread_create(do_stop, &smdata, "kstopmachine");
    > - if (!IS_ERR(p)) {
    > - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
    > -
    > - /* One high-prio thread per cpu. We'll do this one. */
    > - sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
    > - kthread_bind(p, cpu);
    > - wake_up_process(p);
    > - wait_for_completion(&smdata.done);
    > - }
    > - mutex_unlock(&stopmachine_mutex);
    > - return p;
    > + kfree(threads);
    > + return err;
    > }
    >
    > int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
    > {
    > - struct task_struct *p;
    > int ret;
    >
    > /* No CPUs can come up or down during this. */
    > get_online_cpus();
    > - p = __stop_machine_run(fn, data, cpu);
    > - if (!IS_ERR(p))
    > - ret = kthread_stop(p);
    > - else
    > - ret = PTR_ERR(p);
    > + ret = __stop_machine_run(fn, data, cpu);
    > put_online_cpus();
    >
    > return ret;


    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  6. Re: [PATCH 2/3] stop_machine: simplify

    2008/7/8 Rusty Russell :
    > On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
    >> I found a small possible cleanup in this patch.

    >
    > Well spotted! I think this cleanup is actually orthogonal to my patch,
    > so best served as a separate patch, how's this?


    Actually the cpu_online() check was necessary before applying this
    stop_machine: simplify patch.

    With the old __stop_machine_run(), __stop_machine_run() could succeed
    (return a !IS_ERR(p) value) even if take_cpu_down() returned a non-zero
    value. The return value of take_cpu_down() was obtained through
    kthread_stop().

    > ===
    > Hotplug CPU: don't check cpu_online after take_cpu_down


    So it seems that folding this patch into stop_machine: simplify patch is
    more appropriate.

    > Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline
    > (otherwise we're in deep trouble anyway).
    >
    > Remove the cpu_online() check, and put a BUG_ON().
    >
    > Signed-off-by: Rusty Russell
    > Cc: "Akinobu Mita"
    >
    > diff -r 805a2e5e68dd kernel/cpu.c
    > --- a/kernel/cpu.c Tue Jul 08 23:04:48 2008 +1000
    > +++ b/kernel/cpu.c Tue Jul 08 23:07:43 2008 +1000
    > @@ -226,8 +226,7 @@ static int __ref _cpu_down(unsigned int
    > set_cpus_allowed_ptr(current, &tmp);
    >
    > err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
    > -
    > - if (err || cpu_online(cpu)) {
    > + if (err) {
    > /* CPU didn't die: tell everyone. Can't complain. */
    > if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
    > hcpu) == NOTIFY_BAD)
    > @@ -235,6 +234,7 @@ static int __ref _cpu_down(unsigned int
    >
    > goto out_allowed;
    > }
    > + BUG_ON(cpu_online(cpu));
    >
    > /* Wait for it to sleep (leaving idle task). */
    > while (!idle_cpu(cpu))
    >


  7. Re: [PATCH 0/3] stop_machine enhancements and simplifications

    Am Dienstag, 8. Juli 2008 schrieb Rusty Russell:
    > Hi all,
    >
    > Here are the three stop_machine patches I've queued for the next merge
    > window. The first two are pretty well tested via linux-next, the last is
    > recent but fairly straightforward.


    Hmm, these patches don't fit on kvm.git so I tested the version from
    linux-next.

    When using linux-next in a kvm guest on a linux-next host this patch set
    really improves the situation. On a guest with 64 cpus on a 1 cpu host
    stop_machine_run seems to finish immediately.

    I still have some strange problems with 25 guests * 64 cpus on one host cpu,
    but this requires further analysis.

    IMHO this patch set is really an improvement to the earlier version:

    Acked-by: Christian Borntraeger


  8. Re: [PATCH 0/3] stop_machine enhancements and simplifications

    On Tue, Jul 08, 2008 at 05:50:55PM +1000, Rusty Russell wrote:
    > Hi all,
    >
    > Here are the three stop_machine patches I've queued for the next merge
    > window. The first two are pretty well tested via linux-next, the last is
    > recent but fairly straightforward.
    >
    > I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
    > but it can be removed by the last of these patches (left in to reduce
    > transition breakage).


    hi,

    I added the 'ALL_CPUS' feature to support the architecture-independent
    immediate values code that Mathieu wrote. So, if Mathieu needs it, great;
    otherwise I have no other code depending on it.

    thanks,

    -Jason

  9. Re: [PATCH 0/3] stop_machine enhancements and simplifications

    * Jason Baron (jbaron@redhat.com) wrote:
    > On Tue, Jul 08, 2008 at 05:50:55PM +1000, Rusty Russell wrote:
    > > Hi all,
    > >
    > > Here are the three stop_machine patches I've queued for the next merge
    > > window. The first two are pretty well tested via linux-next, the last is
    > > recent but fairly straightforward.
    > >
    > > I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
    > > but it can be removed by the last of these patches (left in to reduce
    > > transition breakage).

    >
    > hi,
    >
    > I added the 'ALL_CPUS' feature to support the architecture independent immediate
    > code that Mathieu wrote. So, if Mathieu needs it great, otherwise I have no
    > other code depending on it.
    >
    > thanks,
    >
    > -Jason


    Hi Rusty,

    The immediate values implementation (the 'simple' version) uses this
    patch. If we include your simplification, I'll have to modify the
    immediate values patch a little bit so we deal with concurrent execution
    of all the threads, as I pointed out in my earlier email.

    Mathieu


    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  10. Re: [PATCH 2/3] stop_machine: simplify

    On Wednesday 09 July 2008 01:02:02 Akinobu Mita wrote:
    > 2008/7/8 Rusty Russell :
    > > On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
    > >> I found a small possible cleanup in this patch.

    > >
    > > Well spotted! I think this cleanup is actually orthogonal to my patch,
    > > so best served as a separate patch, how's this?

    >
    > Actually the cpu_online() check was necessary before applying this
    > stop_machine: simplify patch.
    >
    > With the old __stop_machine_run(), __stop_machine_run() could succeed
    > (return a !IS_ERR(p) value) even if take_cpu_down() returned a non-zero
    > value. The return value of take_cpu_down() was obtained through kthread_stop().


    Ah, thanks for the analysis!

    It's a little non-obvious, so I've left it as a separate patch (it doesn't
    hurt to have the check there), but included your excellent explanation within
    it.

    Thanks!
    Rusty.

  11. Re: [PATCH 2/3] stop_machine: simplify

    On Wednesday 09 July 2008 00:27:03 Mathieu Desnoyers wrote:
    > Hi Rusty,
    >
    > * Rusty Russell (rusty@rustcorp.com.au) wrote:
    > > stop_machine creates a kthread which creates kernel threads. We can
    > > create those threads directly and simplify things a little. Some care
    > > must be taken with CPU hotunplug, which has special needs, but that code
    > > seems more robust than it was in the past.
    > >
    > > Signed-off-by: Rusty Russell
    > > ---
    > > include/linux/stop_machine.h | 12 -
    > > kernel/cpu.c | 13 -
    > > kernel/stop_machine.c | 299
    > > ++++++++++++++++++------------------------- 3 files changed, 135
    > > insertions(+), 189 deletions(-)
    > >
    > > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
    > > --- a/include/linux/stop_machine.h
    > > +++ b/include/linux/stop_machine.h
    > > @@ -17,13 +17,12 @@
    > > * @data: the data ptr for the @fn()
    > > * @cpu: if @cpu == n, run @fn() on cpu n
    > > * if @cpu == NR_CPUS, run @fn() on any cpu
    > > - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and
    > > then - * concurrently on all the other cpus
    > > + * if @cpu == ALL_CPUS, run @fn() on every online CPU.
    > > *

    >
    > I agree with this change if it makes things simpler. However, callers
    > must be aware of this important change :
    >
    > "run @fn() first on the calling cpu, and then concurrently on all the
    > other cpus" becomes "run @fn() on every online CPU".


    OK. Since that was never in mainline, I think you're the only one who needs
    to be aware of the semantic change?

    The new symmetric implementation breaks it; hope that isn't a showstopper for
    you?

    > There were assumptions done in @fn() where a simple non atomic increment
    > was used on a static variable to detect that it was the first thread to
    > execute. It will have to be changed into an atomic inc/dec and test.
    > Given that the other threads have tasks to perform _after_ the first
    > thread has executed, they will have to busy-wait (spin) there waiting
    > for the first thread to finish its execution.


    I assume you can't do that step then call stop_machine.

    Thanks,
    Rusty.

  12. Re: [PATCH 2/3] stop_machine: simplify

    * Rusty Russell (rusty@rustcorp.com.au) wrote:
    > On Wednesday 09 July 2008 00:27:03 Mathieu Desnoyers wrote:
    > > Hi Rusty,
    > >
    > > * Rusty Russell (rusty@rustcorp.com.au) wrote:
    > > > stop_machine creates a kthread which creates kernel threads. We can
    > > > create those threads directly and simplify things a little. Some care
    > > > must be taken with CPU hotunplug, which has special needs, but that code
    > > > seems more robust than it was in the past.
    > > >
    > > > Signed-off-by: Rusty Russell
    > > > ---
    > > > include/linux/stop_machine.h | 12 -
    > > > kernel/cpu.c | 13 -
    > > > kernel/stop_machine.c | 299
    > > > ++++++++++++++++++------------------------- 3 files changed, 135
    > > > insertions(+), 189 deletions(-)
    > > >
    > > > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
    > > > --- a/include/linux/stop_machine.h
    > > > +++ b/include/linux/stop_machine.h
    > > > @@ -17,13 +17,12 @@
    > > > * @data: the data ptr for the @fn()
    > > > * @cpu: if @cpu == n, run @fn() on cpu n
    > > > * if @cpu == NR_CPUS, run @fn() on any cpu
    > > > - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and
    > > > then - * concurrently on all the other cpus
    > > > + * if @cpu == ALL_CPUS, run @fn() on every online CPU.
    > > > *

    > >
    > > I agree with this change if it makes things simpler. However, callers
    > > must be aware of this important change :
    > >
    > > "run @fn() first on the calling cpu, and then concurrently on all the
    > > other cpus" becomes "run @fn() on every online CPU".

    >
    > OK. Since that was never in mainline, I think you're the only one who needs
    > to be aware of the semantic change?
    >
    > The new symmetric implementation breaks it; hope that isn't a showstopper for
    > you?
    >


    Nope, that should be ok with something like :

    ....
    atomic_set(&stop_machine_first, 1);
    wrote_text = 0;
    stop_machine_run(stop_machine_imv_update, (void *)imv, ALL_CPUS);
    ....

    static int stop_machine_imv_update(void *imv_ptr)
    {
            struct __imv *imv = imv_ptr;

            if (atomic_dec_and_test(&stop_machine_first)) {
                    text_poke((void *)imv->imv, (void *)imv->var, imv->size);
                    smp_wmb(); /* make sure other cpus see that this has run */
                    wrote_text = 1;
            } else {
                    while (!wrote_text)
                            smp_rmb();
                    sync_core();
            }

            flush_icache_range(imv->imv, imv->imv + imv->size);

            return 0;
    }

    > > There were assumptions done in @fn() where a simple non atomic increment
    > > was used on a static variable to detect that it was the first thread to
    > > execute. It will have to be changed into an atomic inc/dec and test.
    > > Given that the other threads have tasks to perform _after_ the first
    > > thread has executed, they will have to busy-wait (spin) there waiting
    > > for the first thread to finish its execution.

    >
    > I assume you can't do that step then call stop_machine.
    >


    Indeed, I can't, because I need to have all other CPUs busy looping with
    interrupts disabled while I do the text_poke.

    Mathieu


    > Thanks,
    > Rusty.


    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  13. Re: [PATCH 2/3] stop_machine: simplify

    Rusty Russell wrote:
    > stop_machine creates a kthread which creates kernel threads. We can
    > create those threads directly and simplify things a little. Some care
    > must be taken with CPU hotunplug, which has special needs, but that code
    > seems more robust than it was in the past.
    >
    > Signed-off-by: Rusty Russell


    Rusty,

    You mentioned (in private conversation) that you were going to add some
    logic that checks whether CPU is running user-space code and not holding
    any locks to avoid scheduling stop_machine thread on it. Was it supposed
    to be part of this patch ?

    Max

  14. Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace

    On Friday 11 July 2008 07:07:57 Milton Miller wrote:
    > Hi Rusty, Ingo.
    >
    > Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
    > numbers didn't find kernel/trace/ftrace.c in -next, causing an immediate
    > almost NULL pointer dereference in ftrace_dynamic_init.


    Yes, I'm switching the patches around, so it does the transition correctly.
    Introduces a new stop_machine() fn with the new interface and deprecates the
    old stop_machine_run(). We can remove stop_machine_run() after everyone is
    switched.

    Thanks,
    Rusty.
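
    A hedged sketch of what such a transition could look like (an assumed
    shape, not the committed code): the new mask-based stop_machine()
    becomes the real entry point, and the old stop_machine_run() survives
    as a deprecated inline wrapper mapping the magic numbers onto masks:

    int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);

    static inline int __deprecated stop_machine_run(int (*fn)(void *),
                                                    void *data,
                                                    unsigned int cpu)
    {
            /* If they don't care which cpu fn runs on, just pick one. */
            if (cpu == NR_CPUS)
                    return stop_machine(fn, data, NULL);
            else if (cpu == ~0U) /* the old ALL_CPUS magic value */
                    return stop_machine(fn, data, CPU_MASK_ALL_PTR);
            else {
                    cpumask_t cpus = cpumask_of_cpu(cpu);
                    return stop_machine(fn, data, &cpus);
            }
    }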

  15. Re: [PATCH 2/3] stop_machine: simplify

    On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
    > Rusty Russell wrote:
    > > stop_machine creates a kthread which creates kernel threads. We can
    > > create those threads directly and simplify things a little. Some care
    > > must be taken with CPU hotunplug, which has special needs, but that code
    > > seems more robust than it was in the past.
    > >
    > > Signed-off-by: Rusty Russell

    >
    > Rusty,
    >
    > You mentioned (in private conversation) that you were going to add some
    > logic that checks whether CPU is running user-space code and not holding
    > any locks to avoid scheduling stop_machine thread on it. Was it supposed
    > to be part of this patch ?
    >
    > Max


    No... I tried it, and it killed my machine. I didn't chase it for the moment,
    but it's on the "experimental" end of my patch queue.

    Will play with it again and report,
    Rusty.


  16. Re: [PATCH 2/3] stop_machine: simplify

    * Rusty Russell (rusty@rustcorp.com.au) wrote:
    > On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
    > > Rusty Russell wrote:
    > > > stop_machine creates a kthread which creates kernel threads. We can
    > > > create those threads directly and simplify things a little. Some care
    > > > must be taken with CPU hotunplug, which has special needs, but that code
    > > > seems more robust than it was in the past.
    > > >
    > > > Signed-off-by: Rusty Russell

    > >
    > > Rusty,
    > >
    > > You mentioned (in private conversation) that you were going to add some
    > > logic that checks whether CPU is running user-space code and not holding
    > > any locks to avoid scheduling stop_machine thread on it. Was it supposed
    > > to be part of this patch ?
    > >
    > > Max

    >
    > No... I tried it, and it killed my machine. I didn't chase it for the moment,
    > but it's on the "experimental" end of my patch queue.
    >
    > Will play with it again and report,
    > Rusty.
    >


    Hrm, I must be missing something, but using the fact that other CPUs are
    running in userspace as a guarantee that they are not running within
    critical kernel sections seems kind of... racy? I'd like to have a look
    at this experimental patch: does it inhibit interrupts somehow and/or
    does it take control of userspace processes doing system calls at that
    precise moment?

    Mathieu

    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

  17. Re: [PATCH 2/3] stop_machine: simplify

    On Friday 11 July 2008 23:12:21 Mathieu Desnoyers wrote:
    > * Rusty Russell (rusty@rustcorp.com.au) wrote:
    > > On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
    > > > You mentioned (in private conversation) that you were going to add some
    > > > logic that checks whether CPU is running user-space code and not
    > > > holding any locks to avoid scheduling stop_machine thread on it.

    > >
    > > Will play with it again and report,
    > > Rusty.

    >
    > Hrm, I must be missing something, but using the fact that other CPUs are
    > running in userspace as a guarantee that they are not running within
    > critical kernel sections seems kind of.. racy ? I'd like to have a look
    > at this experimental patch : does it inhibit interrupts somehow and/or
    > does it take control of userspace processes doing system calls at that
    > precise moment ?


    The idea was to try this ipi-to-all-cpus and fall back on the current thread
    method if it doesn't work. I suspect it will succeed in the vast majority of
    cases (with CONFIG_PREEMPT, we can also let the function execute if in-kernel
    but preemptible). Something like this:

    +struct ipi_data {
    +        atomic_t acked;
    +        atomic_t failed;
    +        unsigned int cpu;
    +        int fnret;
    +        int (*fn)(void *data);
    +        void *data;
    +};
    +
    +static void ipi_func(void *info)
    +{
    +        struct ipi_data *ipi = info;
    +        bool ok = false;
    +
    +        printk("get_irq_regs(%i) = %p\n", smp_processor_id(), get_irq_regs());
    +        goto fail;
    +
    +        if (user_mode(get_irq_regs()))
    +                ok = true;
    +        else {
    +#ifdef CONFIG_PREEMPT
    +                /* We're in an interrupt, ok, but were we preemptible
    +                 * before that? */
    +                if ((hardirq_count() >> HARDIRQ_SHIFT) == 1) {
    +                        int prev = preempt_count() & ~HARDIRQ_MASK;
    +                        if ((prev & ~PREEMPT_ACTIVE) == PREEMPT_INATOMIC_BASE)
    +                                ok = true;
    +                }
    +#endif
    +        }
    +
    +fail:
    +        if (!ok) {
    +                /* Mark our failure before acking. */
    +                atomic_inc(&ipi->failed);
    +                wmb();
    +        }
    +
    +        if (smp_processor_id() != ipi->cpu) {
    +                /* Wait for cpu to call function (last to ack). */
    +                atomic_inc(&ipi->acked);
    +                while (atomic_read(&ipi->acked) != num_online_cpus())
    +                        cpu_relax();
    +        } else {
    +                while (atomic_read(&ipi->acked) != num_online_cpus() - 1)
    +                        cpu_relax();
    +                /* Must read acked before failed. */
    +                rmb();
    +
    +                /* Call function if noone failed. */
    +                if (atomic_read(&ipi->failed) == 0)
    +                        ipi->fnret = ipi->fn(ipi->data);
    +                atomic_inc(&ipi->acked);
    +        }
    +}
    +
    +static bool try_ipi_stop(int (*fn)(void *), void *data, unsigned int cpu,
    +                         int *ret)
    +{
    +        struct ipi_data ipi;
    +
    +        /* If they don't care which cpu fn runs on, just pick one. */
    +        if (cpu == NR_CPUS)
    +                ipi.cpu = any_online_cpu(cpu_online_map);
    +        else
    +                ipi.cpu = cpu;
    +
    +        atomic_set(&ipi.acked, 0);
    +        atomic_set(&ipi.failed, 0);
    +        ipi.fn = fn;
    +        ipi.data = data;
    +        ipi.fnret = 0;
    +
    +        smp_call_function(ipi_func, &ipi, 0, 1);
    +
    +        printk("stop_machine: ipi acked %u failed %u\n",
    +               atomic_read(&ipi.acked), atomic_read(&ipi.failed));
    +        *ret = ipi.fnret;
    +        return (atomic_read(&ipi.failed) == 0);
    +}

    Hope that clarifies!
    Rusty.
