[PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU - Kernel

This is a discussion on [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU - Kernel ; Hello! This patch adds stalled-CPU detection to Classic RCU. This capability is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which defaults disabled. This is a debugging feature, not something that non-kernel-hackers would be expected to care about. This feature can ...

+ Reply to Thread
Results 1 to 17 of 17

Thread: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

  1. [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    Hello!

    This patch adds stalled-CPU detection to Classic RCU. This capability
    is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    which defaults disabled. This is a debugging feature, not something
    that non-kernel-hackers would be expected to care about. This feature
    can detect looping CPUs in !PREEMPT builds and looping CPUs with
    preemption disabled in PREEMPT builds. This is essentially a port of
    this functionality from the treercu patch.

    One current shortcoming: on some systems, stalls are detected during
    early boot, when we normally would not care about them. My thought is
    to add a call from late initialization to suppress stall detection until
    the system is well along its way to being booted, but thought I should
    check to see if there might already be something for this purpose.

    (Currently against 2.6.27-rc8, FYI.)

    Thoughts?

    Signed-off-by: Paul E. McKenney
    ---

    include/linux/rcuclassic.h | 9 ++++
    kernel/rcuclassic.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
    lib/Kconfig.debug | 13 ++++++
    3 files changed, 110 insertions(+)

    diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    index 4ab8436..9b62e9a 100644
    --- a/include/linux/rcuclassic.h
    +++ b/include/linux/rcuclassic.h
    @@ -40,6 +40,10 @@
    #include
    #include

    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    +#define RCU_SECONDS_TILL_STALL_CHECK 3 /* for rcp->seconds_stall */
    +#define RCU_SECONDS_TILL_STALL_RECHECK 30 /* for rcp->seconds_stall */
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

    /* Global control variables for rcupdate callback mechanism. */
    struct rcu_ctrlblk {
    @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
    spinlock_t lock ____cacheline_internodealigned_in_smp;
    cpumask_t cpumask; /* CPUs that need to switch in order */
    /* for current batch to proceed. */
    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    + unsigned long gp_start; /* Time at which GP started in jiffies. */
    + unsigned long seconds_stall;
    + /* Time at which to check for CPU stalls. */
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    } ____cacheline_internodealigned_in_smp;

    /* Is batch a before batch b ? */
    diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    index aad93cd..c092ba9 100644
    --- a/kernel/rcuclassic.c
    +++ b/kernel/rcuclassic.c
    @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
    }
    #endif

    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    +
    +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    +{
    + rcp->gp_start = jiffies;
    + rcp->seconds_stall = get_seconds() + RCU_SECONDS_TILL_STALL_CHECK;
    +}
    +
    +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + int cpu;
    + long delta;
    + unsigned long flags;
    +
    + /* Only let one CPU complain about others per time interval. */
    +
    + spin_lock_irqsave(&rcp->lock, flags);
    + delta = get_seconds() - rcp->seconds_stall;
    + if (delta < 2 || rcp->cur != rcp->completed) {
    + spin_unlock_irqrestore(&rcp->lock, flags);
    + return;
    + }
    + rcp->seconds_stall = get_seconds() + RCU_SECONDS_TILL_STALL_RECHECK;
    + spin_unlock_irqrestore(&rcp->lock, flags);
    +
    + /* OK, time to rat on our buddy... */
    +
    + printk(KERN_ERR "RCU detected CPU stalls:");
    + for_each_possible_cpu(cpu) {
    + if (cpu_isset(cpu, rcp->cpumask))
    + printk(" %d", cpu);
    + }
    + printk(" (detected by %d, t=%ld jiffies)\n",
    + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    +}
    +
    +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + unsigned long flags;
    +
    + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    + smp_processor_id(), get_seconds(),
    + jiffies - rcp->gp_start);
    + dump_stack();
    + spin_lock_irqsave(&rcp->lock, flags);
    + if ((long)(get_seconds() - rcp->seconds_stall) >= 0)
    + rcp->seconds_stall =
    + get_seconds() + RCU_SECONDS_TILL_STALL_RECHECK;
    + spin_unlock_irqrestore(&rcp->lock, flags);
    + set_need_resched(); /* kick ourselves to get things going. */
    +}
    +
    +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + long delta;
    +
    + delta = get_seconds() - rcp->seconds_stall;
    + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    +
    + /* We haven't checked in, so go dump stack. */
    + print_cpu_stall(rcp);
    +
    + } else if (rcp->cur != rcp->completed && delta >= 2) {
    +
    + /* They had two seconds to dump stack, so complain. */
    + print_other_cpu_stall(rcp);
    + }
    +}
    +
    +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    +
    +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    +{
    +}
    +
    +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    +{
    +}
    +
    +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    +
    /**
    * call_rcu - Queue an RCU callback for invocation after a grace period.
    * @head: structure to be used for queueing the RCU updates.
    @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    */
    smp_wmb();
    rcp->cur++;
    + record_gp_stall_check_time(rcp);

    /*
    * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)

    static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    {
    + /* Check for CPU stalls, if enabled. */
    + check_cpu_stall(rcp);
    +
    /* This cpu has pending rcu entries and the grace period
    * for them has completed.
    */
    @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
    static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
    struct rcu_data *rdp)
    {
    +#ifdef CONFIG_DEBUG_RCU_STALL
    + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    memset(rdp, 0, sizeof(*rdp));
    rdp->curtail = &rdp->curlist;
    rdp->nxttail = &rdp->nxtlist;
    diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    index 0b50481..9fee969 100644
    --- a/lib/Kconfig.debug
    +++ b/lib/Kconfig.debug
    @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
    Say N here if you want the RCU torture tests to start only
    after being manually enabled via /proc.

    +config RCU_CPU_STALL_DETECTOR
    + bool "Check for stalled CPUs delaying RCU grace periods"
    + depends on CLASSIC_RCU
    + default n
    + help
    + This option causes RCU to printk information on which
    + CPUs are delaying the current grace period, but only when
    + the grace period extends for excessive time periods.
    +
    + Say Y if you want RCU to perform such checks.
    +
    + Say N if you are unsure.
    +
    config KPROBES_SANITY_TEST
    bool "Kprobes sanity tests"
    depends on DEBUG_KERNEL
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU


    * Paul E. McKenney wrote:

    > Hello!
    >
    > This patch adds stalled-CPU detection to Classic RCU. This capability
    > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > which defaults disabled. This is a debugging feature, not something
    > that non-kernel-hackers would be expected to care about. This feature
    > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > preemption disabled in PREEMPT builds. This is essentially a port of
    > this functionality from the treercu patch.
    >
    > One current shortcoming: on some systems, stalls are detected during
    > early boot, when we normally would not care about them. My thought is
    > to add a call from late initialization to suppress stall detection
    > until the system is well along its way to being booted, but thought I
    > should check to see if there might already be something for this
    > purpose.


    could you be a bit more specific, why do those warnings show up and why
    dont we care about them? There are things like networking that
    occasionally do an rcu_sync() and a stall could mean a bootup hang.

    > (Currently against 2.6.27-rc8, FYI.)
    >
    > Thoughts?
    >
    > Signed-off-by: Paul E. McKenney


    i think this is a very good idea in general - often the question comes
    up whether a hang seen in the RCU code is indeed caused by RCU or other
    factors. Could you perhaps rebase it against tip/core/rcu ? [or
    tip/master for convenience]

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
    >
    > * Paul E. McKenney wrote:
    >
    > > Hello!
    > >
    > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > which defaults disabled. This is a debugging feature, not something
    > > that non-kernel-hackers would be expected to care about. This feature
    > > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > > preemption disabled in PREEMPT builds. This is essentially a port of
    > > this functionality from the treercu patch.
    > >
    > > One current shortcoming: on some systems, stalls are detected during
    > > early boot, when we normally would not care about them. My thought is
    > > to add a call from late initialization to suppress stall detection
    > > until the system is well along its way to being booted, but thought I
    > > should check to see if there might already be something for this
    > > purpose.

    >
    > could you be a bit more specific, why do those warnings show up and why
    > dont we care about them? There are things like networking that
    > occasionally do an rcu_sync() and a stall could mean a bootup hang.


    Hmmm...

    Good point, I was just falling back on my old "we don't care about RCU
    stalls in boot-time code" rule from long ago. It is entirely possible
    (in fact reasonably likely) that this rule no longer applies to Linux as
    it exists today. So please see below for the console output. Not all
    systems report this stall. I have recently been running only on Power
    systems, will fire off on some x86s. My kneejerk reaction was that the
    "stall" was really due to the clock-setting operation -- RCU stalls are
    based on get_seconds().

    So maybe I need to change the stall-detection code to use jiffies
    instead.

    Thoughts?

    > > (Currently against 2.6.27-rc8, FYI.)
    > >
    > > Thoughts?
    > >
    > > Signed-off-by: Paul E. McKenney

    >
    > i think this is a very good idea in general - often the question comes
    > up whether a hang seen in the RCU code is indeed caused by RCU or other
    > factors. Could you perhaps rebase it against tip/core/rcu ? [or
    > tip/master for convenience]


    Will do!

    Thanx, Paul

    ------------------------------------------------------------------------
    Console output: search for "RCU detected CPU 5 stall" near the end.
    ------------------------------------------------------------------------

    Elapsed time since release of system processors: 7742 mins 58 secs

    Config file read, 2048 bytes
    Welcome
    Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
    Enter "help" to get some basic usage information
    boot: autobench
    Please wait, loading kernel...
    Elf64 kernel loaded...
    Loading ramdisk...
    ramdisk loaded at 02b00000, size: 2331 Kbytes
    OF stdout device is: /vdevice/vty@30000000
    Hypertas detected, assuming LPAR !
    -\|/-\|/-\|/-\|
    Elapsed time since release of system processors: 7743 mins 29 secs

    Config file read, 2048 bytes
    Welcome
    Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
    Enter "help" to get some basic usage information
    boot: autobench
    Please wait, loading kernel...
    Elf64 kernel loaded...
    Loading ramdisk...
    ramdisk loaded at 02b00000, size: 2331 Kbytes
    OF stdout device is: /vdevice/vty@30000000
    Hypertas detected, assuming LPAR !
    command line: ro console=hvc0 autobench_args: root=/dev/sda6
    ABAT:1222922465
    memory layout at init:
    alloc_bottom : 0000000002d47000
    alloc_top : 0000000008000000
    alloc_top_hi : 0000000008000000
    rmo_top : 0000000008000000
    ram_top : 0000000008000000
    Looking for displays
    instantiating rtas at 0x000000000757a000 ... done
    boot cpu hw idx 0000000000000000
    starting cpu hw idx 0000000000000002... done
    starting cpu hw idx 0000000000000004... done
    starting cpu hw idx 0000000000000006... done
    copying OF device tree ...
    Building dt strings...
    Building dt structure...
    Device tree strings 0x0000000002f48000 -> 0x0000000002f49537
    Device tree struct 0x0000000002f4a000 -> 0x0000000002f58000
    Calling quiesce ...
    returning from prom_init
    Using pSeries machine description
    Using 1TB segments
    Found initrd at 0xc000000002b00000:0xc000000002d46c00
    console [udbg0] enabled
    Partition configured for 8 cpus.
    CPU maps initialized for 2 threads per core
    Starting Linux PPC64 #1 SMP PREEMPT Thu Oct 2 00:39:54 EDT 2008
    -----------------------------------------------------
    ppc64_pft_size = 0x19
    physicalMemorySize = 0x80000000
    htab_hash_mask = 0x3ffff
    -----------------------------------------------------
    Initializing cgroup subsys cpuset
    Linux version 2.6.27-rc7-autokern1 (root@tundro2.rchland.ibm.com) (gcc
    version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP PREEMPT Thu Oct 2
    00:39:54 EDT 2008
    [boot]0012 Setup Arch
    EEH: No capable adapters found
    PPC64 nvram contains 15360 bytes
    Zone PFN ranges:
    DMA 0x00000000 -> 0x00080000
    Normal 0x00080000 -> 0x00080000
    Movable zone start PFN for each node
    early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x00044000
    1: 0x00044000 -> 0x00080000
    [boot]0015 Setup Done
    Built 2 zonelists in Node order, mobility grouping on. Total pages:
    517120
    Policy zone: DMA
    Kernel command line: ro console=hvc0 autobench_args: root=/dev/sda6
    ABAT:1222922465
    Experimental hierarchical RCU implementation.
    RCU-based detection of stalled CPUs is enabled.
    Experimental hierarchical RCU init done.
    [boot]0020 XICS Init
    [boot]0021 XICS Done
    PID hash table entries: 4096 (order: 12, 32768 bytes)
    clocksource: timebase mult[7d0000] shift[22] registered
    Console: colour dummy device 80x25
    console handover: boot [udbg0] -> real [hvc0]
    Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
    Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
    freeing bootmem node 0
    freeing bootmem node 1
    Memory: 2038156k/2097152k available (7200k kernel code, 58996k reserved,
    1052k data, 513k bss, 292k init)
    SLUB: Genslabs=13, HWalign=128, Order=0-3, MinObjects=0, CPUs=8,
    Nodes=16
    Calibrating delay loop... 915.45 BogoMIPS (lpj=1830912)
    Mount-cache hash table entries: 256
    Initializing cgroup subsys ns
    Initializing cgroup subsys cpuacct
    Processor 1 found.
    Processor 2 found.
    Processor 3 found.
    Processor 4 found.
    Processor 5 found.
    Processor 6 found.
    Processor 7 found.
    Brought up 8 CPUs
    net_namespace: 1152 bytes
    NET: Registered protocol family 16
    IBM eBus Device Driver
    PCI: Probing PCI hardware
    SCSI subsystem initialized
    usbcore: registered new interface driver usbfs
    usbcore: registered new interface driver hub
    usbcore: registered new device driver usb
    NET: Registered protocol family 2
    IP route cache hash table entries: 65536 (order: 7, 524288 bytes)
    TCP established hash table entries: 262144 (order: 10, 4194304 bytes)
    TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
    TCP: Hash tables configured (established 262144 bind 65536)
    TCP reno registered
    NET: Registered protocol family 1
    checking if image is initramfs... it is
    Freeing initrd memory: 2331k freed
    IOMMU table initialized, virtual merging enabled
    audit: initializing netlink socket (disabled)
    type=2000 audit(1222908206.468:1): initialized
    HugeTLB registered 16 MB page size, pre-allocated 0 pages
    HugeTLB registered 16 GB page size, pre-allocated 0 pages
    HugeTLB registered 64 KB page size, pre-allocated 0 pages
    Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
    msgmni has been set to 3985
    Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
    io scheduler noop registered
    io scheduler anticipatory registered (default)
    io scheduler deadline registered
    io scheduler cfq registered
    Generic RTC Driver v1.07
    Serial: 8250/16550 driver4 ports, IRQ sharing disabled
    brd: module loaded
    loop: module loaded
    Intel(R) PRO/1000 Network Driver - version 7.3.20-k3-NAPI
    Copyright (c) 1999-2006 Intel Corporation.
    pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
    e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
    e100: Copyright(c) 1999-2006 Intel Corporation
    console [netcon0] enabled
    netconsole: network logging started
    Uniform Multi-Platform E-IDE driver
    ipr: IBM Power RAID SCSI Device Driver version: 2.4.1 (April 24, 2007)
    ibmvscsi 30000003: SRP_VERSION: 16.a
    scsi0 : IBM POWER Virtual SCSI Adapter 1.5.8
    ibmvscsi 30000003: partner initialization complete
    ibmvscsi 30000003: sent SRP login
    ibmvscsi 30000003: SRP_LOGIN succeeded
    ibmvscsi 30000003: host srp version: 16.a, host partition tundro1 (1),
    OS 2, max io 262144
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi 0:0:1:0: Direct-Access IBM VDASD blkdev 0001 PQ: 0
    ANSI: 4
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    scsi scan: INQUIRY result too short (5), using 36
    st: Version 20080504, fixed bufsize 32768, s/g segs 256
    Driver 'st' needs updating - please use bus_type methods
    Driver 'sd' needs updating - please use bus_type methods
    sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
    sd 0:0:1:0: [sda] Write Protect is off
    sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
    support DPO or FUA
    sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
    sd 0:0:1:0: [sda] Write Protect is off
    sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
    support DPO or FUA
    sda: sda1 sda2 sda3 sda4 < sda5 sda6 sda7 >
    sd 0:0:1:0: [sda] Attached SCSI disk
    Driver 'sr' needs updating - please use bus_type methods
    sd 0:0:1:0: Attached scsi generic sg0 type 0
    Initializing USB Mass Storage driver...
    usbcore: registered new interface driver usb-storage
    USB Mass Storage support registered.
    mice: PS/2 mouse device common for all mice
    md: linear personality registered for level -1
    md: raid0 personality registered for level 0
    md: raid1 personality registered for level 1
    device-mapper: ioctl: 4.14.0-ioctl (2008-04-23) initialised:
    dm-devel@redhat.com
    usbcore: registered new interface driver hiddev
    usbcore: registered new interface driver usbhid
    usbhid: v2.6:USB HID core driver
    IPv4 over IPv4 tunneling driver
    TCP cubic registered
    NET: Registered protocol family 17
    RPC: Registered udp transport module.
    RPC: Registered tcp transport module.
    registered taskstats version 1
    Freeing unused kernel memory: 292k freed
    Red Hat nash version 5.1.19.6 starting
    Mounting proc filesystem
    Mounting sysfs filesystem
    Creating /dev
    Creating initial device nodes
    Setting up hotplug.
    Creating block device nodes.
    Loading ehea.ko module
    IBM eHEA ethernet device driver (Release EHEA_0092)
    Scanning and configuring dmraid supported devices
    Creating root device.
    Mounting root filesystem.
    kjournald starting. Commit interval 5 seconds
    EXT3-fs: mounted filesystem with ordered data mode.
    Setting up other filesystems.
    Setting up new root fs
    no fstab.sys, mounting internal defaults
    Switching to new root and running init.
    unmounting old /dev
    unmounting old /proc
    unmounting old /sys
    INIT: version 2.86 booting
    Welcome to Red Hat Enterprise Linux Server
    Press 'I' to enter interactive startup.
    RCU detected CPU 5 stall (t=1222922609/2)
    Call Trace:
    [c000000042d03920] [c00000000000fdec] .show_stack+0x70/0x184
    (unreliable)
    [c000000042d039d0] [c0000000000a1de0] .__rcu_pending+0x98/0x2ec
    [c000000042d03a70] [c0000000000a206c] .rcu_pending+0x38/0x88
    [c000000042d03af0] [c00000000006363c] .update_process_times+0x48/0x94
    [c000000042d03b80] [c00000000007bae4] .tick_sched_timer+0xbc/0x118
    [c000000042d03c20] [c000000000073a88] .__run_hrtimer+0x78/0x118
    [c000000042d03cc0] [c000000000074b3c] .hrtimer_interrupt+0x128/0x1e4
    [c000000042d03d90] [c00000000002304c] .timer_interrupt+0xe0/0x15c
    [c000000042d03e30] [c000000000003700] decrementer_common+0x100/0x180
    Setting clock (localtime): Thu Oct 2 00:43:30 EDT 2008 [ OK ]
    Starting udev: [ OK ]
    Setting hostname tundro2.rchland.ibm.com: [ OK ]
    No devices found
    Setting up Logical Volume Management: No volume groups found
    [ OK ]
    Checking filesystems
    Checking all file systems.
    [/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a /dev/sda6
    /dev/sda6: Superblock last mount time is in the future. FIXED.
    /dev/sda6: clean, 134758/1281696 files, 853722/2560336 blocks
    [ OK ]
    Remounting root filesystem in read-write mode: [ OK ]
    Mounting local filesystems: [ OK ]
    Enabling local filesystem quotas: quotaon: Warning: No quota format
    detected in the kernel.
    [ OK ]
    Enabling /etc/fstab swaps: swapon: /dev/sda5: Invalid argument
    [FAILED]
    INIT: Entering runlevel: 3
    Entering non-interactive startup
    Starting boot.loadmodules: [ OK ]
    Starting background readahead: [ OK ]
    Bringing up loopback interface: [ OK ]
    Bringing up interface eth0: [ OK ]
    Starting auditd: [ OK ]
    Starting system logger: [ OK ]
    Starting kernel logger: [ OK ]
    Starting irqbalance: [ OK ]
    Starting RPC idmapd: FATAL: Module sunrpc not found.
    FATAL: Error running install command for sunrpc
    Error: RPC MTAB does not exist.
    Starting system message bus: [ OK ]
    Starting Bluetooth services:[ OK ][ OK ]Can't open RFCOMM control
    socket: Address family not supported by protocol

    Mounting other filesystems: [ OK ]
    Starting PC/SC smart card daemon (pcscd): [ OK ]
    Starting hidd: Can't open HIDP control socket: Address family not
    supported by protocol
    [FAILED]
    Starting autofs: Loading autofs4: [ OK ]
    Starting automount: [ OK ]
    [ OK ]
    Starting iprinit: Starting ipr initialization daemon[ OK ]
    [ OK ]
    Starting iprupdate: Checking ipr microcode levels
    Completed ipr microcode updates[ OK ]
    [ OK ]
    Starting iprdump: Starting ipr dump daemon[ OK ]
    [ OK ]
    Starting sshd: [ OK ]
    Starting cups: [ OK ]
    Starting xinetd: [ OK ]
    Starting vpdupdate: [ OK ]
    Starting console mouse services: [ OK ]
    Starting crond: [ OK ]
    Starting xfs: [ OK ]
    Starting anacron: [ OK ]
    Starting atd: [ OK ]
    Starting yum-updatesd: [ OK ]
    Starting Avahi daemon... [ OK ]
    Starting HAL daemon: [ OK ]
    Starting smartd: [ OK ]

    Red Hat Enterprise Linux Server release 5.2 (Tikanga)
    Kernel 2.6.27-rc7-autokern1 on an ppc64

    tundro2.rchland.ibm.com login:-- 0:conmux-control -- time-stamp --
    Oct/01/08 21:45:47 --
    -- 0:conmux-control -- time-stamp -- Oct/01/08 22:01:03 --
    (bot:conmon-payload) disconnected
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
    > >
    > > * Paul E. McKenney wrote:
    > >
    > > > Hello!
    > > >
    > > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > > which defaults disabled. This is a debugging feature, not something
    > > > that non-kernel-hackers would be expected to care about. This feature
    > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > > > preemption disabled in PREEMPT builds. This is essentially a port of
    > > > this functionality from the treercu patch.
    > > >
    > > > One current shortcoming: on some systems, stalls are detected during
    > > > early boot, when we normally would not care about them. My thought is
    > > > to add a call from late initialization to suppress stall detection
    > > > until the system is well along its way to being booted, but thought I
    > > > should check to see if there might already be something for this
    > > > purpose.

    > >
    > > could you be a bit more specific, why do those warnings show up and why
    > > dont we care about them? There are things like networking that
    > > occasionally do an rcu_sync() and a stall could mean a bootup hang.

    >
    > Hmmm...
    >
    > Good point, I was just falling back on my old "we don't care about RCU
    > stalls in boot-time code" rule from long ago. It is entirely possible
    > (in fact reasonably likely) that this rule no longer applies to Linux as
    > it exists today. So please see below for the console output. Not all
    > systems report this stall. I have recently been running only on Power
    > systems, will fire off on some x86s. My kneejerk reaction was that the
    > "stall" was really due to the clock-setting operation -- RCU stalls are
    > based on get_seconds().
    >
    > So maybe I need to change the stall-detection code to use jiffies
    > instead.
    >
    > Thoughts?
    >


    You could possibly use get monotonic time, which should hopefully be a
    bit better than its NTP-corrected alternatives.

    Also, do you have all the tools you need to poinpoint the precise source
    of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
    new tracepoint to instrument RCU callback execution at boot time and to
    trace it with LTTng is close to trivial. (actually, very early kernel
    boot is not so trivial as we would have to create a small kernel module
    to activate lttng tracing, but tracing the early phases of userspace
    bootup is trivial given we can use the userspace API).

    Mathieu

    > > > (Currently against 2.6.27-rc8, FYI.)
    > > >
    > > > Thoughts?
    > > >
    > > > Signed-off-by: Paul E. McKenney

    > >
    > > i think this is a very good idea in general - often the question comes
    > > up whether a hang seen in the RCU code is indeed caused by RCU or other
    > > factors. Could you perhaps rebase it against tip/core/rcu ? [or
    > > tip/master for convenience]

    >
    > Will do!
    >
    > Thanx, Paul
    >
    > ------------------------------------------------------------------------
    > Console output: search for "RCU detected CPU 5 stall" near the end.
    > ------------------------------------------------------------------------
    >
    > Elapsed time since release of system processors: 7742 mins 58 secs
    >
    > Config file read, 2048 bytes
    > Welcome
    > Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
    > Enter "help" to get some basic usage information
    > boot: autobench
    > Please wait, loading kernel...
    > Elf64 kernel loaded...
    > Loading ramdisk...
    > ramdisk loaded at 02b00000, size: 2331 Kbytes
    > OF stdout device is: /vdevice/vty@30000000
    > Hypertas detected, assuming LPAR !
    > -\|/-\|/-\|/-\|
    > Elapsed time since release of system processors: 7743 mins 29 secs
    >
    > Config file read, 2048 bytes
    > Welcome
    > Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
    > Enter "help" to get some basic usage information
    > boot: autobench
    > Please wait, loading kernel...
    > Elf64 kernel loaded...
    > Loading ramdisk...
    > ramdisk loaded at 02b00000, size: 2331 Kbytes
    > OF stdout device is: /vdevice/vty@30000000
    > Hypertas detected, assuming LPAR !
    > command line: ro console=hvc0 autobench_args: root=/dev/sda6
    > ABAT:1222922465
    > memory layout at init:
    > alloc_bottom : 0000000002d47000
    > alloc_top : 0000000008000000
    > alloc_top_hi : 0000000008000000
    > rmo_top : 0000000008000000
    > ram_top : 0000000008000000
    > Looking for displays
    > instantiating rtas at 0x000000000757a000 ... done
    > boot cpu hw idx 0000000000000000
    > starting cpu hw idx 0000000000000002... done
    > starting cpu hw idx 0000000000000004... done
    > starting cpu hw idx 0000000000000006... done
    > copying OF device tree ...
    > Building dt strings...
    > Building dt structure...
    > Device tree strings 0x0000000002f48000 -> 0x0000000002f49537
    > Device tree struct 0x0000000002f4a000 -> 0x0000000002f58000
    > Calling quiesce ...
    > returning from prom_init
    > Using pSeries machine description
    > Using 1TB segments
    > Found initrd at 0xc000000002b00000:0xc000000002d46c00
    > console [udbg0] enabled
    > Partition configured for 8 cpus.
    > CPU maps initialized for 2 threads per core
    > Starting Linux PPC64 #1 SMP PREEMPT Thu Oct 2 00:39:54 EDT 2008
    > -----------------------------------------------------
    > ppc64_pft_size = 0x19
    > physicalMemorySize = 0x80000000
    > htab_hash_mask = 0x3ffff
    > -----------------------------------------------------
    > Initializing cgroup subsys cpuset
    > Linux version 2.6.27-rc7-autokern1 (root@tundro2.rchland.ibm.com) (gcc
    > version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP PREEMPT Thu Oct 2
    > 00:39:54 EDT 2008
    > [boot]0012 Setup Arch
    > EEH: No capable adapters found
    > PPC64 nvram contains 15360 bytes
    > Zone PFN ranges:
    > DMA 0x00000000 -> 0x00080000
    > Normal 0x00080000 -> 0x00080000
    > Movable zone start PFN for each node
    > early_node_map[2] active PFN ranges
    > 0: 0x00000000 -> 0x00044000
    > 1: 0x00044000 -> 0x00080000
    > [boot]0015 Setup Done
    > Built 2 zonelists in Node order, mobility grouping on. Total pages:
    > 517120
    > Policy zone: DMA
    > Kernel command line: ro console=hvc0 autobench_args: root=/dev/sda6
    > ABAT:1222922465
    > Experimental hierarchical RCU implementation.
    > RCU-based detection of stalled CPUs is enabled.
    > Experimental hierarchical RCU init done.
    > [boot]0020 XICS Init
    > [boot]0021 XICS Done
    > PID hash table entries: 4096 (order: 12, 32768 bytes)
    > clocksource: timebase mult[7d0000] shift[22] registered
    > Console: colour dummy device 80x25
    > console handover: boot [udbg0] -> real [hvc0]
    > Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
    > Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
    > freeing bootmem node 0
    > freeing bootmem node 1
    > Memory: 2038156k/2097152k available (7200k kernel code, 58996k reserved,
    > 1052k data, 513k bss, 292k init)
    > SLUB: Genslabs=13, HWalign=128, Order=0-3, MinObjects=0, CPUs=8,
    > Nodes=16
    > Calibrating delay loop... 915.45 BogoMIPS (lpj=1830912)
    > Mount-cache hash table entries: 256
    > Initializing cgroup subsys ns
    > Initializing cgroup subsys cpuacct
    > Processor 1 found.
    > Processor 2 found.
    > Processor 3 found.
    > Processor 4 found.
    > Processor 5 found.
    > Processor 6 found.
    > Processor 7 found.
    > Brought up 8 CPUs
    > net_namespace: 1152 bytes
    > NET: Registered protocol family 16
    > IBM eBus Device Driver
    > PCI: Probing PCI hardware
    > SCSI subsystem initialized
    > usbcore: registered new interface driver usbfs
    > usbcore: registered new interface driver hub
    > usbcore: registered new device driver usb
    > NET: Registered protocol family 2
    > IP route cache hash table entries: 65536 (order: 7, 524288 bytes)
    > TCP established hash table entries: 262144 (order: 10, 4194304 bytes)
    > TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
    > TCP: Hash tables configured (established 262144 bind 65536)
    > TCP reno registered
    > NET: Registered protocol family 1
    > checking if image is initramfs... it is
    > Freeing initrd memory: 2331k freed
    > IOMMU table initialized, virtual merging enabled
    > audit: initializing netlink socket (disabled)
    > type=2000 audit(1222908206.468:1): initialized
    > HugeTLB registered 16 MB page size, pre-allocated 0 pages
    > HugeTLB registered 16 GB page size, pre-allocated 0 pages
    > HugeTLB registered 64 KB page size, pre-allocated 0 pages
    > Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
    > msgmni has been set to 3985
    > Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
    > io scheduler noop registered
    > io scheduler anticipatory registered (default)
    > io scheduler deadline registered
    > io scheduler cfq registered
    > Generic RTC Driver v1.07
    > Serial: 8250/16550 driver4 ports, IRQ sharing disabled
    > brd: module loaded
    > loop: module loaded
    > Intel(R) PRO/1000 Network Driver - version 7.3.20-k3-NAPI
    > Copyright (c) 1999-2006 Intel Corporation.
    > pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
    > e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
    > e100: Copyright(c) 1999-2006 Intel Corporation
    > console [netcon0] enabled
    > netconsole: network logging started
    > Uniform Multi-Platform E-IDE driver
    > ipr: IBM Power RAID SCSI Device Driver version: 2.4.1 (April 24, 2007)
    > ibmvscsi 30000003: SRP_VERSION: 16.a
    > scsi0 : IBM POWER Virtual SCSI Adapter 1.5.8
    > ibmvscsi 30000003: partner initialization complete
    > ibmvscsi 30000003: sent SRP login
    > ibmvscsi 30000003: SRP_LOGIN succeeded
    > ibmvscsi 30000003: host srp version: 16.a, host partition tundro1 (1),
    > OS 2, max io 262144
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi 0:0:1:0: Direct-Access IBM VDASD blkdev 0001 PQ: 0
    > ANSI: 4
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > scsi scan: INQUIRY result too short (5), using 36
    > st: Version 20080504, fixed bufsize 32768, s/g segs 256
    > Driver 'st' needs updating - please use bus_type methods
    > Driver 'sd' needs updating - please use bus_type methods
    > sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
    > sd 0:0:1:0: [sda] Write Protect is off
    > sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
    > support DPO or FUA
    > sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
    > sd 0:0:1:0: [sda] Write Protect is off
    > sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
    > support DPO or FUA
    > sda: sda1 sda2 sda3 sda4 < sda5 sda6 sda7 >
    > sd 0:0:1:0: [sda] Attached SCSI disk
    > Driver 'sr' needs updating - please use bus_type methods
    > sd 0:0:1:0: Attached scsi generic sg0 type 0
    > Initializing USB Mass Storage driver...
    > usbcore: registered new interface driver usb-storage
    > USB Mass Storage support registered.
    > mice: PS/2 mouse device common for all mice
    > md: linear personality registered for level -1
    > md: raid0 personality registered for level 0
    > md: raid1 personality registered for level 1
    > device-mapper: ioctl: 4.14.0-ioctl (2008-04-23) initialised:
    > dm-devel@redhat.com
    > usbcore: registered new interface driver hiddev
    > usbcore: registered new interface driver usbhid
    > usbhid: v2.6:USB HID core driver
    > IPv4 over IPv4 tunneling driver
    > TCP cubic registered
    > NET: Registered protocol family 17
    > RPC: Registered udp transport module.
    > RPC: Registered tcp transport module.
    > registered taskstats version 1
    > Freeing unused kernel memory: 292k freed
    > Red Hat nash version 5.1.19.6 starting
    > Mounting proc filesystem
    > Mounting sysfs filesystem
    > Creating /dev
    > Creating initial device nodes
    > Setting up hotplug.
    > Creating block device nodes.
    > Loading ehea.ko module
    > IBM eHEA ethernet device driver (Release EHEA_0092)
    > Scanning and configuring dmraid supported devices
    > Creating root device.
    > Mounting root filesystem.
    > kjournald starting. Commit interval 5 seconds
    > EXT3-fs: mounted filesystem with ordered data mode.
    > Setting up other filesystems.
    > Setting up new root fs
    > no fstab.sys, mounting internal defaults
    > Switching to new root and running init.
    > unmounting old /dev
    > unmounting old /proc
    > unmounting old /sys
    > INIT: version 2.86 booting
    > Welcome to Red Hat Enterprise Linux Server
    > Press 'I' to enter interactive startup.
    > RCU detected CPU 5 stall (t=1222922609/2)
    > Call Trace:
    > [c000000042d03920] [c00000000000fdec] .show_stack+0x70/0x184
    > (unreliable)
    > [c000000042d039d0] [c0000000000a1de0] .__rcu_pending+0x98/0x2ec
    > [c000000042d03a70] [c0000000000a206c] .rcu_pending+0x38/0x88
    > [c000000042d03af0] [c00000000006363c] .update_process_times+0x48/0x94
    > [c000000042d03b80] [c00000000007bae4] .tick_sched_timer+0xbc/0x118
    > [c000000042d03c20] [c000000000073a88] .__run_hrtimer+0x78/0x118
    > [c000000042d03cc0] [c000000000074b3c] .hrtimer_interrupt+0x128/0x1e4
    > [c000000042d03d90] [c00000000002304c] .timer_interrupt+0xe0/0x15c
    > [c000000042d03e30] [c000000000003700] decrementer_common+0x100/0x180
    > Setting clock (localtime): Thu Oct 2 00:43:30 EDT 2008 [ OK ]
    > Starting udev: [ OK ]
    > Setting hostname tundro2.rchland.ibm.com: [ OK ]
    > No devices found
    > Setting up Logical Volume Management: No volume groups found
    > [ OK ]
    > Checking filesystems
    > Checking all file systems.
    > [/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a /dev/sda6
    > /dev/sda6: Superblock last mount time is in the future. FIXED.
    > /dev/sda6: clean, 134758/1281696 files, 853722/2560336 blocks
    > [ OK ]
    > Remounting root filesystem in read-write mode: [ OK ]
    > Mounting local filesystems: [ OK ]
    > Enabling local filesystem quotas: quotaon: Warning: No quota format
    > detected in the kernel.
    > [ OK ]
    > Enabling /etc/fstab swaps: swapon: /dev/sda5: Invalid argument
    > [FAILED]
    > INIT: Entering runlevel: 3
    > Entering non-interactive startup
    > Starting boot.loadmodules: [ OK ]
    > Starting background readahead: [ OK ]
    > Bringing up loopback interface: [ OK ]
    > Bringing up interface eth0: [ OK ]
    > Starting auditd: [ OK ]
    > Starting system logger: [ OK ]
    > Starting kernel logger: [ OK ]
    > Starting irqbalance: [ OK ]
    > Starting RPC idmapd: FATAL: Module sunrpc not found.
    > FATAL: Error running install command for sunrpc
    > Error: RPC MTAB does not exist.
    > Starting system message bus: [ OK ]
    > Starting Bluetooth services:[ OK ][ OK ]Can't open RFCOMM control
    > socket: Address family not supported by protocol
    >
    > Mounting other filesystems: [ OK ]
    > Starting PC/SC smart card daemon (pcscd): [ OK ]
    > Starting hidd: Can't open HIDP control socket: Address family not
    > supported by protocol
    > [FAILED]
    > Starting autofs: Loading autofs4: [ OK ]
    > Starting automount: [ OK ]
    > [ OK ]
    > Starting iprinit: Starting ipr initialization daemon[ OK ]
    > [ OK ]
    > Starting iprupdate: Checking ipr microcode levels
    > Completed ipr microcode updates[ OK ]
    > [ OK ]
    > Starting iprdump: Starting ipr dump daemon[ OK ]
    > [ OK ]
    > Starting sshd: [ OK ]
    > Starting cups: [ OK ]
    > Starting xinetd: [ OK ]
    > Starting vpdupdate: [ OK ]
    > Starting console mouse services: [ OK ]
    > Starting crond: [ OK ]
    > Starting xfs: [ OK ]
    > Starting anacron: [ OK ]
    > Starting atd: [ OK ]
    > Starting yum-updatesd: [ OK ]
    > Starting Avahi daemon... [ OK ]
    > Starting HAL daemon: [ OK ]
    > Starting smartd: [ OK ]
    >
    > Red Hat Enterprise Linux Server release 5.2 (Tikanga)
    > Kernel 2.6.27-rc7-autokern1 on an ppc64
    >
    > tundro2.rchland.ibm.com login:-- 0:conmux-control -- time-stamp --
    > Oct/01/08 21:45:47 --
    > -- 0:conmux-control -- time-stamp -- Oct/01/08 22:01:03 --
    > (bot:conmon-payload) disconnected


    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    Hello again!

    This patch adds stalled-CPU detection to Classic RCU. This capability
    is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    which defaults disabled. This is a debugging feature, not something
    that non-kernel-hackers would be expected to care about. This feature
    can detect looping CPUs in !PREEMPT builds and looping CPUs with
    preemption disabled in PREEMPT builds. This is essentially a port of
    this functionality from the treercu patch.

    This version uses jiffies rather than get_seconds(), which eliminates
    the spurious boot-time CPU stall warnings seen on some systems with
    the previous patch.

    This is still against v2.6.27-rc8 -- I will do a version against tip
    this evening (Pacific Time) when I get back to the combination of better
    bandwidth and AC power.

    Signed-off-by: Paul E. McKenney
    ---

    include/linux/rcuclassic.h | 9 ++++
    kernel/rcuclassic.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
    lib/Kconfig.debug | 13 ++++++
    3 files changed, 110 insertions(+)

    diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    index 4ab8436..cab055b 100644
    --- a/include/linux/rcuclassic.h
    +++ b/include/linux/rcuclassic.h
    @@ -40,6 +40,10 @@
    #include
    #include

    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

    /* Global control variables for rcupdate callback mechanism. */
    struct rcu_ctrlblk {
    @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
    spinlock_t lock ____cacheline_internodealigned_in_smp;
    cpumask_t cpumask; /* CPUs that need to switch in order */
    /* for current batch to proceed. */
    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    + unsigned long gp_start; /* Time at which GP started in jiffies. */
    + unsigned long jiffies_stall;
    + /* Time at which to check for CPU stalls. */
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    } ____cacheline_internodealigned_in_smp;

    /* Is batch a before batch b ? */
    diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    index aad93cd..a299876 100644
    --- a/kernel/rcuclassic.c
    +++ b/kernel/rcuclassic.c
    @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
    }
    #endif

    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    +
    +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    +{
    + rcp->gp_start = jiffies;
    + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
    +}
    +
    +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + int cpu;
    + long delta;
    + unsigned long flags;
    +
    + /* Only let one CPU complain about others per time interval. */
    +
    + spin_lock_irqsave(&rcp->lock, flags);
    + delta = jiffies - rcp->jiffies_stall;
    + if (delta < 2 || rcp->cur != rcp->completed) {
    + spin_unlock_irqrestore(&rcp->lock, flags);
    + return;
    + }
    + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    + spin_unlock_irqrestore(&rcp->lock, flags);
    +
    + /* OK, time to rat on our buddy... */
    +
    + printk(KERN_ERR "RCU detected CPU stalls:");
    + for_each_possible_cpu(cpu) {
    + if (cpu_isset(cpu, rcp->cpumask))
    + printk(" %d", cpu);
    + }
    + printk(" (detected by %d, t=%ld jiffies)\n",
    + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    +}
    +
    +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + unsigned long flags;
    +
    + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    + smp_processor_id(), jiffies,
    + jiffies - rcp->gp_start);
    + dump_stack();
    + spin_lock_irqsave(&rcp->lock, flags);
    + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
    + rcp->jiffies_stall =
    + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    + spin_unlock_irqrestore(&rcp->lock, flags);
    + set_need_resched(); /* kick ourselves to get things going. */
    +}
    +
    +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + long delta;
    +
    + delta = jiffies - rcp->jiffies_stall;
    + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    +
    + /* We haven't checked in, so go dump stack. */
    + print_cpu_stall(rcp);
    +
    + } else if (rcp->cur != rcp->completed && delta >= 2) {
    +
    + /* They had two seconds to dump stack, so complain. */
    + print_other_cpu_stall(rcp);
    + }
    +}
    +
    +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    +
    +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    +{
    +}
    +
    +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    +{
    +}
    +
    +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    +
    /**
    * call_rcu - Queue an RCU callback for invocation after a grace period.
    * @head: structure to be used for queueing the RCU updates.
    @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    */
    smp_wmb();
    rcp->cur++;
    + record_gp_stall_check_time(rcp);

    /*
    * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)

    static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    {
    + /* Check for CPU stalls, if enabled. */
    + check_cpu_stall(rcp);
    +
    /* This cpu has pending rcu entries and the grace period
    * for them has completed.
    */
    @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
    static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
    struct rcu_data *rdp)
    {
    +#ifdef CONFIG_DEBUG_RCU_STALL
    + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    memset(rdp, 0, sizeof(*rdp));
    rdp->curtail = &rdp->curlist;
    rdp->nxttail = &rdp->nxtlist;
    diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    index 0b50481..9fee969 100644
    --- a/lib/Kconfig.debug
    +++ b/lib/Kconfig.debug
    @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
    Say N here if you want the RCU torture tests to start only
    after being manually enabled via /proc.

    +config RCU_CPU_STALL_DETECTOR
    + bool "Check for stalled CPUs delaying RCU grace periods"
    + depends on CLASSIC_RCU
    + default n
    + help
    + This option causes RCU to printk information on which
    + CPUs are delaying the current grace period, but only when
    + the grace period extends for excessive time periods.
    +
    + Say Y if you want RCU to perform such checks.
    +
    + Say N if you are unsure.
    +
    config KPROBES_SANITY_TEST
    bool "Kprobes sanity tests"
    depends on DEBUG_KERNEL
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    On Thu, Oct 02, 2008 at 11:10:46AM -0400, Mathieu Desnoyers wrote:
    > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
    > > >
    > > > * Paul E. McKenney wrote:
    > > >
    > > > > Hello!
    > > > >
    > > > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > > > which defaults disabled. This is a debugging feature, not something
    > > > > that non-kernel-hackers would be expected to care about. This feature
    > > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > > > > preemption disabled in PREEMPT builds. This is essentially a port of
    > > > > this functionality from the treercu patch.
    > > > >
    > > > > One current shortcoming: on some systems, stalls are detected during
    > > > > early boot, when we normally would not care about them. My thought is
    > > > > to add a call from late initialization to suppress stall detection
    > > > > until the system is well along its way to being booted, but thought I
    > > > > should check to see if there might already be something for this
    > > > > purpose.
    > > >
    > > > could you be a bit more specific, why do those warnings show up and why
    > > > dont we care about them? There are things like networking that
    > > > occasionally do an rcu_sync() and a stall could mean a bootup hang.

    > >
    > > Hmmm...
    > >
    > > Good point, I was just falling back on my old "we don't care about RCU
    > > stalls in boot-time code" rule from long ago. It is entirely possible
    > > (in fact reasonably likely) that this rule no longer applies to Linux as
    > > it exists today. So please see below for the console output. Not all
    > > systems report this stall. I have recently been running only on Power
    > > systems, will fire off on some x86s. My kneejerk reaction was that the
    > > "stall" was really due to the clock-setting operation -- RCU stalls are
    > > based on get_seconds().
    > >
    > > So maybe I need to change the stall-detection code to use jiffies
    > > instead.
    > >
    > > Thoughts?
    > >

    >
    > You could possibly use get monotonic time, which should hopefully be a
    > bit better than its NTP-corrected alternatives.


    OK. I thought jiffies were monotonic (though not perfectly correlated
    to the passage of time). This is a warning timeout, so does not need
    better than a few percent accuracy, as long as time never jumps too far.

    But what API did you have in mind?

    > Also, do you have all the tools you need to poinpoint the precise source
    > of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
    > new tracepoint to instrument RCU callback execution at boot time and to
    > trace it with LTTng is close to trivial. (actually, very early kernel
    > boot is not so trivial as we would have to create a small kernel module
    > to activate lttng tracing, but tracing the early phases of userspace
    > bootup is trivial given we can use the userspace API).


    I just dump stack, which historically worked pretty well.

    Could you please tell me more about what you think should be added?

    Thanx, Paul

    > Mathieu
    >
    > > > > (Currently against 2.6.27-rc8, FYI.)
    > > > >
    > > > > Thoughts?
    > > > >
    > > > > Signed-off-by: Paul E. McKenney
    > > >
    > > > i think this is a very good idea in general - often the question comes
    > > > up whether a hang seen in the RCU code is indeed caused by RCU or other
    > > > factors. Could you perhaps rebase it against tip/core/rcu ? [or
    > > > tip/master for convenience]

    > >
    > > Will do!
    > >
    > > Thanx, Paul
    > >
    > > ------------------------------------------------------------------------
    > > Console output: search for "RCU detected CPU 5 stall" near the end.
    > > ------------------------------------------------------------------------
    > >
    > > Elapsed time since release of system processors: 7742 mins 58 secs
    > >
    > > Config file read, 2048 bytes
    > > Welcome
    > > Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
    > > Enter "help" to get some basic usage information
    > > boot: autobench
    > > Please wait, loading kernel...
    > > Elf64 kernel loaded...
    > > Loading ramdisk...
    > > ramdisk loaded at 02b00000, size: 2331 Kbytes
    > > OF stdout device is: /vdevice/vty@30000000
    > > Hypertas detected, assuming LPAR !
    > > -\|/-\|/-\|/-\|
    > > Elapsed time since release of system processors: 7743 mins 29 secs
    > >
    > > Config file read, 2048 bytes
    > > Welcome
    > > Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
    > > Enter "help" to get some basic usage information
    > > boot: autobench
    > > Please wait, loading kernel...
    > > Elf64 kernel loaded...
    > > Loading ramdisk...
    > > ramdisk loaded at 02b00000, size: 2331 Kbytes
    > > OF stdout device is: /vdevice/vty@30000000
    > > Hypertas detected, assuming LPAR !
    > > command line: ro console=hvc0 autobench_args: root=/dev/sda6
    > > ABAT:1222922465
    > > memory layout at init:
    > > alloc_bottom : 0000000002d47000
    > > alloc_top : 0000000008000000
    > > alloc_top_hi : 0000000008000000
    > > rmo_top : 0000000008000000
    > > ram_top : 0000000008000000
    > > Looking for displays
    > > instantiating rtas at 0x000000000757a000 ... done
    > > boot cpu hw idx 0000000000000000
    > > starting cpu hw idx 0000000000000002... done
    > > starting cpu hw idx 0000000000000004... done
    > > starting cpu hw idx 0000000000000006... done
    > > copying OF device tree ...
    > > Building dt strings...
    > > Building dt structure...
    > > Device tree strings 0x0000000002f48000 -> 0x0000000002f49537
    > > Device tree struct 0x0000000002f4a000 -> 0x0000000002f58000
    > > Calling quiesce ...
    > > returning from prom_init
    > > Using pSeries machine description
    > > Using 1TB segments
    > > Found initrd at 0xc000000002b00000:0xc000000002d46c00
    > > console [udbg0] enabled
    > > Partition configured for 8 cpus.
    > > CPU maps initialized for 2 threads per core
    > > Starting Linux PPC64 #1 SMP PREEMPT Thu Oct 2 00:39:54 EDT 2008
    > > -----------------------------------------------------
    > > ppc64_pft_size = 0x19
    > > physicalMemorySize = 0x80000000
    > > htab_hash_mask = 0x3ffff
    > > -----------------------------------------------------
    > > Initializing cgroup subsys cpuset
    > > Linux version 2.6.27-rc7-autokern1 (root@tundro2.rchland.ibm.com) (gcc
    > > version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP PREEMPT Thu Oct 2
    > > 00:39:54 EDT 2008
    > > [boot]0012 Setup Arch
    > > EEH: No capable adapters found
    > > PPC64 nvram contains 15360 bytes
    > > Zone PFN ranges:
    > > DMA 0x00000000 -> 0x00080000
    > > Normal 0x00080000 -> 0x00080000
    > > Movable zone start PFN for each node
    > > early_node_map[2] active PFN ranges
    > > 0: 0x00000000 -> 0x00044000
    > > 1: 0x00044000 -> 0x00080000
    > > [boot]0015 Setup Done
    > > Built 2 zonelists in Node order, mobility grouping on. Total pages:
    > > 517120
    > > Policy zone: DMA
    > > Kernel command line: ro console=hvc0 autobench_args: root=/dev/sda6
    > > ABAT:1222922465
    > > Experimental hierarchical RCU implementation.
    > > RCU-based detection of stalled CPUs is enabled.
    > > Experimental hierarchical RCU init done.
    > > [boot]0020 XICS Init
    > > [boot]0021 XICS Done
    > > PID hash table entries: 4096 (order: 12, 32768 bytes)
    > > clocksource: timebase mult[7d0000] shift[22] registered
    > > Console: colour dummy device 80x25
    > > console handover: boot [udbg0] -> real [hvc0]
    > > Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
    > > Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
    > > freeing bootmem node 0
    > > freeing bootmem node 1
    > > Memory: 2038156k/2097152k available (7200k kernel code, 58996k reserved,
    > > 1052k data, 513k bss, 292k init)
    > > SLUB: Genslabs=13, HWalign=128, Order=0-3, MinObjects=0, CPUs=8,
    > > Nodes=16
    > > Calibrating delay loop... 915.45 BogoMIPS (lpj=1830912)
    > > Mount-cache hash table entries: 256
    > > Initializing cgroup subsys ns
    > > Initializing cgroup subsys cpuacct
    > > Processor 1 found.
    > > Processor 2 found.
    > > Processor 3 found.
    > > Processor 4 found.
    > > Processor 5 found.
    > > Processor 6 found.
    > > Processor 7 found.
    > > Brought up 8 CPUs
    > > net_namespace: 1152 bytes
    > > NET: Registered protocol family 16
    > > IBM eBus Device Driver
    > > PCI: Probing PCI hardware
    > > SCSI subsystem initialized
    > > usbcore: registered new interface driver usbfs
    > > usbcore: registered new interface driver hub
    > > usbcore: registered new device driver usb
    > > NET: Registered protocol family 2
    > > IP route cache hash table entries: 65536 (order: 7, 524288 bytes)
    > > TCP established hash table entries: 262144 (order: 10, 4194304 bytes)
    > > TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
    > > TCP: Hash tables configured (established 262144 bind 65536)
    > > TCP reno registered
    > > NET: Registered protocol family 1
    > > checking if image is initramfs... it is
    > > Freeing initrd memory: 2331k freed
    > > IOMMU table initialized, virtual merging enabled
    > > audit: initializing netlink socket (disabled)
    > > type=2000 audit(1222908206.468:1): initialized
    > > HugeTLB registered 16 MB page size, pre-allocated 0 pages
    > > HugeTLB registered 16 GB page size, pre-allocated 0 pages
    > > HugeTLB registered 64 KB page size, pre-allocated 0 pages
    > > Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
    > > msgmni has been set to 3985
    > > Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
    > > io scheduler noop registered
    > > io scheduler anticipatory registered (default)
    > > io scheduler deadline registered
    > > io scheduler cfq registered
    > > Generic RTC Driver v1.07
    > > Serial: 8250/16550 driver4 ports, IRQ sharing disabled
    > > brd: module loaded
    > > loop: module loaded
    > > Intel(R) PRO/1000 Network Driver - version 7.3.20-k3-NAPI
    > > Copyright (c) 1999-2006 Intel Corporation.
    > > pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
    > > e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
    > > e100: Copyright(c) 1999-2006 Intel Corporation
    > > console [netcon0] enabled
    > > netconsole: network logging started
    > > Uniform Multi-Platform E-IDE driver
    > > ipr: IBM Power RAID SCSI Device Driver version: 2.4.1 (April 24, 2007)
    > > ibmvscsi 30000003: SRP_VERSION: 16.a
    > > scsi0 : IBM POWER Virtual SCSI Adapter 1.5.8
    > > ibmvscsi 30000003: partner initialization complete
    > > ibmvscsi 30000003: sent SRP login
    > > ibmvscsi 30000003: SRP_LOGIN succeeded
    > > ibmvscsi 30000003: host srp version: 16.a, host partition tundro1 (1),
    > > OS 2, max io 262144
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi 0:0:1:0: Direct-Access IBM VDASD blkdev 0001 PQ: 0
    > > ANSI: 4
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > scsi scan: INQUIRY result too short (5), using 36
    > > st: Version 20080504, fixed bufsize 32768, s/g segs 256
    > > Driver 'st' needs updating - please use bus_type methods
    > > Driver 'sd' needs updating - please use bus_type methods
    > > sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
    > > sd 0:0:1:0: [sda] Write Protect is off
    > > sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
    > > support DPO or FUA
    > > sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
    > > sd 0:0:1:0: [sda] Write Protect is off
    > > sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
    > > support DPO or FUA
    > > sda: sda1 sda2 sda3 sda4 < sda5 sda6 sda7 >
    > > sd 0:0:1:0: [sda] Attached SCSI disk
    > > Driver 'sr' needs updating - please use bus_type methods
    > > sd 0:0:1:0: Attached scsi generic sg0 type 0
    > > Initializing USB Mass Storage driver...
    > > usbcore: registered new interface driver usb-storage
    > > USB Mass Storage support registered.
    > > mice: PS/2 mouse device common for all mice
    > > md: linear personality registered for level -1
    > > md: raid0 personality registered for level 0
    > > md: raid1 personality registered for level 1
    > > device-mapper: ioctl: 4.14.0-ioctl (2008-04-23) initialised:
    > > dm-devel@redhat.com
    > > usbcore: registered new interface driver hiddev
    > > usbcore: registered new interface driver usbhid
    > > usbhid: v2.6:USB HID core driver
    > > IPv4 over IPv4 tunneling driver
    > > TCP cubic registered
    > > NET: Registered protocol family 17
    > > RPC: Registered udp transport module.
    > > RPC: Registered tcp transport module.
    > > registered taskstats version 1
    > > Freeing unused kernel memory: 292k freed
    > > Red Hat nash version 5.1.19.6 starting
    > > Mounting proc filesystem
    > > Mounting sysfs filesystem
    > > Creating /dev
    > > Creating initial device nodes
    > > Setting up hotplug.
    > > Creating block device nodes.
    > > Loading ehea.ko module
    > > IBM eHEA ethernet device driver (Release EHEA_0092)
    > > Scanning and configuring dmraid supported devices
    > > Creating root device.
    > > Mounting root filesystem.
    > > kjournald starting. Commit interval 5 seconds
    > > EXT3-fs: mounted filesystem with ordered data mode.
    > > Setting up other filesystems.
    > > Setting up new root fs
    > > no fstab.sys, mounting internal defaults
    > > Switching to new root and running init.
    > > unmounting old /dev
    > > unmounting old /proc
    > > unmounting old /sys
    > > INIT: version 2.86 booting
    > > Welcome to Red Hat Enterprise Linux Server
    > > Press 'I' to enter interactive startup.
    > > RCU detected CPU 5 stall (t=1222922609/2)
    > > Call Trace:
    > > [c000000042d03920] [c00000000000fdec] .show_stack+0x70/0x184
    > > (unreliable)
    > > [c000000042d039d0] [c0000000000a1de0] .__rcu_pending+0x98/0x2ec
    > > [c000000042d03a70] [c0000000000a206c] .rcu_pending+0x38/0x88
    > > [c000000042d03af0] [c00000000006363c] .update_process_times+0x48/0x94
    > > [c000000042d03b80] [c00000000007bae4] .tick_sched_timer+0xbc/0x118
    > > [c000000042d03c20] [c000000000073a88] .__run_hrtimer+0x78/0x118
    > > [c000000042d03cc0] [c000000000074b3c] .hrtimer_interrupt+0x128/0x1e4
    > > [c000000042d03d90] [c00000000002304c] .timer_interrupt+0xe0/0x15c
    > > [c000000042d03e30] [c000000000003700] decrementer_common+0x100/0x180
    > > Setting clock (localtime): Thu Oct 2 00:43:30 EDT 2008 [ OK ]
    > > Starting udev: [ OK ]
    > > Setting hostname tundro2.rchland.ibm.com: [ OK ]
    > > No devices found
    > > Setting up Logical Volume Management: No volume groups found
    > > [ OK ]
    > > Checking filesystems
    > > Checking all file systems.
    > > [/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a /dev/sda6
    > > /dev/sda6: Superblock last mount time is in the future. FIXED.
    > > /dev/sda6: clean, 134758/1281696 files, 853722/2560336 blocks
    > > [ OK ]
    > > Remounting root filesystem in read-write mode: [ OK ]
    > > Mounting local filesystems: [ OK ]
    > > Enabling local filesystem quotas: quotaon: Warning: No quota format
    > > detected in the kernel.
    > > [ OK ]
    > > Enabling /etc/fstab swaps: swapon: /dev/sda5: Invalid argument
    > > [FAILED]
    > > INIT: Entering runlevel: 3
    > > Entering non-interactive startup
    > > Starting boot.loadmodules: [ OK ]
    > > Starting background readahead: [ OK ]
    > > Bringing up loopback interface: [ OK ]
    > > Bringing up interface eth0: [ OK ]
    > > Starting auditd: [ OK ]
    > > Starting system logger: [ OK ]
    > > Starting kernel logger: [ OK ]
    > > Starting irqbalance: [ OK ]
    > > Starting RPC idmapd: FATAL: Module sunrpc not found.
    > > FATAL: Error running install command for sunrpc
    > > Error: RPC MTAB does not exist.
    > > Starting system message bus: [ OK ]
    > > Starting Bluetooth services:[ OK ][ OK ]Can't open RFCOMM control
    > > socket: Address family not supported by protocol
    > >
    > > Mounting other filesystems: [ OK ]
    > > Starting PC/SC smart card daemon (pcscd): [ OK ]
    > > Starting hidd: Can't open HIDP control socket: Address family not
    > > supported by protocol
    > > [FAILED]
    > > Starting autofs: Loading autofs4: [ OK ]
    > > Starting automount: [ OK ]
    > > [ OK ]
    > > Starting iprinit: Starting ipr initialization daemon[ OK ]
    > > [ OK ]
    > > Starting iprupdate: Checking ipr microcode levels
    > > Completed ipr microcode updates[ OK ]
    > > [ OK ]
    > > Starting iprdump: Starting ipr dump daemon[ OK ]
    > > [ OK ]
    > > Starting sshd: [ OK ]
    > > Starting cups: [ OK ]
    > > Starting xinetd: [ OK ]
    > > Starting vpdupdate: [ OK ]
    > > Starting console mouse services: [ OK ]
    > > Starting crond: [ OK ]
    > > Starting xfs: [ OK ]
    > > Starting anacron: [ OK ]
    > > Starting atd: [ OK ]
    > > Starting yum-updatesd: [ OK ]
    > > Starting Avahi daemon... [ OK ]
    > > Starting HAL daemon: [ OK ]
    > > Starting smartd: [ OK ]
    > >
    > > Red Hat Enterprise Linux Server release 5.2 (Tikanga)
    > > Kernel 2.6.27-rc7-autokern1 on an ppc64
    > >
    > > tundro2.rchland.ibm.com login:-- 0:conmux-control -- time-stamp --
    > > Oct/01/08 21:45:47 --
    > > -- 0:conmux-control -- time-stamp -- Oct/01/08 22:01:03 --
    > > (bot:conmon-payload) disconnected

    >
    > --
    > Mathieu Desnoyers
    > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support)

    * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > On Thu, Oct 02, 2008 at 11:10:46AM -0400, Mathieu Desnoyers wrote:
    > > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > > > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
    > > > >
    > > > > * Paul E. McKenney wrote:
    > > > >
    > > > > > Hello!
    > > > > >
    > > > > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > > > > which defaults disabled. This is a debugging feature, not something
    > > > > > that non-kernel-hackers would be expected to care about. This feature
    > > > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > > > > > preemption disabled in PREEMPT builds. This is essentially a port of
    > > > > > this functionality from the treercu patch.
    > > > > >
    > > > > > One current shortcoming: on some systems, stalls are detected during
    > > > > > early boot, when we normally would not care about them. My thought is
    > > > > > to add a call from late initialization to suppress stall detection
    > > > > > until the system is well along its way to being booted, but thought I
    > > > > > should check to see if there might already be something for this
    > > > > > purpose.
    > > > >
    > > > > could you be a bit more specific, why do those warnings show up and why
    > > > > dont we care about them? There are things like networking that
    > > > > occasionally do an rcu_sync() and a stall could mean a bootup hang.
    > > >
    > > > Hmmm...
    > > >
    > > > Good point, I was just falling back on my old "we don't care about RCU
    > > > stalls in boot-time code" rule from long ago. It is entirely possible
    > > > (in fact reasonably likely) that this rule no longer applies to Linux as
    > > > it exists today. So please see below for the console output. Not all
    > > > systems report this stall. I have recently been running only on Power
    > > > systems, will fire off on some x86s. My kneejerk reaction was that the
    > > > "stall" was really due to the clock-setting operation -- RCU stalls are
    > > > based on get_seconds().
    > > >
    > > > So maybe I need to change the stall-detection code to use jiffies
    > > > instead.
    > > >
    > > > Thoughts?
    > > >

    > >
    > > You could possibly use get monotonic time, which should hopefully be a
    > > bit better than its NTP-corrected alternatives.

    >
    > OK. I thought jiffies were monotonic (though not perfectly correlated
    > to the passage of time). This is a warning timeout, so does not need
    > better than a few percent accuracy, as long as time never jumps too far.
    >


    Well, jiffies should also be monotonic, but I don't know their exact
    status wrt dyntick systems.

    > But what API did you have in mind?
    >


    Something like do_posix_clock_monotonic_gettime() in linux/time.h. I
    think it should be ok after early boot.

    > > Also, do you have all the tools you need to poinpoint the precise source
    > > of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
    > > new tracepoint to instrument RCU callback execution at boot time and to
    > > trace it with LTTng is close to trivial. (actually, very early kernel
    > > boot is not so trivial as we would have to create a small kernel module
    > > to activate lttng tracing, but tracing the early phases of userspace
    > > bootup is trivial given we can use the userspace API).

    >
    > I just dump stack, which historically worked pretty well.
    >
    > Could you please tell me more about what you think should be added?
    >


    I just created two patches which instrument RCU callbacks and call_rcu
    in classic and preempt RCU to the -lttng tree to show how simple it is
    once the LTTng infrastructure is in place.

    Basically, all you need to do is to pull this tree (branch
    2.6.27-rc8-lttng-0.31) :

    git://git.kernel.org/pub/scm/linux/kernel/git/compudj/linux-2.6-lttng.git

    And follow :
    http://ltt.polymtl.ca/svn/trunk/lttv/QUICKSTART

    Which will basically tell you to enable markers/tracepoints and leave
    most ltt-related options to default (all in general setup menu).

    wget http://ltt.polymtl.ca/lttng/ltt-cont...2102008.tar.gz
    extract, ./configure, make, make install

    wget http://ltt.polymtl.ca/packages/lttv-...2082008.tar.gz
    extract, ./configure, make, make install
    (see QUICKSTART for package dependencies. Will be automatically checked
    by configure)

    You may have to add a mount point for debugfs and load some kernel
    modules (as explained in the QUICKSTART). Don't forget rcu-trace.ko.

    To take a trace, once you are booted in the kernel :

    ltt-armall
    (check that the rcu_* markers are enabled by looking at the "state"
    field of cat /proc/ltt | grep rcu)
    lttctl -n trace -d -l /mnt/debugfs/ltt -t /tmp/trace
    [Reproduce behavior to trace]
    lttctl -n trace -R
    # Produce a text dump
    lttv -m textDump -t /tmp/trace

    # Produce a text dump filtering only rcu events :
    lttv -m textDump -t /tmp/trace \
    -e "event.name=rcu_classic_callback|event.name=rcu_cla ssic_call_rcu|\
    event.name=rcu_classic_call_rcu_bh|event.name=rcu_ preempt_callback|\
    event.name=rcu_preempt_call_rcu_sched|event.name=r cu_preempt_call_rcu"

    Then some scripting could be required to turn the function pointers into
    symbols. Keep you kernel vmlinux around to perform symbol resolution.
    A quick trick is to use objdump -S --start-address=0x.... vmlinux|less
    but I am certain there are much much better ways to do it.

    There is also the gui you can use to dig into the trace :
    lttv-gui -t /tmp/trace

    That should help you pinpoint which are the functions that took so much
    time and which is their associated call_rcu caller IP.

    If you run a trace with preempt rcu, the textdump should give you an
    output like this :

    [...]
    rcu_preempt_call_rcu: 130.773191675 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    rcu_preempt_call_rcu: 130.773194003 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    rcu_preempt_call_rcu: 130.773195929 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    rcu_preempt_call_rcu: 130.773197399 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    rcu_preempt_call_rcu: 130.773200429 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802c6670, ip = 0xFFFFFFFF8023A45B }
    rcu_preempt_call_rcu: 130.773213907 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff8024a980, ip = 0xFFFFFFFF8023937D }
    rcu_preempt_call_rcu: 130.773215527 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff80238550, ip = 0xFFFFFFFF80239985 }
    rcu_preempt_callback: 130.789025735 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.789026731 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.789050610 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.789051096 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
    rcu_preempt_callback: 130.789052083 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
    rcu_preempt_callback: 130.789052713 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
    rcu_preempt_callback: 130.805051097 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.805052318 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.805052822 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.805053434 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 130.805054226 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
    rcu_preempt_callback: 130.805055339 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
    rcu_preempt_callback: 130.805056014 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
    rcu_preempt_call_rcu: 131.442990122 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_call_rcu: 131.443615323 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_call_rcu: 131.446861604 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_call_rcu: 131.447487425 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_callback: 131.473052317 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 131.473078626 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 131.477001607 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 131.477002435 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_call_rcu: 133.443017217 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_call_rcu: 133.443636900 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_call_rcu: 133.446887423 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_call_rcu: 133.447509515 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    rcu_preempt_callback: 133.473044696 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 133.473070649 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 133.476994292 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_callback: 133.476994961 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    rcu_preempt_call_rcu: 134.085511140 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    rcu_preempt_call_rcu: 134.085545237 (/tmp/trace/cpu_5), 3981, 3981, bash, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    rcu_preempt_call_rcu: 134.086124869 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    rcu_preempt_call_rcu: 134.086135078 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    rcu_preempt_call_rcu: 134.086143519 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    rcu_preempt_call_rcu: 134.086206323 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    rcu_preempt_call_rcu: 134.086526166 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    [...]

    Basically, once you have the LTTng infrastructure in place, all that
    needs to be added to RCU to do that is this (which is already in the
    -lttng tree), plus a small LTTng "probe" module. If you want, you could
    add a tracepoint in your stalled CPU detector and send the data through
    LTTng so you have a starting point to dig from in large traces. From
    that point, finding the root cause of long RCU-based CPU stalls becomes
    trivial.


    RCU : tracepoint instrumentation

    Instrument RCU classic and RCU preempt callback execution and call_rcu.

    Signed-off-by: Mathieu Desnoyers
    CC: "Paul E. McKenney"
    ---
    include/trace/rcu.h | 31 +++++++++++++++++++++++++++++++
    kernel/rcuclassic.c | 4 ++++
    kernel/rcupreempt.c | 4 ++++
    3 files changed, 39 insertions(+)

    Index: linux-2.6-lttng/include/trace/rcu.h
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-lttng/include/trace/rcu.h 2008-10-02 14:18:05.000000000 -0400
    @@ -0,0 +1,31 @@
    +#ifndef _TRACE_RCU_H
    +#define _TRACE_RCU_H
    +
    +#include
    +#include
    +
    +DEFINE_TRACE(rcu_classic_callback,
    + TPPROTO(struct rcu_head *head),
    + TPARGS(head));
    +
    +DEFINE_TRACE(rcu_classic_call_rcu,
    + TPPROTO(struct rcu_head *head, unsigned long ip),
    + TPARGS(head, ip));
    +
    +DEFINE_TRACE(rcu_classic_call_rcu_bh,
    + TPPROTO(struct rcu_head *head, unsigned long ip),
    + TPARGS(head, ip));
    +
    +DEFINE_TRACE(rcu_preempt_callback,
    + TPPROTO(struct rcu_head *head),
    + TPARGS(head));
    +
    +DEFINE_TRACE(rcu_preempt_call_rcu,
    + TPPROTO(struct rcu_head *head, unsigned long ip),
    + TPARGS(head, ip));
    +
    +DEFINE_TRACE(rcu_preempt_call_rcu_sched,
    + TPPROTO(struct rcu_head *head, unsigned long ip),
    + TPARGS(head, ip));
    +
    +#endif
    Index: linux-2.6-lttng/kernel/rcuclassic.c
    ================================================== =================
    --- linux-2.6-lttng.orig/kernel/rcuclassic.c 2008-10-02 13:44:35.000000000 -0400
    +++ linux-2.6-lttng/kernel/rcuclassic.c 2008-10-02 14:16:45.000000000 -0400
    @@ -47,6 +47,7 @@
    #include
    #include
    #include
    +#include

    #ifdef CONFIG_DEBUG_LOCK_ALLOC
    static struct lock_class_key rcu_lock_key;
    @@ -138,6 +139,7 @@ void call_rcu(struct rcu_head *head,
    head->func = func;
    head->next = NULL;
    local_irq_save(flags);
    + trace_rcu_classic_call_rcu(head, _RET_IP_);
    rdp = &__get_cpu_var(rcu_data);
    *rdp->nxttail = head;
    rdp->nxttail = &head->next;
    @@ -174,6 +176,7 @@ void call_rcu_bh(struct rcu_head *head,
    head->func = func;
    head->next = NULL;
    local_irq_save(flags);
    + trace_rcu_classic_call_rcu_bh(head, _RET_IP_);
    rdp = &__get_cpu_var(rcu_bh_data);
    *rdp->nxttail = head;
    rdp->nxttail = &head->next;
    @@ -232,6 +235,7 @@ static void rcu_do_batch(struct rcu_data
    while (list) {
    next = list->next;
    prefetch(next);
    + trace_rcu_classic_callback(list);
    list->func(list);
    list = next;
    if (++count >= rdp->blimit)
    Index: linux-2.6-lttng/kernel/rcupreempt.c
    ================================================== =================
    --- linux-2.6-lttng.orig/kernel/rcupreempt.c 2008-10-02 13:53:41.000000000 -0400
    +++ linux-2.6-lttng/kernel/rcupreempt.c 2008-10-02 14:15:47.000000000 -0400
    @@ -57,6 +57,7 @@
    #include
    #include
    #include
    +#include

    /*
    * Macro that prevents the compiler from reordering accesses, but does
    @@ -1108,6 +1109,7 @@ static void rcu_process_callbacks(struct
    spin_unlock_irqrestore(&rdp->lock, flags);
    while (list) {
    next = list->next;
    + trace_rcu_preempt_callback(list);
    list->func(list);
    list = next;
    RCU_TRACE_ME(rcupreempt_trace_invoke);
    @@ -1122,6 +1124,7 @@ void call_rcu(struct rcu_head *head, voi
    head->func = func;
    head->next = NULL;
    local_irq_save(flags);
    + trace_rcu_preempt_call_rcu(head, _RET_IP_);
    rdp = RCU_DATA_ME();
    spin_lock(&rdp->lock);
    __rcu_advance_callbacks(rdp);
    @@ -1141,6 +1144,7 @@ void call_rcu_sched(struct rcu_head *hea
    head->func = func;
    head->next = NULL;
    local_irq_save(flags);
    + trace_rcu_preempt_call_rcu_sched(head, _RET_IP_);
    rdp = RCU_DATA_ME();
    spin_lock(&rdp->lock);
    *rdp->nextschedtail = head;

    The LTTng probe module looks like this :


    LTTng trace RCU probe

    RCU callback/call_rcu LTTng probe.

    Signed-off-by: Mathieu Desnoyers
    CC: "Paul E. McKenney"
    ---
    ltt/probes/Makefile | 2 -
    ltt/probes/rcu-trace.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
    2 files changed, 55 insertions(+), 1 deletion(-)

    Index: linux-2.6-lttng/ltt/probes/rcu-trace.c
    ================================================== =================
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-2.6-lttng/ltt/probes/rcu-trace.c 2008-10-02 14:23:17.000000000 -0400
    @@ -0,0 +1,54 @@
    +/*
    + * ltt/probes/rcu-trace.c
    + *
    + * RCU tracepoint probes.
    + */
    +
    +#include
    +#include
    +
    +#ifdef CONFIG_CLASSIC_RCU
    +void probe_rcu_classic_callback(struct rcu_head *head)
    +{
    + trace_mark_tp(rcu_classic_callback, rcu_classic_callback,
    + probe_rcu_classic_callback, "func %p", head->func);
    +}
    +
    +void probe_rcu_classic_call_rcu(struct rcu_head *head, unsigned long ip)
    +{
    + trace_mark_tp(rcu_classic_call_rcu, rcu_classic_call_rcu,
    + probe_rcu_classic_call_rcu, "func %p ip 0x%lX", head->func, ip);
    +}
    +
    +void probe_rcu_classic_call_rcu_bh(struct rcu_head *head, unsigned long ip)
    +{
    + trace_mark_tp(rcu_classic_call_rcu_bh, rcu_classic_call_rcu_bh,
    + probe_rcu_classic_call_rcu_bh, "func %p ip 0x%lX",
    + head->func, ip);
    +}
    +#endif
    +
    +#ifdef CONFIG_PREEMPT_RCU
    +void probe_rcu_preempt_callback(struct rcu_head *head)
    +{
    + trace_mark_tp(rcu_preempt_callback, rcu_preempt_callback,
    + probe_rcu_preempt_callback, "func %p", head->func);
    +}
    +
    +void probe_rcu_preempt_call_rcu(struct rcu_head *head, unsigned long ip)
    +{
    + trace_mark_tp(rcu_preempt_call_rcu, rcu_preempt_call_rcu,
    + probe_rcu_preempt_call_rcu, "func %p ip 0x%lX", head->func, ip);
    +}
    +
    +void probe_rcu_preempt_call_rcu_sched(struct rcu_head *head, unsigned long ip)
    +{
    + trace_mark_tp(rcu_preempt_call_rcu_sched, rcu_preempt_call_rcu_sched,
    + probe_rcu_preempt_call_rcu_sched, "func %p ip 0x%lX",
    + head->func, ip);
    +}
    +#endif
    +
    +MODULE_LICENSE("GPL");
    +MODULE_AUTHOR("Mathieu Desnoyers");
    +MODULE_DESCRIPTION("RCU Tracepoint Probes");
    Index: linux-2.6-lttng/ltt/probes/Makefile
    ================================================== =================
    --- linux-2.6-lttng.orig/ltt/probes/Makefile 2008-10-02 14:11:20.000000000 -0400
    +++ linux-2.6-lttng/ltt/probes/Makefile 2008-10-02 14:18:16.000000000 -0400
    @@ -9,7 +9,7 @@ CFLAGS_REMOVE_lockdep-trace.o = -pg
    endif

    obj-$(CONFIG_LTT_TRACEPROBES) += kernel-trace.o mm-trace.o fs-trace.o \
    - ipc-trace.o lockdep-trace.o
    + ipc-trace.o lockdep-trace.o rcu-trace.o
    ifeq ($(CONFIG_NET),y)
    CFLAGS_REMOVE_net-trace.o = -pg
    obj-$(CONFIG_TRACEPROBES) += net-trace.o


    Mathieu

    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support)

    On Thu, Oct 02, 2008 at 02:51:15PM -0400, Mathieu Desnoyers wrote:
    > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > > On Thu, Oct 02, 2008 at 11:10:46AM -0400, Mathieu Desnoyers wrote:
    > > > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > > > > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
    > > > > >
    > > > > > * Paul E. McKenney wrote:
    > > > > >
    > > > > > > Hello!
    > > > > > >
    > > > > > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > > > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > > > > > which defaults disabled. This is a debugging feature, not something
    > > > > > > that non-kernel-hackers would be expected to care about. This feature
    > > > > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > > > > > > preemption disabled in PREEMPT builds. This is essentially a port of
    > > > > > > this functionality from the treercu patch.
    > > > > > >
    > > > > > > One current shortcoming: on some systems, stalls are detected during
    > > > > > > early boot, when we normally would not care about them. My thought is
    > > > > > > to add a call from late initialization to suppress stall detection
    > > > > > > until the system is well along its way to being booted, but thought I
    > > > > > > should check to see if there might already be something for this
    > > > > > > purpose.
    > > > > >
    > > > > > could you be a bit more specific, why do those warnings show up and why
    > > > > > dont we care about them? There are things like networking that
    > > > > > occasionally do an rcu_sync() and a stall could mean a bootup hang.
    > > > >
    > > > > Hmmm...
    > > > >
    > > > > Good point, I was just falling back on my old "we don't care about RCU
    > > > > stalls in boot-time code" rule from long ago. It is entirely possible
    > > > > (in fact reasonably likely) that this rule no longer applies to Linux as
    > > > > it exists today. So please see below for the console output. Not all
    > > > > systems report this stall. I have recently been running only on Power
    > > > > systems, will fire off on some x86s. My kneejerk reaction was that the
    > > > > "stall" was really due to the clock-setting operation -- RCU stalls are
    > > > > based on get_seconds().
    > > > >
    > > > > So maybe I need to change the stall-detection code to use jiffies
    > > > > instead.
    > > > >
    > > > > Thoughts?
    > > > >
    > > >
    > > > You could possibly use get monotonic time, which should hopefully be a
    > > > bit better than its NTP-corrected alternatives.

    > >
    > > OK. I thought jiffies were monotonic (though not perfectly correlated
    > > to the passage of time). This is a warning timeout, so does not need
    > > better than a few percent accuracy, as long as time never jumps too far.
    > >

    >
    > Well, jiffies should also be monotonic, but I don't know their exact
    > status wrt dyntick systems.


    There will be at least one non-dyntick CPU if there are RCU callbacks
    pending. If there are no RCU callbacks pending, then RCU won't be
    doing anything, hence will not be detecting stalls.

    > > But what API did you have in mind?
    > >

    >
    > Something like do_posix_clock_monotonic_gettime() in linux/time.h. I
    > think it should be ok after early boot.


    I like jiffies for this non-super-exacting purpose. ;-)

    > > > Also, do you have all the tools you need to poinpoint the precise source
    > > > of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
    > > > new tracepoint to instrument RCU callback execution at boot time and to
    > > > trace it with LTTng is close to trivial. (actually, very early kernel
    > > > boot is not so trivial as we would have to create a small kernel module
    > > > to activate lttng tracing, but tracing the early phases of userspace
    > > > bootup is trivial given we can use the userspace API).

    > >
    > > I just dump stack, which historically worked pretty well.
    > >
    > > Could you please tell me more about what you think should be added?
    > >

    >
    > I just created two patches which instrument RCU callbacks and call_rcu
    > in classic and preempt RCU to the -lttng tree to show how simple it is
    > once the LTTng infrastructure is in place.


    OK, I think I see the disconnection here...

    When RCU detects a stall, that often indicates that some CPU is spinning
    in the kernel (for !CONFIG_PREEMPT) or that some CPU is spinning in the
    kernel with preemption disabled (for CONFIG_PREEMPT). The stack trace
    will normally show what is spinning.

    Tracing the RCU activity in this case will not help -- it is not RCU that
    is spinning, but rather that RCU is stalled waiting for some other part
    of the kernel that is spinning. (In a past life, RCU once complained
    about a specific CPU that had managed to fail in a way that left the
    system running! Except that RCU grace periods never ended...)

    FWIW, what I normally use for debugging RCU itself is counters rather
    than log messages. The reason I avoid log messages is that the race
    conditions I often find myself chasing are reasonably narrow, so that
    I would have a very large number of log messages to pile through.
    Nevertheless, I will keep track of these patches in case I come across
    some situation that is more friendly to log-based debugging -- and thank
    you for putting them together!!!

    Thanx, Paul

    > Basically, all you need to do is to pull this tree (branch
    > 2.6.27-rc8-lttng-0.31) :
    >
    > git://git.kernel.org/pub/scm/linux/kernel/git/compudj/linux-2.6-lttng.git
    >
    > And follow :
    > http://ltt.polymtl.ca/svn/trunk/lttv/QUICKSTART
    >
    > Which will basically tell you to enable markers/tracepoints and leave
    > most ltt-related options to default (all in general setup menu).
    >
    > wget http://ltt.polymtl.ca/lttng/ltt-cont...2102008.tar.gz
    > extract, ./configure, make, make install
    >
    > wget http://ltt.polymtl.ca/packages/lttv-...2082008.tar.gz
    > extract, ./configure, make, make install
    > (see QUICKSTART for package dependencies. Will be automatically checked
    > by configure)
    >
    > You may have to add a mount point for debugfs and load some kernel
    > modules (as explained in the QUICKSTART). Don't forget rcu-trace.ko.
    >
    > To take a trace, once you are booted in the kernel :
    >
    > ltt-armall
    > (check that the rcu_* markers are enabled by looking at the "state"
    > field of cat /proc/ltt | grep rcu)
    > lttctl -n trace -d -l /mnt/debugfs/ltt -t /tmp/trace
    > [Reproduce behavior to trace]
    > lttctl -n trace -R
    > # Produce a text dump
    > lttv -m textDump -t /tmp/trace
    >
    > # Produce a text dump filtering only rcu events :
    > lttv -m textDump -t /tmp/trace \
    > -e "event.name=rcu_classic_callback|event.name=rcu_cla ssic_call_rcu|\
    > event.name=rcu_classic_call_rcu_bh|event.name=rcu_ preempt_callback|\
    > event.name=rcu_preempt_call_rcu_sched|event.name=r cu_preempt_call_rcu"
    >
    > Then some scripting could be required to turn the function pointers into
    > symbols. Keep you kernel vmlinux around to perform symbol resolution.
    > A quick trick is to use objdump -S --start-address=0x.... vmlinux|less
    > but I am certain there are much much better ways to do it.
    >
    > There is also the gui you can use to dig into the trace :
    > lttv-gui -t /tmp/trace
    >
    > That should help you pinpoint which are the functions that took so much
    > time and which is their associated call_rcu caller IP.
    >
    > If you run a trace with preempt rcu, the textdump should give you an
    > output like this :
    >
    > [...]
    > rcu_preempt_call_rcu: 130.773191675 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > rcu_preempt_call_rcu: 130.773194003 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > rcu_preempt_call_rcu: 130.773195929 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > rcu_preempt_call_rcu: 130.773197399 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > rcu_preempt_call_rcu: 130.773200429 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802c6670, ip = 0xFFFFFFFF8023A45B }
    > rcu_preempt_call_rcu: 130.773213907 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff8024a980, ip = 0xFFFFFFFF8023937D }
    > rcu_preempt_call_rcu: 130.773215527 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff80238550, ip = 0xFFFFFFFF80239985 }
    > rcu_preempt_callback: 130.789025735 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.789026731 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.789050610 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.789051096 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
    > rcu_preempt_callback: 130.789052083 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
    > rcu_preempt_callback: 130.789052713 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
    > rcu_preempt_callback: 130.805051097 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.805052318 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.805052822 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.805053434 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 130.805054226 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
    > rcu_preempt_callback: 130.805055339 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
    > rcu_preempt_callback: 130.805056014 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
    > rcu_preempt_call_rcu: 131.442990122 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_call_rcu: 131.443615323 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_call_rcu: 131.446861604 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_call_rcu: 131.447487425 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_callback: 131.473052317 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 131.473078626 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 131.477001607 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 131.477002435 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_call_rcu: 133.443017217 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_call_rcu: 133.443636900 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_call_rcu: 133.446887423 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_call_rcu: 133.447509515 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
    > rcu_preempt_callback: 133.473044696 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 133.473070649 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 133.476994292 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_callback: 133.476994961 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
    > rcu_preempt_call_rcu: 134.085511140 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > rcu_preempt_call_rcu: 134.085545237 (/tmp/trace/cpu_5), 3981, 3981, bash, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > rcu_preempt_call_rcu: 134.086124869 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    > rcu_preempt_call_rcu: 134.086135078 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    > rcu_preempt_call_rcu: 134.086143519 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    > rcu_preempt_call_rcu: 134.086206323 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
    > rcu_preempt_call_rcu: 134.086526166 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
    > [...]
    >
    > Basically, once you have the LTTng infrastructure in place, all that
    > needs to be added to RCU to do that is this (which is already in the
    > -lttng tree), plus a small LTTng "probe" module. If you want, you could
    > add a tracepoint in your stalled CPU detector and send the data through
    > LTTng so you have a starting point to dig from in large traces. From
    > that point, finding the root cause of long RCU-based CPU stalls becomes
    > trivial.
    >
    >
    > RCU : tracepoint instrumentation
    >
    > Instrument RCU classic and RCU preempt callback execution and call_rcu.
    >
    > Signed-off-by: Mathieu Desnoyers
    > CC: "Paul E. McKenney"
    > ---
    > include/trace/rcu.h | 31 +++++++++++++++++++++++++++++++
    > kernel/rcuclassic.c | 4 ++++
    > kernel/rcupreempt.c | 4 ++++
    > 3 files changed, 39 insertions(+)
    >
    > Index: linux-2.6-lttng/include/trace/rcu.h
    > ================================================== =================
    > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    > +++ linux-2.6-lttng/include/trace/rcu.h 2008-10-02 14:18:05.000000000 -0400
    > @@ -0,0 +1,31 @@
    > +#ifndef _TRACE_RCU_H
    > +#define _TRACE_RCU_H
    > +
    > +#include
    > +#include
    > +
    > +DEFINE_TRACE(rcu_classic_callback,
    > + TPPROTO(struct rcu_head *head),
    > + TPARGS(head));
    > +
    > +DEFINE_TRACE(rcu_classic_call_rcu,
    > + TPPROTO(struct rcu_head *head, unsigned long ip),
    > + TPARGS(head, ip));
    > +
    > +DEFINE_TRACE(rcu_classic_call_rcu_bh,
    > + TPPROTO(struct rcu_head *head, unsigned long ip),
    > + TPARGS(head, ip));
    > +
    > +DEFINE_TRACE(rcu_preempt_callback,
    > + TPPROTO(struct rcu_head *head),
    > + TPARGS(head));
    > +
    > +DEFINE_TRACE(rcu_preempt_call_rcu,
    > + TPPROTO(struct rcu_head *head, unsigned long ip),
    > + TPARGS(head, ip));
    > +
    > +DEFINE_TRACE(rcu_preempt_call_rcu_sched,
    > + TPPROTO(struct rcu_head *head, unsigned long ip),
    > + TPARGS(head, ip));
    > +
    > +#endif
    > Index: linux-2.6-lttng/kernel/rcuclassic.c
    > ================================================== =================
    > --- linux-2.6-lttng.orig/kernel/rcuclassic.c 2008-10-02 13:44:35.000000000 -0400
    > +++ linux-2.6-lttng/kernel/rcuclassic.c 2008-10-02 14:16:45.000000000 -0400
    > @@ -47,6 +47,7 @@
    > #include
    > #include
    > #include
    > +#include
    >
    > #ifdef CONFIG_DEBUG_LOCK_ALLOC
    > static struct lock_class_key rcu_lock_key;
    > @@ -138,6 +139,7 @@ void call_rcu(struct rcu_head *head,
    > head->func = func;
    > head->next = NULL;
    > local_irq_save(flags);
    > + trace_rcu_classic_call_rcu(head, _RET_IP_);
    > rdp = &__get_cpu_var(rcu_data);
    > *rdp->nxttail = head;
    > rdp->nxttail = &head->next;
    > @@ -174,6 +176,7 @@ void call_rcu_bh(struct rcu_head *head,
    > head->func = func;
    > head->next = NULL;
    > local_irq_save(flags);
    > + trace_rcu_classic_call_rcu_bh(head, _RET_IP_);
    > rdp = &__get_cpu_var(rcu_bh_data);
    > *rdp->nxttail = head;
    > rdp->nxttail = &head->next;
    > @@ -232,6 +235,7 @@ static void rcu_do_batch(struct rcu_data
    > while (list) {
    > next = list->next;
    > prefetch(next);
    > + trace_rcu_classic_callback(list);
    > list->func(list);
    > list = next;
    > if (++count >= rdp->blimit)
    > Index: linux-2.6-lttng/kernel/rcupreempt.c
    > ================================================== =================
    > --- linux-2.6-lttng.orig/kernel/rcupreempt.c 2008-10-02 13:53:41.000000000 -0400
    > +++ linux-2.6-lttng/kernel/rcupreempt.c 2008-10-02 14:15:47.000000000 -0400
    > @@ -57,6 +57,7 @@
    > #include
    > #include
    > #include
    > +#include
    >
    > /*
    > * Macro that prevents the compiler from reordering accesses, but does
    > @@ -1108,6 +1109,7 @@ static void rcu_process_callbacks(struct
    > spin_unlock_irqrestore(&rdp->lock, flags);
    > while (list) {
    > next = list->next;
    > + trace_rcu_preempt_callback(list);
    > list->func(list);
    > list = next;
    > RCU_TRACE_ME(rcupreempt_trace_invoke);
    > @@ -1122,6 +1124,7 @@ void call_rcu(struct rcu_head *head, voi
    > head->func = func;
    > head->next = NULL;
    > local_irq_save(flags);
    > + trace_rcu_preempt_call_rcu(head, _RET_IP_);
    > rdp = RCU_DATA_ME();
    > spin_lock(&rdp->lock);
    > __rcu_advance_callbacks(rdp);
    > @@ -1141,6 +1144,7 @@ void call_rcu_sched(struct rcu_head *hea
    > head->func = func;
    > head->next = NULL;
    > local_irq_save(flags);
    > + trace_rcu_preempt_call_rcu_sched(head, _RET_IP_);
    > rdp = RCU_DATA_ME();
    > spin_lock(&rdp->lock);
    > *rdp->nextschedtail = head;
    >
    > The LTTng probe module looks like this :
    >
    >
    > LTTng trace RCU probe
    >
    > RCU callback/call_rcu LTTng probe.
    >
    > Signed-off-by: Mathieu Desnoyers
    > CC: "Paul E. McKenney"
    > ---
    > ltt/probes/Makefile | 2 -
    > ltt/probes/rcu-trace.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
    > 2 files changed, 55 insertions(+), 1 deletion(-)
    >
    > Index: linux-2.6-lttng/ltt/probes/rcu-trace.c
    > ================================================== =================
    > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    > +++ linux-2.6-lttng/ltt/probes/rcu-trace.c 2008-10-02 14:23:17.000000000 -0400
    > @@ -0,0 +1,54 @@
    > +/*
    > + * ltt/probes/rcu-trace.c
    > + *
    > + * RCU tracepoint probes.
    > + */
    > +
    > +#include
    > +#include
    > +
    > +#ifdef CONFIG_CLASSIC_RCU
    > +void probe_rcu_classic_callback(struct rcu_head *head)
    > +{
    > + trace_mark_tp(rcu_classic_callback, rcu_classic_callback,
    > + probe_rcu_classic_callback, "func %p", head->func);
    > +}
    > +
    > +void probe_rcu_classic_call_rcu(struct rcu_head *head, unsigned long ip)
    > +{
    > + trace_mark_tp(rcu_classic_call_rcu, rcu_classic_call_rcu,
    > + probe_rcu_classic_call_rcu, "func %p ip 0x%lX", head->func, ip);
    > +}
    > +
    > +void probe_rcu_classic_call_rcu_bh(struct rcu_head *head, unsigned long ip)
    > +{
    > + trace_mark_tp(rcu_classic_call_rcu_bh, rcu_classic_call_rcu_bh,
    > + probe_rcu_classic_call_rcu_bh, "func %p ip 0x%lX",
    > + head->func, ip);
    > +}
    > +#endif
    > +
    > +#ifdef CONFIG_PREEMPT_RCU
    > +void probe_rcu_preempt_callback(struct rcu_head *head)
    > +{
    > + trace_mark_tp(rcu_preempt_callback, rcu_preempt_callback,
    > + probe_rcu_preempt_callback, "func %p", head->func);
    > +}
    > +
    > +void probe_rcu_preempt_call_rcu(struct rcu_head *head, unsigned long ip)
    > +{
    > + trace_mark_tp(rcu_preempt_call_rcu, rcu_preempt_call_rcu,
    > + probe_rcu_preempt_call_rcu, "func %p ip 0x%lX", head->func, ip);
    > +}
    > +
    > +void probe_rcu_preempt_call_rcu_sched(struct rcu_head *head, unsigned long ip)
    > +{
    > + trace_mark_tp(rcu_preempt_call_rcu_sched, rcu_preempt_call_rcu_sched,
    > + probe_rcu_preempt_call_rcu_sched, "func %p ip 0x%lX",
    > + head->func, ip);
    > +}
    > +#endif
    > +
    > +MODULE_LICENSE("GPL");
    > +MODULE_AUTHOR("Mathieu Desnoyers");
    > +MODULE_DESCRIPTION("RCU Tracepoint Probes");
    > Index: linux-2.6-lttng/ltt/probes/Makefile
    > ================================================== =================
    > --- linux-2.6-lttng.orig/ltt/probes/Makefile 2008-10-02 14:11:20.000000000 -0400
    > +++ linux-2.6-lttng/ltt/probes/Makefile 2008-10-02 14:18:16.000000000 -0400
    > @@ -9,7 +9,7 @@ CFLAGS_REMOVE_lockdep-trace.o = -pg
    > endif
    >
    > obj-$(CONFIG_LTT_TRACEPROBES) += kernel-trace.o mm-trace.o fs-trace.o \
    > - ipc-trace.o lockdep-trace.o
    > + ipc-trace.o lockdep-trace.o rcu-trace.o
    > ifeq ($(CONFIG_NET),y)
    > CFLAGS_REMOVE_net-trace.o = -pg
    > obj-$(CONFIG_TRACEPROBES) += net-trace.o
    >
    >
    > Mathieu
    >
    > --
    > Mathieu Desnoyers
    > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU

    Hello!

    This patch adds stalled-CPU detection to Classic RCU. This capability
    is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
    defaults disabled. This is a debugging feature to detect infinite loops
    in kernel code, not something that non-kernel-hackers would be expected
    to care about. This feature can detect looping CPUs in !PREEMPT builds
    and looping CPUs with preemption disabled in PREEMPT builds. This is
    essentially a port of this functionality from the treercu patch, replacing
    the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).

    The changes from the patch in tip/core/rcu include making the config
    variable name match that in treercu, changing from seconds to jiffies to
    avoid spurious warnings, and printing a boot message when this feature
    is enabled.

    Signed-off-by: Paul E. McKenney
    ---

    include/linux/rcuclassic.h | 12 ++-
    kernel/rcuclassic.c | 166 +++++++++++++++++++++++----------------------
    lib/Kconfig.debug | 2
    3 files changed, 96 insertions(+), 84 deletions(-)

    diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    index 29bf528..2d72d20 100644
    --- a/include/linux/rcuclassic.h
    +++ b/include/linux/rcuclassic.h
    @@ -40,15 +40,21 @@
    #include
    #include

    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

    /* Global control variables for rcupdate callback mechanism. */
    struct rcu_ctrlblk {
    long cur; /* Current batch number. */
    long completed; /* Number of the last completed batch */
    long pending; /* Number of the last pending batch */
    -#ifdef CONFIG_DEBUG_RCU_STALL
    - unsigned long gp_check; /* Time grace period should end, in seconds. */
    -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    + unsigned long gp_start; /* Time at which GP started in jiffies. */
    + unsigned long jiffies_stall;
    + /* Time at which to check for CPU stalls. */
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

    int signaled;

    diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    index ed15128..eae2fb6 100644
    --- a/kernel/rcuclassic.c
    +++ b/kernel/rcuclassic.c
    @@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
    }
    }

    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    +
    +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    +{
    + rcp->gp_start = jiffies;
    + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
    +}
    +
    +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + int cpu;
    + long delta;
    + unsigned long flags;
    +
    + /* Only let one CPU complain about others per time interval. */
    +
    + spin_lock_irqsave(&rcp->lock, flags);
    + delta = jiffies - rcp->jiffies_stall;
    + if (delta < 2 || rcp->cur != rcp->completed) {
    + spin_unlock_irqrestore(&rcp->lock, flags);
    + return;
    + }
    + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    + spin_unlock_irqrestore(&rcp->lock, flags);
    +
    + /* OK, time to rat on our buddy... */
    +
    + printk(KERN_ERR "RCU detected CPU stalls:");
    + for_each_possible_cpu(cpu) {
    + if (cpu_isset(cpu, rcp->cpumask))
    + printk(" %d", cpu);
    + }
    + printk(" (detected by %d, t=%ld jiffies)\n",
    + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    +}
    +
    +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + unsigned long flags;
    +
    + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    + smp_processor_id(), jiffies,
    + jiffies - rcp->gp_start);
    + dump_stack();
    + spin_lock_irqsave(&rcp->lock, flags);
    + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
    + rcp->jiffies_stall =
    + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    + spin_unlock_irqrestore(&rcp->lock, flags);
    + set_need_resched(); /* kick ourselves to get things going. */
    +}
    +
    +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    +{
    + long delta;
    +
    + delta = jiffies - rcp->jiffies_stall;
    + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    +
    + /* We haven't checked in, so go dump stack. */
    + print_cpu_stall(rcp);
    +
    + } else if (rcp->cur != rcp->completed && delta >= 2) {
    +
    + /* They had two seconds to dump stack, so complain. */
    + print_other_cpu_stall(rcp);
    + }
    +}
    +
    +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    +
    +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    +{
    +}
    +
    +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    +{
    +}
    +
    +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    +
    /**
    * call_rcu - Queue an RCU callback for invocation after a grace period.
    * @head: structure to be used for queueing the RCU updates.
    @@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
    * period (if necessary).
    */

    -#ifdef CONFIG_DEBUG_RCU_STALL
    -
    -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
    -{
    - rcp->gp_check = get_seconds() + 3;
    -}
    -
    -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    -{
    - int cpu;
    - long delta;
    - unsigned long flags;
    -
    - /* Only let one CPU complain about others per time interval. */
    -
    - spin_lock_irqsave(&rcp->lock, flags);
    - delta = get_seconds() - rcp->gp_check;
    - if (delta < 2L || cpus_empty(rcp->cpumask)) {
    - spin_unlock(&rcp->lock);
    - return;
    - }
    - rcp->gp_check = get_seconds() + 30;
    - spin_unlock_irqrestore(&rcp->lock, flags);
    -
    - /* OK, time to rat on our buddy... */
    -
    - printk(KERN_ERR "RCU detected CPU stalls:");
    - for_each_cpu_mask(cpu, rcp->cpumask)
    - printk(" %d", cpu);
    - printk(" (detected by %d, t=%lu/%lu)\n",
    - smp_processor_id(), get_seconds(), rcp->gp_check);
    -}
    -
    -static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    -{
    - unsigned long flags;
    -
    - printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
    - smp_processor_id(), get_seconds(), rcp->gp_check);
    - dump_stack();
    - spin_lock_irqsave(&rcp->lock, flags);
    - if ((long)(get_seconds() - rcp->gp_check) >= 0L)
    - rcp->gp_check = get_seconds() + 30;
    - spin_unlock_irqrestore(&rcp->lock, flags);
    -}
    -
    -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    -{
    - long delta;
    -
    - delta = get_seconds() - rcp->gp_check;
    - if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
    -
    - /* We haven't checked in, so go dump stack. */
    -
    - print_cpu_stall(rcp);
    -
    - } else {
    - if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
    - /* They had two seconds to dump stack, so complain. */
    - print_other_cpu_stall(rcp);
    - }
    - }
    -}
    -
    -#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
    -
    -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
    -{
    -}
    -
    -static inline void
    -check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    -{
    -}
    -
    -#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
    -
    /*
    * Register a new batch of callbacks, and start it up if there is currently no
    * active batch and the batch to be registered has not already occurred.
    @@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    if (rcp->cur != rcp->pending &&
    rcp->completed == rcp->cur) {
    rcp->cur++;
    - record_gp_check_time(rcp);
    + record_gp_stall_check_time(rcp);

    /*
    * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    @@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
    static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    {
    /* Check for CPU stalls, if enabled. */
    - check_cpu_stall(rcp, rdp);
    + check_cpu_stall(rcp);

    if (rdp->nxtlist) {
    long completed_snap = ACCESS_ONCE(rcp->completed);
    @@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
    */
    void __init __rcu_init(void)
    {
    +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
    (void *)(long)smp_processor_id());
    /* Register notifier for non-boot CPUs */
    diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    index 4e921a8..e0e0582 100644
    --- a/lib/Kconfig.debug
    +++ b/lib/Kconfig.debug
    @@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
    Say N here if you want the RCU torture tests to start only
    after being manually enabled via /proc.

    -config RCU_CPU_STALL
    +config RCU_CPU_STALL_DETECTOR
    bool "Check for stalled CPUs delaying RCU grace periods"
    depends on CLASSIC_RCU
    default n
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  10. Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU

    * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > Hello!
    >
    > This patch adds stalled-CPU detection to Classic RCU. This capability
    > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
    > defaults disabled. This is a debugging feature to detect infinite loops
    > in kernel code, not something that non-kernel-hackers would be expected
    > to care about. This feature can detect looping CPUs in !PREEMPT builds
    > and looping CPUs with preemption disabled in PREEMPT builds. This is
    > essentially a port of this functionality from the treercu patch, replacing
    > the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).
    >
    > The changes from the patch in tip/core/rcu include making the config
    > variable name match that in treercu, changing from seconds to jiffies to
    > avoid spurious warnings, and printing a boot message when this feature
    > is enabled.
    >


    Hi Paul,

    Thanks for the previous explanations. Out of curiosity, what can this
    patch do that the nmi watchdog can't do ?

    Mathieu

    > Signed-off-by: Paul E. McKenney
    > ---
    >
    > include/linux/rcuclassic.h | 12 ++-
    > kernel/rcuclassic.c | 166 +++++++++++++++++++++++----------------------
    > lib/Kconfig.debug | 2
    > 3 files changed, 96 insertions(+), 84 deletions(-)
    >
    > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    > index 29bf528..2d72d20 100644
    > --- a/include/linux/rcuclassic.h
    > +++ b/include/linux/rcuclassic.h
    > @@ -40,15 +40,21 @@
    > #include
    > #include
    >
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
    > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    >
    > /* Global control variables for rcupdate callback mechanism. */
    > struct rcu_ctrlblk {
    > long cur; /* Current batch number. */
    > long completed; /* Number of the last completed batch */
    > long pending; /* Number of the last pending batch */
    > -#ifdef CONFIG_DEBUG_RCU_STALL
    > - unsigned long gp_check; /* Time grace period should end, in seconds. */
    > -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > + unsigned long gp_start; /* Time at which GP started in jiffies. */
    > + unsigned long jiffies_stall;
    > + /* Time at which to check for CPU stalls. */
    > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    >
    > int signaled;
    >
    > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    > index ed15128..eae2fb6 100644
    > --- a/kernel/rcuclassic.c
    > +++ b/kernel/rcuclassic.c
    > @@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
    > }
    > }
    >
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > +
    > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > +{
    > + rcp->gp_start = jiffies;
    > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
    > +}
    > +
    > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    > +{
    > + int cpu;
    > + long delta;
    > + unsigned long flags;
    > +
    > + /* Only let one CPU complain about others per time interval. */
    > +
    > + spin_lock_irqsave(&rcp->lock, flags);
    > + delta = jiffies - rcp->jiffies_stall;
    > + if (delta < 2 || rcp->cur != rcp->completed) {
    > + spin_unlock_irqrestore(&rcp->lock, flags);
    > + return;
    > + }
    > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > + spin_unlock_irqrestore(&rcp->lock, flags);
    > +
    > + /* OK, time to rat on our buddy... */
    > +
    > + printk(KERN_ERR "RCU detected CPU stalls:");
    > + for_each_possible_cpu(cpu) {
    > + if (cpu_isset(cpu, rcp->cpumask))
    > + printk(" %d", cpu);
    > + }
    > + printk(" (detected by %d, t=%ld jiffies)\n",
    > + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    > +}
    > +
    > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    > +{
    > + unsigned long flags;
    > +
    > + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    > + smp_processor_id(), jiffies,
    > + jiffies - rcp->gp_start);
    > + dump_stack();
    > + spin_lock_irqsave(&rcp->lock, flags);
    > + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
    > + rcp->jiffies_stall =
    > + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > + spin_unlock_irqrestore(&rcp->lock, flags);
    > + set_need_resched(); /* kick ourselves to get things going. */
    > +}
    > +
    > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    > +{
    > + long delta;
    > +
    > + delta = jiffies - rcp->jiffies_stall;
    > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    > +
    > + /* We haven't checked in, so go dump stack. */
    > + print_cpu_stall(rcp);
    > +
    > + } else if (rcp->cur != rcp->completed && delta >= 2) {
    > +
    > + /* They had two seconds to dump stack, so complain. */
    > + print_other_cpu_stall(rcp);
    > + }
    > +}
    > +
    > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > +
    > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > +{
    > +}
    > +
    > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > +{
    > +}
    > +
    > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > +
    > /**
    > * call_rcu - Queue an RCU callback for invocation after a grace period.
    > * @head: structure to be used for queueing the RCU updates.
    > @@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
    > * period (if necessary).
    > */
    >
    > -#ifdef CONFIG_DEBUG_RCU_STALL
    > -
    > -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
    > -{
    > - rcp->gp_check = get_seconds() + 3;
    > -}
    > -
    > -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    > -{
    > - int cpu;
    > - long delta;
    > - unsigned long flags;
    > -
    > - /* Only let one CPU complain about others per time interval. */
    > -
    > - spin_lock_irqsave(&rcp->lock, flags);
    > - delta = get_seconds() - rcp->gp_check;
    > - if (delta < 2L || cpus_empty(rcp->cpumask)) {
    > - spin_unlock(&rcp->lock);
    > - return;
    > - }
    > - rcp->gp_check = get_seconds() + 30;
    > - spin_unlock_irqrestore(&rcp->lock, flags);
    > -
    > - /* OK, time to rat on our buddy... */
    > -
    > - printk(KERN_ERR "RCU detected CPU stalls:");
    > - for_each_cpu_mask(cpu, rcp->cpumask)
    > - printk(" %d", cpu);
    > - printk(" (detected by %d, t=%lu/%lu)\n",
    > - smp_processor_id(), get_seconds(), rcp->gp_check);
    > -}
    > -
    > -static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    > -{
    > - unsigned long flags;
    > -
    > - printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
    > - smp_processor_id(), get_seconds(), rcp->gp_check);
    > - dump_stack();
    > - spin_lock_irqsave(&rcp->lock, flags);
    > - if ((long)(get_seconds() - rcp->gp_check) >= 0L)
    > - rcp->gp_check = get_seconds() + 30;
    > - spin_unlock_irqrestore(&rcp->lock, flags);
    > -}
    > -
    > -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > -{
    > - long delta;
    > -
    > - delta = get_seconds() - rcp->gp_check;
    > - if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
    > -
    > - /* We haven't checked in, so go dump stack. */
    > -
    > - print_cpu_stall(rcp);
    > -
    > - } else {
    > - if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
    > - /* They had two seconds to dump stack, so complain. */
    > - print_other_cpu_stall(rcp);
    > - }
    > - }
    > -}
    > -
    > -#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
    > -
    > -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
    > -{
    > -}
    > -
    > -static inline void
    > -check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > -{
    > -}
    > -
    > -#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
    > -
    > /*
    > * Register a new batch of callbacks, and start it up if there is currently no
    > * active batch and the batch to be registered has not already occurred.
    > @@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    > if (rcp->cur != rcp->pending &&
    > rcp->completed == rcp->cur) {
    > rcp->cur++;
    > - record_gp_check_time(rcp);
    > + record_gp_stall_check_time(rcp);
    >
    > /*
    > * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    > @@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
    > static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > {
    > /* Check for CPU stalls, if enabled. */
    > - check_cpu_stall(rcp, rdp);
    > + check_cpu_stall(rcp);
    >
    > if (rdp->nxtlist) {
    > long completed_snap = ACCESS_ONCE(rcp->completed);
    > @@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
    > */
    > void __init __rcu_init(void)
    > {
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
    > (void *)(long)smp_processor_id());
    > /* Register notifier for non-boot CPUs */
    > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    > index 4e921a8..e0e0582 100644
    > --- a/lib/Kconfig.debug
    > +++ b/lib/Kconfig.debug
    > @@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
    > Say N here if you want the RCU torture tests to start only
    > after being manually enabled via /proc.
    >
    > -config RCU_CPU_STALL
    > +config RCU_CPU_STALL_DETECTOR
    > bool "Check for stalled CPUs delaying RCU grace periods"
    > depends on CLASSIC_RCU
    > default n


    --
    Mathieu Desnoyers
    OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU

    On Fri, Oct 03, 2008 at 12:41:27AM -0400, Mathieu Desnoyers wrote:
    > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
    > > Hello!
    > >
    > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
    > > defaults disabled. This is a debugging feature to detect infinite loops
    > > in kernel code, not something that non-kernel-hackers would be expected
    > > to care about. This feature can detect looping CPUs in !PREEMPT builds
    > > and looping CPUs with preemption disabled in PREEMPT builds. This is
    > > essentially a port of this functionality from the treercu patch, replacing
    > > the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).
    > >
    > > The changes from the patch in tip/core/rcu include making the config
    > > variable name match that in treercu, changing from seconds to jiffies to
    > > avoid spurious warnings, and printing a boot message when this feature
    > > is enabled.
    > >

    >
    > Hi Paul,
    >
    > Thanks for the previous explanations. Out of curiosity, what can this
    > patch do that the nmi watchdog can't do ?


    Hello, Mathieu,

    Operate on architectures that don't have NMIs. Possibly allow shorter
    timeouts than the NMI watchdog. Operate on a per-CPU basis (or does
    the NMI watchdog do that these days?).

    Thanx, Paul

    > Mathieu
    >
    > > Signed-off-by: Paul E. McKenney
    > > ---
    > >
    > > include/linux/rcuclassic.h | 12 ++-
    > > kernel/rcuclassic.c | 166 +++++++++++++++++++++++----------------------
    > > lib/Kconfig.debug | 2
    > > 3 files changed, 96 insertions(+), 84 deletions(-)
    > >
    > > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    > > index 29bf528..2d72d20 100644
    > > --- a/include/linux/rcuclassic.h
    > > +++ b/include/linux/rcuclassic.h
    > > @@ -40,15 +40,21 @@
    > > #include
    > > #include
    > >
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    > > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
    > > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > >
    > > /* Global control variables for rcupdate callback mechanism. */
    > > struct rcu_ctrlblk {
    > > long cur; /* Current batch number. */
    > > long completed; /* Number of the last completed batch */
    > > long pending; /* Number of the last pending batch */
    > > -#ifdef CONFIG_DEBUG_RCU_STALL
    > > - unsigned long gp_check; /* Time grace period should end, in seconds. */
    > > -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > + unsigned long gp_start; /* Time at which GP started in jiffies. */
    > > + unsigned long jiffies_stall;
    > > + /* Time at which to check for CPU stalls. */
    > > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > >
    > > int signaled;
    > >
    > > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    > > index ed15128..eae2fb6 100644
    > > --- a/kernel/rcuclassic.c
    > > +++ b/kernel/rcuclassic.c
    > > @@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
    > > }
    > > }
    > >
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > +
    > > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > > +{
    > > + rcp->gp_start = jiffies;
    > > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
    > > +}
    > > +
    > > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    > > +{
    > > + int cpu;
    > > + long delta;
    > > + unsigned long flags;
    > > +
    > > + /* Only let one CPU complain about others per time interval. */
    > > +
    > > + spin_lock_irqsave(&rcp->lock, flags);
    > > + delta = jiffies - rcp->jiffies_stall;
    > > + if (delta < 2 || rcp->cur != rcp->completed) {
    > > + spin_unlock_irqrestore(&rcp->lock, flags);
    > > + return;
    > > + }
    > > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > > + spin_unlock_irqrestore(&rcp->lock, flags);
    > > +
    > > + /* OK, time to rat on our buddy... */
    > > +
    > > + printk(KERN_ERR "RCU detected CPU stalls:");
    > > + for_each_possible_cpu(cpu) {
    > > + if (cpu_isset(cpu, rcp->cpumask))
    > > + printk(" %d", cpu);
    > > + }
    > > + printk(" (detected by %d, t=%ld jiffies)\n",
    > > + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    > > +}
    > > +
    > > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    > > +{
    > > + unsigned long flags;
    > > +
    > > + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    > > + smp_processor_id(), jiffies,
    > > + jiffies - rcp->gp_start);
    > > + dump_stack();
    > > + spin_lock_irqsave(&rcp->lock, flags);
    > > + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
    > > + rcp->jiffies_stall =
    > > + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > > + spin_unlock_irqrestore(&rcp->lock, flags);
    > > + set_need_resched(); /* kick ourselves to get things going. */
    > > +}
    > > +
    > > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    > > +{
    > > + long delta;
    > > +
    > > + delta = jiffies - rcp->jiffies_stall;
    > > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    > > +
    > > + /* We haven't checked in, so go dump stack. */
    > > + print_cpu_stall(rcp);
    > > +
    > > + } else if (rcp->cur != rcp->completed && delta >= 2) {
    > > +
    > > + /* They had two seconds to dump stack, so complain. */
    > > + print_other_cpu_stall(rcp);
    > > + }
    > > +}
    > > +
    > > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > > +
    > > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > > +{
    > > +}
    > > +
    > > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > > +{
    > > +}
    > > +
    > > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > > +
    > > /**
    > > * call_rcu - Queue an RCU callback for invocation after a grace period.
    > > * @head: structure to be used for queueing the RCU updates.
    > > @@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
    > > * period (if necessary).
    > > */
    > >
    > > -#ifdef CONFIG_DEBUG_RCU_STALL
    > > -
    > > -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
    > > -{
    > > - rcp->gp_check = get_seconds() + 3;
    > > -}
    > > -
    > > -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    > > -{
    > > - int cpu;
    > > - long delta;
    > > - unsigned long flags;
    > > -
    > > - /* Only let one CPU complain about others per time interval. */
    > > -
    > > - spin_lock_irqsave(&rcp->lock, flags);
    > > - delta = get_seconds() - rcp->gp_check;
    > > - if (delta < 2L || cpus_empty(rcp->cpumask)) {
    > > - spin_unlock(&rcp->lock);
    > > - return;
    > > - }
    > > - rcp->gp_check = get_seconds() + 30;
    > > - spin_unlock_irqrestore(&rcp->lock, flags);
    > > -
    > > - /* OK, time to rat on our buddy... */
    > > -
    > > - printk(KERN_ERR "RCU detected CPU stalls:");
    > > - for_each_cpu_mask(cpu, rcp->cpumask)
    > > - printk(" %d", cpu);
    > > - printk(" (detected by %d, t=%lu/%lu)\n",
    > > - smp_processor_id(), get_seconds(), rcp->gp_check);
    > > -}
    > > -
    > > -static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    > > -{
    > > - unsigned long flags;
    > > -
    > > - printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
    > > - smp_processor_id(), get_seconds(), rcp->gp_check);
    > > - dump_stack();
    > > - spin_lock_irqsave(&rcp->lock, flags);
    > > - if ((long)(get_seconds() - rcp->gp_check) >= 0L)
    > > - rcp->gp_check = get_seconds() + 30;
    > > - spin_unlock_irqrestore(&rcp->lock, flags);
    > > -}
    > > -
    > > -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > > -{
    > > - long delta;
    > > -
    > > - delta = get_seconds() - rcp->gp_check;
    > > - if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
    > > -
    > > - /* We haven't checked in, so go dump stack. */
    > > -
    > > - print_cpu_stall(rcp);
    > > -
    > > - } else {
    > > - if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
    > > - /* They had two seconds to dump stack, so complain. */
    > > - print_other_cpu_stall(rcp);
    > > - }
    > > - }
    > > -}
    > > -
    > > -#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
    > > -
    > > -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
    > > -{
    > > -}
    > > -
    > > -static inline void
    > > -check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > > -{
    > > -}
    > > -
    > > -#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
    > > -
    > > /*
    > > * Register a new batch of callbacks, and start it up if there is currently no
    > > * active batch and the batch to be registered has not already occurred.
    > > @@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    > > if (rcp->cur != rcp->pending &&
    > > rcp->completed == rcp->cur) {
    > > rcp->cur++;
    > > - record_gp_check_time(rcp);
    > > + record_gp_stall_check_time(rcp);
    > >
    > > /*
    > > * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    > > @@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
    > > static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > > {
    > > /* Check for CPU stalls, if enabled. */
    > > - check_cpu_stall(rcp, rdp);
    > > + check_cpu_stall(rcp);
    > >
    > > if (rdp->nxtlist) {
    > > long completed_snap = ACCESS_ONCE(rcp->completed);
    > > @@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
    > > */
    > > void __init __rcu_init(void)
    > > {
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    > > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > > rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
    > > (void *)(long)smp_processor_id());
    > > /* Register notifier for non-boot CPUs */
    > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    > > index 4e921a8..e0e0582 100644
    > > --- a/lib/Kconfig.debug
    > > +++ b/lib/Kconfig.debug
    > > @@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
    > > Say N here if you want the RCU torture tests to start only
    > > after being manually enabled via /proc.
    > >
    > > -config RCU_CPU_STALL
    > > +config RCU_CPU_STALL_DETECTOR
    > > bool "Check for stalled CPUs delaying RCU grace periods"
    > > depends on CLASSIC_RCU
    > > default n

    >
    > --
    > Mathieu Desnoyers
    > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix


    -tip testing found a minor build bug - find the fix below.

    Ingo

    -------------------->
    From 2ec2b482b10a1ed3493c224f1893cddd3d33833b Mon Sep 17 00:00:00 2001
    From: Ingo Molnar
    Date: Fri, 3 Oct 2008 10:41:00 +0200
    Subject: [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix

    fix the !CONFIG_RCU_CPU_STALL_DETECTOR path:

    kernel/rcuclassic.c: In function '__rcu_pending':
    kernel/rcuclassic.c:609: error: too few arguments to function 'check_cpu_stall'

    Signed-off-by: Ingo Molnar
    ---
    kernel/rcuclassic.c | 2 +-
    1 files changed, 1 insertions(+), 1 deletions(-)

    diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    index 0d07e6e..37f72e5 100644
    --- a/kernel/rcuclassic.c
    +++ b/kernel/rcuclassic.c
    @@ -239,7 +239,7 @@ static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    {
    }

    -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
    {
    }

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  13. Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU


    * Paul E. McKenney wrote:

    > Hello!
    >
    > This patch adds stalled-CPU detection to Classic RCU. This capability
    > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > which defaults disabled. This is a debugging feature to detect
    > infinite loops in kernel code, not something that non-kernel-hackers
    > would be expected to care about. This feature can detect looping CPUs
    > in !PREEMPT builds and looping CPUs with preemption disabled in
    > PREEMPT builds. This is essentially a port of this functionality from
    > the treercu patch, replacing the stall debug patch that is already in
    > tip/core/rcu (commit 67182ae1c4).
    >
    > The changes from the patch in tip/core/rcu include making the config
    > variable name match that in treercu, changing from seconds to jiffies
    > to avoid spurious warnings, and printing a boot message when this
    > feature is enabled.
    >
    > Signed-off-by: Paul E. McKenney


    applied to tip/core/rcu, thanks Paul!

    i tidied up two tiny details noticed by scripts/checkpatch.pl:

    > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */


    adding paranthesis around the expression.

    > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    > +


    and have removed the two tailing tabs from this line.

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  14. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    I found a magic number in it.

    Paul E. McKenney wrote:
    > Hello again!
    >
    > This patch adds stalled-CPU detection to Classic RCU. This capability
    > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > which defaults disabled. This is a debugging feature, not something
    > that non-kernel-hackers would be expected to care about. This feature
    > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > preemption disabled in PREEMPT builds. This is essentially a port of
    > this functionality from the treercu patch.
    >
    > This version uses jiffies rather than get_seconds(), which eliminates
    > the spurious boot-time CPU stall warnings seen on some systems with
    > the previous patch.
    >
    > This is still against v2.6.27-rc8 -- I will do a version against tip
    > this evening (Pacific Time) when I get back to the combination of better
    > bandwidth and AC power.
    >
    > Signed-off-by: Paul E. McKenney
    > ---
    >
    > include/linux/rcuclassic.h | 9 ++++
    > kernel/rcuclassic.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
    > lib/Kconfig.debug | 13 ++++++
    > 3 files changed, 110 insertions(+)
    >
    > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    > index 4ab8436..cab055b 100644
    > --- a/include/linux/rcuclassic.h
    > +++ b/include/linux/rcuclassic.h
    > @@ -40,6 +40,10 @@
    > #include
    > #include
    >
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
    > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    >
    > /* Global control variables for rcupdate callback mechanism. */
    > struct rcu_ctrlblk {
    > @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
    > spinlock_t lock ____cacheline_internodealigned_in_smp;
    > cpumask_t cpumask; /* CPUs that need to switch in order */
    > /* for current batch to proceed. */
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > + unsigned long gp_start; /* Time at which GP started in jiffies. */
    > + unsigned long jiffies_stall;
    > + /* Time at which to check for CPU stalls. */
    > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > } ____cacheline_internodealigned_in_smp;
    >
    > /* Is batch a before batch b ? */
    > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    > index aad93cd..a299876 100644
    > --- a/kernel/rcuclassic.c
    > +++ b/kernel/rcuclassic.c
    > @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
    > }
    > #endif
    >
    > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > +
    > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > +{
    > + rcp->gp_start = jiffies;
    > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
    > +}
    > +
    > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    > +{
    > + int cpu;
    > + long delta;
    > + unsigned long flags;
    > +
    > + /* Only let one CPU complain about others per time interval. */
    > +
    > + spin_lock_irqsave(&rcp->lock, flags);
    > + delta = jiffies - rcp->jiffies_stall;
    > + if (delta < 2 || rcp->cur != rcp->completed) {


    Is it (2 * HZ)?
    should it be defined as macro?

    > + spin_unlock_irqrestore(&rcp->lock, flags);
    > + return;
    > + }
    > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > + spin_unlock_irqrestore(&rcp->lock, flags);
    > +
    > + /* OK, time to rat on our buddy... */
    > +
    > + printk(KERN_ERR "RCU detected CPU stalls:");
    > + for_each_possible_cpu(cpu) {
    > + if (cpu_isset(cpu, rcp->cpumask))
    > + printk(" %d", cpu);
    > + }
    > + printk(" (detected by %d, t=%ld jiffies)\n",
    > + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    > +}
    > +
    > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    > +{
    > + unsigned long flags;
    > +
    > + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    > + smp_processor_id(), jiffies,
    > + jiffies - rcp->gp_start);
    > + dump_stack();
    > + spin_lock_irqsave(&rcp->lock, flags);
    > + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
    > + rcp->jiffies_stall =
    > + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > + spin_unlock_irqrestore(&rcp->lock, flags);
    > + set_need_resched(); /* kick ourselves to get things going. */
    > +}
    > +
    > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    > +{
    > + long delta;
    > +
    > + delta = jiffies - rcp->jiffies_stall;
    > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    > +
    > + /* We haven't checked in, so go dump stack. */
    > + print_cpu_stall(rcp);
    > +
    > + } else if (rcp->cur != rcp->completed && delta >= 2) {
    > +
    > + /* They had two seconds to dump stack, so complain. */


    appear here again! and it's inconsistent with comment.

    > + print_other_cpu_stall(rcp);
    > + }
    > +}
    > +
    > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > +
    > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > +{
    > +}
    > +
    > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > +{
    > +}
    > +
    > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > +
    > /**
    > * call_rcu - Queue an RCU callback for invocation after a grace period.
    > * @head: structure to be used for queueing the RCU updates.
    > @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    > */
    > smp_wmb();
    > rcp->cur++;
    > + record_gp_stall_check_time(rcp);
    >
    > /*
    > * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    > @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
    >
    > static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > {
    > + /* Check for CPU stalls, if enabled. */
    > + check_cpu_stall(rcp);
    > +
    > /* This cpu has pending rcu entries and the grace period
    > * for them has completed.
    > */
    > @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
    > static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
    > struct rcu_data *rdp)
    > {
    > +#ifdef CONFIG_DEBUG_RCU_STALL
    > + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    > +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    > memset(rdp, 0, sizeof(*rdp));
    > rdp->curtail = &rdp->curlist;
    > rdp->nxttail = &rdp->nxtlist;
    > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    > index 0b50481..9fee969 100644
    > --- a/lib/Kconfig.debug
    > +++ b/lib/Kconfig.debug
    > @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
    > Say N here if you want the RCU torture tests to start only
    > after being manually enabled via /proc.
    >
    > +config RCU_CPU_STALL_DETECTOR
    > + bool "Check for stalled CPUs delaying RCU grace periods"
    > + depends on CLASSIC_RCU
    > + default n
    > + help
    > + This option causes RCU to printk information on which
    > + CPUs are delaying the current grace period, but only when
    > + the grace period extends for excessive time periods.
    > +
    > + Say Y if you want RCU to perform such checks.
    > +
    > + Say N if you are unsure.
    > +
    > config KPROBES_SANITY_TEST
    > bool "Kprobes sanity tests"
    > depends on DEBUG_KERNEL
    >
    >
    >



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  15. Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU

    On Fri, Oct 03, 2008 at 10:39:47AM +0200, Ingo Molnar wrote:
    >
    > * Paul E. McKenney wrote:
    >
    > > Hello!
    > >
    > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > which defaults disabled. This is a debugging feature to detect
    > > infinite loops in kernel code, not something that non-kernel-hackers
    > > would be expected to care about. This feature can detect looping CPUs
    > > in !PREEMPT builds and looping CPUs with preemption disabled in
    > > PREEMPT builds. This is essentially a port of this functionality from
    > > the treercu patch, replacing the stall debug patch that is already in
    > > tip/core/rcu (commit 67182ae1c4).
    > >
    > > The changes from the patch in tip/core/rcu include making the config
    > > variable name match that in treercu, changing from seconds to jiffies
    > > to avoid spurious warnings, and printing a boot message when this
    > > feature is enabled.
    > >
    > > Signed-off-by: Paul E. McKenney

    >
    > applied to tip/core/rcu, thanks Paul!
    >
    > i tidied up two tiny details noticed by scripts/checkpatch.pl:
    >
    > > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    > > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */

    >
    > adding paranthesis around the expression.
    >
    > > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    > > +

    >
    > and have removed the two tailing tabs from this line.


    checkpatch.pl... I have heard of it! :-)

    Thanx, Paul
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  16. Re: [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix

    On Fri, Oct 03, 2008 at 10:42:33AM +0200, Ingo Molnar wrote:
    >
    > -tip testing found a minor build bug - find the fix below.
    >
    > Ingo
    >
    > -------------------->
    > >From 2ec2b482b10a1ed3493c224f1893cddd3d33833b Mon Sep 17 00:00:00 2001

    > From: Ingo Molnar
    > Date: Fri, 3 Oct 2008 10:41:00 +0200
    > Subject: [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix
    >
    > fix the !CONFIG_RCU_CPU_STALL_DETECTOR path:
    >
    > kernel/rcuclassic.c: In function '__rcu_pending':
    > kernel/rcuclassic.c:609: error: too few arguments to function 'check_cpu_stall'
    >
    > Signed-off-by: Ingo Molnar
    > ---
    > kernel/rcuclassic.c | 2 +-
    > 1 files changed, 1 insertions(+), 1 deletions(-)
    >
    > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    > index 0d07e6e..37f72e5 100644
    > --- a/kernel/rcuclassic.c
    > +++ b/kernel/rcuclassic.c
    > @@ -239,7 +239,7 @@ static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > {
    > }
    >
    > -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
    > {
    > }


    I should have learned by now to test with all relevant config parameter
    combinations... My apologies for the hassle!!!

    Thanx, Paul
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  17. Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

    On Fri, Oct 03, 2008 at 06:12:28PM +0800, Lai Jiangshan wrote:
    > I found a magic number in it.


    Good catch, Jiangshan!!! See below for responses, and please see the
    end of this email for an untested patch.

    > Paul E. McKenney wrote:
    > > Hello again!
    > >
    > > This patch adds stalled-CPU detection to Classic RCU. This capability
    > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
    > > which defaults disabled. This is a debugging feature, not something
    > > that non-kernel-hackers would be expected to care about. This feature
    > > can detect looping CPUs in !PREEMPT builds and looping CPUs with
    > > preemption disabled in PREEMPT builds. This is essentially a port of
    > > this functionality from the treercu patch.
    > >
    > > This version uses jiffies rather than get_seconds(), which eliminates
    > > the spurious boot-time CPU stall warnings seen on some systems with
    > > the previous patch.
    > >
    > > This is still against v2.6.27-rc8 -- I will do a version against tip
    > > this evening (Pacific Time) when I get back to the combination of better
    > > bandwidth and AC power.
    > >
    > > Signed-off-by: Paul E. McKenney
    > > ---
    > >
    > > include/linux/rcuclassic.h | 9 ++++
    > > kernel/rcuclassic.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
    > > lib/Kconfig.debug | 13 ++++++
    > > 3 files changed, 110 insertions(+)
    > >
    > > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    > > index 4ab8436..cab055b 100644
    > > --- a/include/linux/rcuclassic.h
    > > +++ b/include/linux/rcuclassic.h
    > > @@ -40,6 +40,10 @@
    > > #include
    > > #include
    > >
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
    > > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
    > > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > >
    > > /* Global control variables for rcupdate callback mechanism. */
    > > struct rcu_ctrlblk {
    > > @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
    > > spinlock_t lock ____cacheline_internodealigned_in_smp;
    > > cpumask_t cpumask; /* CPUs that need to switch in order */
    > > /* for current batch to proceed. */
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > + unsigned long gp_start; /* Time at which GP started in jiffies. */
    > > + unsigned long jiffies_stall;
    > > + /* Time at which to check for CPU stalls. */
    > > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > > } ____cacheline_internodealigned_in_smp;
    > >
    > > /* Is batch a before batch b ? */
    > > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    > > index aad93cd..a299876 100644
    > > --- a/kernel/rcuclassic.c
    > > +++ b/kernel/rcuclassic.c
    > > @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
    > > }
    > > #endif
    > >
    > > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    > > +
    > > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > > +{
    > > + rcp->gp_start = jiffies;
    > > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
    > > +}
    > > +
    > > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
    > > +{
    > > + int cpu;
    > > + long delta;
    > > + unsigned long flags;
    > > +
    > > + /* Only let one CPU complain about others per time interval. */
    > > +
    > > + spin_lock_irqsave(&rcp->lock, flags);
    > > + delta = jiffies - rcp->jiffies_stall;
    > > + if (delta < 2 || rcp->cur != rcp->completed) {

    >
    > Is it (2 * HZ)?
    > should it be defined as macro?


    It should be "2", though a macro might be good. The reason for "2" is
    that it guarantees that the other CPU had a full period to respond, so
    it would be "2" regardless of the time units.

    > > + spin_unlock_irqrestore(&rcp->lock, flags);
    > > + return;
    > > + }
    > > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > > + spin_unlock_irqrestore(&rcp->lock, flags);
    > > +
    > > + /* OK, time to rat on our buddy... */
    > > +
    > > + printk(KERN_ERR "RCU detected CPU stalls:");
    > > + for_each_possible_cpu(cpu) {
    > > + if (cpu_isset(cpu, rcp->cpumask))
    > > + printk(" %d", cpu);
    > > + }
    > > + printk(" (detected by %d, t=%ld jiffies)\n",
    > > + smp_processor_id(), (long)(jiffies - rcp->gp_start));
    > > +}
    > > +
    > > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
    > > +{
    > > + unsigned long flags;
    > > +
    > > + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
    > > + smp_processor_id(), jiffies,
    > > + jiffies - rcp->gp_start);
    > > + dump_stack();
    > > + spin_lock_irqsave(&rcp->lock, flags);
    > > + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
    > > + rcp->jiffies_stall =
    > > + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
    > > + spin_unlock_irqrestore(&rcp->lock, flags);
    > > + set_need_resched(); /* kick ourselves to get things going. */
    > > +}
    > > +
    > > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    > > +{
    > > + long delta;
    > > +
    > > + delta = jiffies - rcp->jiffies_stall;
    > > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
    > > +
    > > + /* We haven't checked in, so go dump stack. */
    > > + print_cpu_stall(rcp);
    > > +
    > > + } else if (rcp->cur != rcp->completed && delta >= 2) {
    > > +
    > > + /* They had two seconds to dump stack, so complain. */

    >
    > appear here again! and it's inconsistent with comment.


    Indeed! The comment is wrong.

    > > + print_other_cpu_stall(rcp);
    > > + }
    > > +}
    > > +
    > > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > > +
    > > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
    > > +{
    > > +}
    > > +
    > > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > > +{
    > > +}
    > > +
    > > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
    > > +
    > > /**
    > > * call_rcu - Queue an RCU callback for invocation after a grace period.
    > > * @head: structure to be used for queueing the RCU updates.
    > > @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
    > > */
    > > smp_wmb();
    > > rcp->cur++;
    > > + record_gp_stall_check_time(rcp);
    > >
    > > /*
    > > * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
    > > @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
    > >
    > > static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
    > > {
    > > + /* Check for CPU stalls, if enabled. */
    > > + check_cpu_stall(rcp);
    > > +
    > > /* This cpu has pending rcu entries and the grace period
    > > * for them has completed.
    > > */
    > > @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
    > > static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
    > > struct rcu_data *rdp)
    > > {
    > > +#ifdef CONFIG_DEBUG_RCU_STALL
    > > + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
    > > +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
    > > memset(rdp, 0, sizeof(*rdp));
    > > rdp->curtail = &rdp->curlist;
    > > rdp->nxttail = &rdp->nxtlist;
    > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
    > > index 0b50481..9fee969 100644
    > > --- a/lib/Kconfig.debug
    > > +++ b/lib/Kconfig.debug
    > > @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
    > > Say N here if you want the RCU torture tests to start only
    > > after being manually enabled via /proc.
    > >
    > > +config RCU_CPU_STALL_DETECTOR
    > > + bool "Check for stalled CPUs delaying RCU grace periods"
    > > + depends on CLASSIC_RCU
    > > + default n
    > > + help
    > > + This option causes RCU to printk information on which
    > > + CPUs are delaying the current grace period, but only when
    > > + the grace period extends for excessive time periods.
    > > +
    > > + Say Y if you want RCU to perform such checks.
    > > +
    > > + Say N if you are unsure.
    > > +
    > > config KPROBES_SANITY_TEST
    > > bool "Kprobes sanity tests"
    > > depends on DEBUG_KERNEL


    Signed-off-by: Paul E. McKenney
    ---

    include/linux/rcuclassic.h | 4 ++++
    kernel/rcuclassic.c | 6 +++---
    2 files changed, 7 insertions(+), 3 deletions(-)

    diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
    index 8388647..69e1929 100644
    --- a/include/linux/rcuclassic.h
    +++ b/include/linux/rcuclassic.h
    @@ -43,6 +43,10 @@
    #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
    #define RCU_SECONDS_TILL_STALL_CHECK (3 * HZ) /* for rcp->jiffies_stall */
    #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
    +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
    + /* to take at least one */
    + /* scheduling clock irq */
    + /* before ratting on them. */
    #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

    /* Global control variables for rcupdate callback mechanism. */
    diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
    index 0ce0802..8c2d15d 100644
    --- a/kernel/rcuclassic.c
    +++ b/kernel/rcuclassic.c
    @@ -136,7 +136,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)

    spin_lock_irqsave(&rcp->lock, flags);
    delta = jiffies - rcp->seconds_stall;
    - if (delta < 2 || rcp->cur != rcp->completed) {
    + if (delta < RCU_STALL_RAT_DELAY || rcp->cur != rcp->completed) {
    spin_unlock_irqrestore(&rcp->lock, flags);
    return;
    }
    @@ -180,9 +180,9 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
    /* We haven't checked in, so go dump stack. */
    print_cpu_stall(rcp);

    - } else if (rcp->cur != rcp->completed && delta >= 2) {
    + } else if (rcp->cur != rcp->completed && delta >= RCU_STALL_RAT_DELAY) {

    - /* They had two seconds to dump stack, so complain. */
    + /* They had two time units to dump stack, so complain. */
    print_other_cpu_stall(rcp);
    }
    }
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread