[PATCH, RFC] hacks to allow -rt to run kernbench on POWER - Kernel

This is a discussion on [PATCH, RFC] hacks to allow -rt to run kernbench on POWER - Kernel ; Hello! A few random patches that permit POWER to pass kernbench on -rt. Many of these have more focus on expediency than care for correctness, so might best be thought of as workarounds rather than as complete solutions. There are still ...

+ Reply to Thread
Results 1 to 12 of 12

Thread: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

  1. [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

    Hello!

    A few random patches that permit POWER to pass kernbench on -rt.
    Many of these have more focus on expediency than care for correctness,
    so might best be thought of as workarounds rather than as complete solutions.
    There are still issues not addressed by this patch, including:

    o kmem_cache_alloc() from non-preemptible context during
    bootup (xics_startup() building the irq_radix_revmap()).

    o unmap_vmas() freeing pages with preemption disabled.
    Might be able to address this by linking the pages together,
    then freeing them en masse after preemption has been re-enabled,
    but there is likely a better approach.

    Thoughts?

    Signed-off-by: Paul E. McKenney
    ---

    arch/powerpc/kernel/prom.c | 2 +-
    arch/powerpc/mm/fault.c | 3 +++
    arch/powerpc/mm/tlb_64.c | 8 ++++++--
    arch/powerpc/platforms/pseries/eeh.c | 2 +-
    drivers/of/base.c | 2 +-
    include/asm-powerpc/tlb.h | 5 ++++-
    include/asm-powerpc/tlbflush.h | 5 ++++-
    mm/memory.c | 2 ++
    8 files changed, 22 insertions(+), 7 deletions(-)

    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c
    --- linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c 2007-10-28 13:37:23.000000000 -0700
    @@ -80,7 +80,7 @@ struct boot_param_header *initial_boot_p

    extern struct device_node *allnodes; /* temporary while merging */

    -extern rwlock_t devtree_lock; /* temporary while merging */
    +extern raw_rwlock_t devtree_lock; /* temporary while merging */

    /* export that to outside world */
    struct device_node *of_chosen;
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/mm/fault.c linux-2.6.23.1-rt4-fix/arch/powerpc/mm/fault.c
    --- linux-2.6.23.1-rt4/arch/powerpc/mm/fault.c 2007-10-27 22:20:57.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/mm/fault.c 2007-10-28 13:49:07.000000000 -0700
    @@ -301,6 +301,7 @@ good_area:
    if (get_pteptr(mm, address, &ptep, &pmdp)) {
    spinlock_t *ptl = pte_lockptr(mm, pmdp);
    spin_lock(ptl);
    + preempt_disable();
    if (pte_present(*ptep)) {
    struct page *page = pte_page(*ptep);

    @@ -310,10 +311,12 @@ good_area:
    }
    pte_update(ptep, 0, _PAGE_HWEXEC);
    _tlbie(address);
    + preempt_enable();
    pte_unmap_unlock(ptep, ptl);
    up_read(&mm->mmap_sem);
    return 0;
    }
    + preempt_enable();
    pte_unmap_unlock(ptep, ptl);
    }
    #endif
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c
    --- linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c 2007-10-27 22:20:57.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c 2007-10-28 13:50:38.000000000 -0700
    @@ -194,7 +194,9 @@ void hpte_need_flush(struct mm_struct *m
    * batch
    */
    if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
    + preempt_disable();
    __flush_tlb_pending(batch);
    + preempt_enable();
    i = 0;
    }
    if (i == 0) {
    @@ -211,7 +213,9 @@ void hpte_need_flush(struct mm_struct *m
    * always flush it on RT to reduce scheduling latency.
    */
    if (machine_is(celleb)) {
    + preempt_disable();
    __flush_tlb_pending(batch);
    + preempt_enable();
    return;
    }
    #endif /* CONFIG_PREEMPT_RT */
    @@ -292,7 +296,7 @@ void __flush_hash_table_range(struct mm_
    * to being hashed). This is not the most performance oriented
    * way to do things but is fine for our needs here.
    */
    - local_irq_save(flags);
    + raw_local_irq_save(flags);
    arch_enter_lazy_mmu_mode();
    for (; start < end; start += PAGE_SIZE) {
    pte_t *ptep = find_linux_pte(mm->pgd, start);
    @@ -306,7 +310,7 @@ void __flush_hash_table_range(struct mm_
    hpte_need_flush(mm, start, ptep, pte, 0);
    }
    arch_leave_lazy_mmu_mode();
    - local_irq_restore(flags);
    + raw_local_irq_restore(flags);
    }

    #endif /* CONFIG_HOTPLUG */
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c
    --- linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c 2007-10-28 15:43:54.000000000 -0700
    @@ -97,7 +97,7 @@ int eeh_subsystem_enabled;
    EXPORT_SYMBOL(eeh_subsystem_enabled);

    /* Lock to avoid races due to multiple reports of an error */
    -static DEFINE_SPINLOCK(confirm_error_lock);
    +static DEFINE_RAW_SPINLOCK(confirm_error_lock);

    /* Buffer for reporting slot-error-detail rtas calls. Its here
    * in BSS, and not dynamically alloced, so that it ends up in
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/drivers/of/base.c linux-2.6.23.1-rt4-fix/drivers/of/base.c
    --- linux-2.6.23.1-rt4/drivers/of/base.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/drivers/of/base.c 2007-10-28 13:38:36.000000000 -0700
    @@ -25,7 +25,7 @@ struct device_node *allnodes;
    /* use when traversing tree through the allnext, child, sibling,
    * or parent members of struct device_node.
    */
    -DEFINE_RWLOCK(devtree_lock);
    +DEFINE_RAW_RWLOCK(devtree_lock);

    int of_n_addr_cells(struct device_node *np)
    {
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h
    --- linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h 2007-10-28 11:36:47.000000000 -0700
    @@ -118,8 +118,11 @@ static inline void arch_leave_lazy_mmu_m
    {
    struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);

    - if (batch->index)
    + if (batch->index) {
    + preempt_disable();
    __flush_tlb_pending(batch);
    + preempt_enable();
    + }
    batch->active = 0;
    }

    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h
    --- linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h 2007-10-28 11:36:05.000000000 -0700
    @@ -44,8 +44,11 @@ static inline void tlb_flush(struct mmu_
    * pages are going to be freed and we really don't want to have a CPU
    * access a freed page because it has a stale TLB
    */
    - if (tlbbatch->index)
    + if (tlbbatch->index) {
    + preempt_disable();
    __flush_tlb_pending(tlbbatch);
    + preempt_enable();
    + }

    pte_free_finish();
    }
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/mm/memory.c linux-2.6.23.1-rt4-fix/mm/memory.c
    --- linux-2.6.23.1-rt4/mm/memory.c 2007-10-27 22:20:57.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/mm/memory.c 2007-10-28 15:40:36.000000000 -0700
    @@ -664,6 +664,7 @@ static unsigned long zap_pte_range(struc
    int anon_rss = 0;

    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    + preempt_disable();
    arch_enter_lazy_mmu_mode();
    do {
    pte_t ptent = *pte;
    @@ -732,6 +733,7 @@ static unsigned long zap_pte_range(struc

    add_mm_rss(mm, file_rss, anon_rss);
    arch_leave_lazy_mmu_mode();
    + preempt_enable();
    pte_unmap_unlock(pte - 1, ptl);

    return addr;
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER


    On Mon, 2007-10-29 at 11:50 -0700, Paul E. McKenney wrote:
    > Hello!
    >
    > A few random patches that permit POWER to pass kernbench on -rt.
    > Many of these have more focus on expediency than care for correctness,
    > so might best be thought of as workarounds than as complete solutions.
    > There are still issues not addressed by this patch, including:
    >
    > o kmem_cache_alloc() from non-preemptible context during
    > bootup (xics_startup() building the irq_radix_revmap()).
    >
    > o unmap_vmas() freeing pages with preemption disabled.
    > Might be able to address this by linking the pages together,
    > then freeing them en masse after preemption has been re-enabled,
    > but there is likely a better approach.
    >
    > Thoughts?


    I see a lot of cases where you add preempt_disable/enable around areas
    that have the PTE lock held...

    So in -rt, spin_lock doesn't disable preempt ? I'm a bit worried...
    there are some strong requirements that anything within that lock is not
    preempted, so zap_pte_ranges() is the obvious one but all of them would
    need to be addressed.

    Ben.


    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

    On Tue, Oct 30, 2007 at 07:07:48AM +1100, Benjamin Herrenschmidt wrote:
    >
    > On Mon, 2007-10-29 at 11:50 -0700, Paul E. McKenney wrote:
    > > Hello!
    > >
    > > A few random patches that permit POWER to pass kernbench on -rt.
    > > Many of these have more focus on expediency than care for correctness,
    > > so might best be thought of as workarounds than as complete solutions.
    > > There are still issues not addressed by this patch, including:
    > >
    > > o kmem_cache_alloc() from non-preemptible context during
    > > bootup (xics_startup() building the irq_radix_revmap()).
    > >
    > > o unmap_vmas() freeing pages with preemption disabled.
    > > Might be able to address this by linking the pages together,
    > > then freeing them en masse after preemption has been re-enabled,
    > > but there is likely a better approach.
    > >
    > > Thoughts?

    >
    > I see a lot of case where you add preempt_disable/enable around areas
    > that have the PTE lock held...
    >
    > So in -rt, spin_lock doesn't disable preempt ? I'm a bit worried...
    > there are some strong requirements that anything within that lock is not
    > preempted, so zap_pte_ranges() is the obvious ones but all of them would
    > need to be addressed.


    Right in one! One of the big changes in -rt is that spinlock critical
    sections (and RCU read-side critical sections, for that matter) are
    preemptible under CONFIG_PREEMPT_RT.

    And I agree that this patchset will have missed quite a few places where
    additional changes are required. Hence the word "including" above, rather
    than something like "specifically". ;-)

    Thanx, Paul
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER


    On Mon, 2007-10-29 at 13:26 -0700, Paul E. McKenney wrote:
    >
    > > I see a lot of case where you add preempt_disable/enable around

    > areas
    > > that have the PTE lock held...
    > >
    > > So in -rt, spin_lock doesn't disable preempt ? I'm a bit worried...
    > > there are some strong requirements that anything within that lock is

    > not
    > > preempted, so zap_pte_ranges() is the obvious ones but all of them

    > would
    > > need to be addressed.

    >
    > Right in one! One of the big changes in -rt is that spinlock critical
    > sections (and RCU read-side critical sections, for that matter) are
    > preemptible under CONFIG_PREEMPT_RT.
    >
    > And I agree that this patchset will have missed quite a few places
    > where
    > additional changes are required. Hence the word "including" above,
    > rather
    > than something like "specifically". ;-)


    Ok, well, I'm pretty familiar with that MM code since I wrote a good
    deal of the current version so I'll try to spend some time with your
    patch have a look. It may have to wait for next week though, but feel
    free to ping me if you don't hear back, in case it falls through the
    hole in my brain :-)

    Ben.


    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

    On Tue, Oct 30, 2007 at 07:37:50AM +1100, Benjamin Herrenschmidt wrote:
    >
    > On Mon, 2007-10-29 at 13:26 -0700, Paul E. McKenney wrote:
    > >
    > > > I see a lot of case where you add preempt_disable/enable around

    > > areas
    > > > that have the PTE lock held...
    > > >
    > > > So in -rt, spin_lock doesn't disable preempt ? I'm a bit worried...
    > > > there are some strong requirements that anything within that lock is

    > > not
    > > > preempted, so zap_pte_ranges() is the obvious ones but all of them

    > > would
    > > > need to be addressed.

    > >
    > > Right in one! One of the big changes in -rt is that spinlock critical
    > > sections (and RCU read-side critical sections, for that matter) are
    > > preemptible under CONFIG_PREEMPT_RT.
    > >
    > > And I agree that this patchset will have missed quite a few places
    > > where
    > > additional changes are required. Hence the word "including" above,
    > > rather
    > > than something like "specifically". ;-)

    >
    > Ok, well, I'm pretty familiar with that MM code since I wrote a good
    > deal of the current version so I'll try to spend some time with your
    > patch have a look. It may have to wait for next week though, but feel
    > free to ping me if you don't hear back, in case it falls through the
    > hole in my brain :-)


    Works for me!!!

    Thanx, Paul
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER


    On Tue, 2007-10-30 at 07:07 +1100, Benjamin Herrenschmidt wrote:
    > On Mon, 2007-10-29 at 11:50 -0700, Paul E. McKenney wrote:
    > > Hello!
    > >
    > > A few random patches that permit POWER to pass kernbench on -rt.
    > > Many of these have more focus on expediency than care for correctness,
    > > so might best be thought of as workarounds than as complete solutions.
    > > There are still issues not addressed by this patch, including:
    > >
    > > o kmem_cache_alloc() from non-preemptible context during
    > > bootup (xics_startup() building the irq_radix_revmap()).
    > >
    > > o unmap_vmas() freeing pages with preemption disabled.
    > > Might be able to address this by linking the pages together,
    > > then freeing them en masse after preemption has been re-enabled,
    > > but there is likely a better approach.
    > >
    > > Thoughts?

    >
    > I see a lot of case where you add preempt_disable/enable around areas
    > that have the PTE lock held...
    >
    > So in -rt, spin_lock doesn't disable preempt ? I'm a bit worried...


    So as Paul mentioned, spin_lock is now a mutex. There is a new
    raw_spinlock however (simply change the way it is declared, calling
    conventions are the same) which is used in a very few areas where a
    traditional spin_lock is truly necessary. This may or may not be one of
    those times, but I wanted to point it out.

    --Darren


    > there are some strong requirements that anything within that lock is not
    > preempted, so zap_pte_ranges() is the obvious ones but all of them would
    > need to be addressed.
    >
    > Ben.
    >
    >


    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER


    > So as Paul mentioned, spin_lock is now a mutex. There is a new
    > raw_spinlock however (simply change the way it is declared, calling
    > conventions are the same) which is used in a very few areas where a
    > traditional spin_lock is truly necessary. This may or may not be one of
    > those times, but I wanted to point it out.


    Yeah, I figured that. My main worry has more to do with some fishy
    assumptions the powerpc VM code does regarding what can and cannot
    happen in those locked sections, among other things. I'll have to sit
    and think about it for a little while to convince myself we are ok ...
    or not. Plus we do keep track of various MM related things in per-CPU
    data structures but it looks like Paul already spotted that.

    Cheers,
    Ben.


    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

    On Thu, Nov 01, 2007 at 08:15:28AM +1100, Benjamin Herrenschmidt wrote:
    >
    > > So as Paul mentioned, spin_lock is now a mutex. There is a new
    > > raw_spinlock however (simply change the way it is declared, calling
    > > conventions are the same) which is used in a very few areas where a
    > > traditional spin_lock is truly necessary. This may or may not be one of
    > > those times, but I wanted to point it out.

    >
    > Yeah, I figured that. My main worry has more to do with some fishy
    > assumptions the powerpc VM code does regarding what can and cannot
    > happen in those locked sections, among other things. I'll have to sit
    > and think about it for a little while to convince myself we are ok ...
    > or not. Plus we do keep track of various MM related things in per-CPU
    > data structures but it looks like Paul already spotted that.


    My concern would be that I failed to spot all of them. ;-)

    Thanx, Paul
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER


    On Mon, 29 Oct 2007, Paul E. McKenney wrote:
    > diff -urpNa -X dontdiff linux-2.6.23.1-rt4/mm/memory.c linux-2.6.23.1-rt4-fix/mm/memory.c
    > --- linux-2.6.23.1-rt4/mm/memory.c 2007-10-27 22:20:57.000000000 -0700
    > +++ linux-2.6.23.1-rt4-fix/mm/memory.c 2007-10-28 15:40:36.000000000 -0700
    > @@ -664,6 +664,7 @@ static unsigned long zap_pte_range(struc
    > int anon_rss = 0;
    >
    > pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    > + preempt_disable();
    > arch_enter_lazy_mmu_mode();
    > do {
    > pte_t ptent = *pte;
    > @@ -732,6 +733,7 @@ static unsigned long zap_pte_range(struc
    >
    > add_mm_rss(mm, file_rss, anon_rss);
    > arch_leave_lazy_mmu_mode();
    > + preempt_enable();
    > pte_unmap_unlock(pte - 1, ptl);
    >
    > return addr;


    I'm pulling your patch for the above added code. Took me a few hours to
    find the culprit, but I was getting scheduling in atomic bugs. Turns out
    that this code you put "preempt_disable" in calls sleeping spinlocks.

    Might want to run with DEBUG_PREEMPT.

    Thanks,

    -- Steve

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  10. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

    On Wed, Dec 12, 2007 at 10:56:12PM -0500, Steven Rostedt wrote:
    >
    > On Mon, 29 Oct 2007, Paul E. McKenney wrote:
    > > diff -urpNa -X dontdiff linux-2.6.23.1-rt4/mm/memory.c linux-2.6.23.1-rt4-fix/mm/memory.c
    > > --- linux-2.6.23.1-rt4/mm/memory.c 2007-10-27 22:20:57.000000000 -0700
    > > +++ linux-2.6.23.1-rt4-fix/mm/memory.c 2007-10-28 15:40:36.000000000 -0700
    > > @@ -664,6 +664,7 @@ static unsigned long zap_pte_range(struc
    > > int anon_rss = 0;
    > >
    > > pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    > > + preempt_disable();
    > > arch_enter_lazy_mmu_mode();
    > > do {
    > > pte_t ptent = *pte;
    > > @@ -732,6 +733,7 @@ static unsigned long zap_pte_range(struc
    > >
    > > add_mm_rss(mm, file_rss, anon_rss);
    > > arch_leave_lazy_mmu_mode();
    > > + preempt_enable();
    > > pte_unmap_unlock(pte - 1, ptl);
    > >
    > > return addr;

    >
    > I'm pulling your patch for the above added code. Took me a few hours to
    > find the culprit, but I was getting scheduling in atomic bugs. Turns out
    > that this code you put "preempt_disable" in calls sleeping spinlocks.
    >
    > Might want to run with DEBUG_PREEMPT.


    I thought that you had already pulled the above version...

    Here is the replacement that I posted on November 9th (with much help
    from Ben H):

    http://lkml.org/lkml/2007/11/9/114

    Thanx, Paul

    Signed-off-by: Paul E. McKenney
    ---

    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/kernel/process.c linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/process.c
    --- linux-2.6.23.1-rt4/arch/powerpc/kernel/process.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/process.c 2007-11-12 09:18:55.000000000 -0800
    @@ -245,6 +245,10 @@ struct task_struct *__switch_to(struct t
    struct thread_struct *new_thread, *old_thread;
    unsigned long flags;
    struct task_struct *last;
    +#ifdef CONFIG_PREEMPT_RT
    + struct ppc64_tlb_batch *batch;
    + int hadbatch;
    +#endif /* #ifdef CONFIG_PREEMPT_RT */

    #ifdef CONFIG_SMP
    /* avoid complexity of lazy save/restore of fpu
    @@ -325,6 +329,17 @@ struct task_struct *__switch_to(struct t
    }
    #endif

    +#ifdef CONFIG_PREEMPT_RT
    + batch = &__get_cpu_var(ppc64_tlb_batch);
    + if (batch->active) {
    + hadbatch = 1;
    + if (batch->index) {
    + __flush_tlb_pending(batch);
    + }
    + batch->active = 0;
    + }
    +#endif /* #ifdef CONFIG_PREEMPT_RT */
    +
    local_irq_save(flags);

    account_system_vtime(current);
    @@ -335,6 +350,13 @@ struct task_struct *__switch_to(struct t

    local_irq_restore(flags);

    +#ifdef CONFIG_PREEMPT_RT
    + if (hadbatch) {
    + batch = &__get_cpu_var(ppc64_tlb_batch);
    + batch->active = 1;
    + }
    +#endif /* #ifdef CONFIG_PREEMPT_RT */
    +
    return last;
    }

    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c
    --- linux-2.6.23.1-rt4/arch/powerpc/kernel/prom.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/kernel/prom.c 2007-10-28 13:37:23.000000000 -0700
    @@ -80,7 +80,7 @@ struct boot_param_header *initial_boot_p

    extern struct device_node *allnodes; /* temporary while merging */

    -extern rwlock_t devtree_lock; /* temporary while merging */
    +extern raw_rwlock_t devtree_lock; /* temporary while merging */

    /* export that to outside world */
    struct device_node *of_chosen;
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c
    --- linux-2.6.23.1-rt4/arch/powerpc/mm/tlb_64.c 2007-10-27 22:20:57.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/mm/tlb_64.c 2007-11-08 16:49:04.000000000 -0800
    @@ -133,7 +133,7 @@ void pgtable_free_tlb(struct mmu_gather
    void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
    pte_t *ptep, unsigned long pte, int huge)
    {
    - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
    + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
    unsigned long vsid, vaddr;
    unsigned int psize;
    real_pte_t rpte;
    @@ -180,6 +180,7 @@ void hpte_need_flush(struct mm_struct *m
    */
    if (!batch->active) {
    flush_hash_page(vaddr, rpte, psize, 0);
    + put_cpu_var(ppc64_tlb_batch);
    return;
    }

    @@ -212,12 +213,14 @@ void hpte_need_flush(struct mm_struct *m
    */
    if (machine_is(celleb)) {
    __flush_tlb_pending(batch);
    + put_cpu_var(ppc64_tlb_batch);
    return;
    }
    #endif /* CONFIG_PREEMPT_RT */

    if (i >= PPC64_TLB_BATCH_NR)
    __flush_tlb_pending(batch);
    + put_cpu_var(ppc64_tlb_batch);
    }

    /*
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c
    --- linux-2.6.23.1-rt4/arch/powerpc/platforms/pseries/eeh.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/arch/powerpc/platforms/pseries/eeh.c 2007-10-28 15:43:54.000000000 -0700
    @@ -97,7 +97,7 @@ int eeh_subsystem_enabled;
    EXPORT_SYMBOL(eeh_subsystem_enabled);

    /* Lock to avoid races due to multiple reports of an error */
    -static DEFINE_SPINLOCK(confirm_error_lock);
    +static DEFINE_RAW_SPINLOCK(confirm_error_lock);

    /* Buffer for reporting slot-error-detail rtas calls. Its here
    * in BSS, and not dynamically alloced, so that it ends up in
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/drivers/of/base.c linux-2.6.23.1-rt4-fix/drivers/of/base.c
    --- linux-2.6.23.1-rt4/drivers/of/base.c 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/drivers/of/base.c 2007-10-28 13:38:36.000000000 -0700
    @@ -25,7 +25,7 @@ struct device_node *allnodes;
    /* use when traversing tree through the allnext, child, sibling,
    * or parent members of struct device_node.
    */
    -DEFINE_RWLOCK(devtree_lock);
    +DEFINE_RAW_RWLOCK(devtree_lock);

    int of_n_addr_cells(struct device_node *np)
    {
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h
    --- linux-2.6.23.1-rt4/include/asm-powerpc/tlbflush.h 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlbflush.h 2007-11-08 17:11:18.000000000 -0800
    @@ -109,18 +109,23 @@ extern void hpte_need_flush(struct mm_st

    static inline void arch_enter_lazy_mmu_mode(void)
    {
    - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
    + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);

    batch->active = 1;
    + put_cpu_var(ppc64_tlb_batch);
    }

    static inline void arch_leave_lazy_mmu_mode(void)
    {
    - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
    + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);

    - if (batch->index)
    - __flush_tlb_pending(batch);
    - batch->active = 0;
    + if (batch->active) {
    + if (batch->index) {
    + __flush_tlb_pending(batch);
    + }
    + batch->active = 0;
    + }
    + put_cpu_var(ppc64_tlb_batch);
    }

    #define arch_flush_lazy_mmu_mode() do {} while (0)
    diff -urpNa -X dontdiff linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h
    --- linux-2.6.23.1-rt4/include/asm-powerpc/tlb.h 2007-10-12 09:43:44.000000000 -0700
    +++ linux-2.6.23.1-rt4-fix/include/asm-powerpc/tlb.h 2007-10-28 11:36:05.000000000 -0700
    @@ -44,8 +44,11 @@ static inline void tlb_flush(struct mmu_
    * pages are going to be freed and we really don't want to have a CPU
    * access a freed page because it has a stale TLB
    */
    - if (tlbbatch->index)
    + if (tlbbatch->index) {
    + preempt_disable();
    __flush_tlb_pending(tlbbatch);
    + preempt_enable();
    + }

    pte_free_finish();
    }
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER



    On Wed, 12 Dec 2007, Paul E. McKenney wrote:

    > >
    > > I'm pulling your patch for the above added code. Took me a few hours to
    > > find the culprit, but I was getting scheduling in atomic bugs. Turns out
    > > that this code you put "preempt_disable" in calls sleeping spinlocks.
    > >
    > > Might want to run with DEBUG_PREEMPT.

    >
    > I thought that you had already pulled the above version...
    >
    > Here is the replacement that I posted on November 9th (with much help
    > from Ben H):
    >
    > http://lkml.org/lkml/2007/11/9/114


    OK, sorry, I somehow got the two reversed, and I think I replaced the new
    one with the old one :-(

    I blame the expresso!

    >
    > Signed-off-by: Paul E. McKenney


    OK, will apply to -rt14

    Thanks,

    -- Steve
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. Re: [PATCH, RFC] hacks to allow -rt to run kernbench on POWER

    On Thu, Dec 13, 2007 at 07:52:41AM -0500, Steven Rostedt wrote:
    >
    >
    > On Wed, 12 Dec 2007, Paul E. McKenney wrote:
    >
    > > >
    > > > I'm pulling your patch for the above added code. Took me a few hours to
    > > > find the culprit, but I was getting scheduling in atomic bugs. Turns out
    > > > that this code you put "preempt_disable" in calls sleeping spinlocks.
    > > >
    > > > Might want to run with DEBUG_PREEMPT.

    > >
    > > I thought that you had already pulled the above version...
    > >
    > > Here is the replacement that I posted on November 9th (with much help
    > > from Ben H):
    > >
    > > http://lkml.org/lkml/2007/11/9/114

    >
    > OK, sorry, I somehow got the two reversed, and I think I replaced the new
    > one with the old one :-(


    That sounds like something -I- would do!!! ;-)

    > I blame the expresso!


    If you give -me- espresso, you also have to give me a putty knife so that
    I can scrape myself off of the ceiling!

    > > Signed-off-by: Paul E. McKenney

    >
    > OK, will apply to -rt14


    Thank you!

    Thanx, Paul
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread