[PATCH] bitops: simplify generic bit finding functions - Kernel

This is a discussion on [PATCH] bitops: simplify generic bit finding functions - Kernel ; The mapsize optimizations which were moved from x86 to the generic code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the binary size on non x86 architectures. Looking into the real effects of the "optimizations" it turned out that they are not used in ...

+ Reply to Thread
Page 2 of 2 FirstFirst 1 2
Results 21 to 27 of 27

Thread: [PATCH] bitops: simplify generic bit finding functions

  1. [PATCH] bitops: remove "optimizations"

    The mapsize optimizations which were moved from x86 to the generic
    code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the
    binary size on non x86 architectures.

    Looking into the real effects of the "optimizations" it turned out
    that they are not used in find_next_bit() and find_next_zero_bit().

    The ones in find_first_bit() and find_first_zero_bit() are used in a
    couple of places but none of them is a real hot path.

    Remove the "optimizations" all together and call the library functions
    unconditionally.

    Boot-tested on x86 and compile tested on every cross compiler I have.

    Signed-off-by: Thomas Gleixner

    ---
    include/linux/bitops.h | 115 +++++--------------------------------------------
    lib/find_next_bit.c | 22 ++++-----
    2 files changed, 22 insertions(+), 115 deletions(-)

    Index: linux-2.6/include/linux/bitops.h
    ================================================== =================
    --- linux-2.6.orig/include/linux/bitops.h
    +++ linux-2.6/include/linux/bitops.h
    @@ -114,8 +114,6 @@ static inline unsigned fls_long(unsigned

    #ifdef __KERNEL__
    #ifdef CONFIG_GENERIC_FIND_FIRST_BIT
    -extern unsigned long __find_first_bit(const unsigned long *addr,
    - unsigned long size);

    /**
    * find_first_bit - find the first set bit in a memory region
    @@ -124,28 +122,8 @@ extern unsigned long __find_first_bit(co
    *
    * Returns the bit number of the first set bit.
    */
    -static __always_inline unsigned long
    -find_first_bit(const unsigned long *addr, unsigned long size)
    -{
    - /* Avoid a function call if the bitmap size is a constant */
    - /* and not bigger than BITS_PER_LONG. */
    -
    - /* insert a sentinel so that __ffs returns size if there */
    - /* are no set bits in the bitmap */
    - if (__builtin_constant_p(size) && (size < BITS_PER_LONG))
    - return __ffs((*addr) | (1ul << size));
    -
    - /* the result of __ffs(0) is undefined, so it needs to be */
    - /* handled separately */
    - if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
    - return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr);
    -
    - /* size is not constant or too big */
    - return __find_first_bit(addr, size);
    -}
    -
    -extern unsigned long __find_first_zero_bit(const unsigned long *addr,
    - unsigned long size);
    +extern unsigned long find_first_bit(const unsigned long *addr,
    + unsigned long size);

    /**
    * find_first_zero_bit - find the first cleared bit in a memory region
    @@ -154,31 +132,12 @@ extern unsigned long __find_first_zero_b
    *
    * Returns the bit number of the first cleared bit.
    */
    -static __always_inline unsigned long
    -find_first_zero_bit(const unsigned long *addr, unsigned long size)
    -{
    - /* Avoid a function call if the bitmap size is a constant */
    - /* and not bigger than BITS_PER_LONG. */
    -
    - /* insert a sentinel so that __ffs returns size if there */
    - /* are no set bits in the bitmap */
    - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
    - return __ffs(~(*addr) | (1ul << size));
    - }
    -
    - /* the result of __ffs(0) is undefined, so it needs to be */
    - /* handled separately */
    - if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
    - return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr));
    -
    - /* size is not constant or too big */
    - return __find_first_zero_bit(addr, size);
    -}
    +extern unsigned long find_first_zero_bit(const unsigned long *addr,
    + unsigned long size);
    +
    #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */

    #ifdef CONFIG_GENERIC_FIND_NEXT_BIT
    -extern unsigned long __find_next_bit(const unsigned long *addr,
    - unsigned long size, unsigned long offset);

    /**
    * find_next_bit - find the next set bit in a memory region
    @@ -186,36 +145,8 @@ extern unsigned long __find_next_bit(con
    * @offset: The bitnumber to start searching at
    * @size: The bitmap size in bits
    */
    -static __always_inline unsigned long
    -find_next_bit(const unsigned long *addr, unsigned long size,
    - unsigned long offset)
    -{
    - unsigned long value;
    -
    - /* Avoid a function call if the bitmap size is a constant */
    - /* and not bigger than BITS_PER_LONG. */
    -
    - /* insert a sentinel so that __ffs returns size if there */
    - /* are no set bits in the bitmap */
    - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
    - value = (*addr) & ((~0ul) << offset);
    - value |= (1ul << size);
    - return __ffs(value);
    - }
    -
    - /* the result of __ffs(0) is undefined, so it needs to be */
    - /* handled separately */
    - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
    - value = (*addr) & ((~0ul) << offset);
    - return (value == 0) ? BITS_PER_LONG : __ffs(value);
    - }
    -
    - /* size is not constant or too big */
    - return __find_next_bit(addr, size, offset);
    -}
    -
    -extern unsigned long __find_next_zero_bit(const unsigned long *addr,
    - unsigned long size, unsigned long offset);
    +extern unsigned long find_next_bit(const unsigned long *addr,
    + unsigned long size, unsigned long offset);

    /**
    * find_next_zero_bit - find the next cleared bit in a memory region
    @@ -223,33 +154,11 @@ extern unsigned long __find_next_zero_bi
    * @offset: The bitnumber to start searching at
    * @size: The bitmap size in bits
    */
    -static __always_inline unsigned long
    -find_next_zero_bit(const unsigned long *addr, unsigned long size,
    - unsigned long offset)
    -{
    - unsigned long value;
    -
    - /* Avoid a function call if the bitmap size is a constant */
    - /* and not bigger than BITS_PER_LONG. */
    -
    - /* insert a sentinel so that __ffs returns size if there */
    - /* are no set bits in the bitmap */
    - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
    - value = (~(*addr)) & ((~0ul) << offset);
    - value |= (1ul << size);
    - return __ffs(value);
    - }
    -
    - /* the result of __ffs(0) is undefined, so it needs to be */
    - /* handled separately */
    - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
    - value = (~(*addr)) & ((~0ul) << offset);
    - return (value == 0) ? BITS_PER_LONG : __ffs(value);
    - }
    -
    - /* size is not constant or too big */
    - return __find_next_zero_bit(addr, size, offset);
    -}
    +
    +extern unsigned long find_next_zero_bit(const unsigned long *addr,
    + unsigned long size,
    + unsigned long offset);
    +
    #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
    #endif /* __KERNEL__ */
    #endif
    Index: linux-2.6/lib/find_next_bit.c
    ================================================== =================
    --- linux-2.6.orig/lib/find_next_bit.c
    +++ linux-2.6/lib/find_next_bit.c
    @@ -20,8 +20,8 @@
    /*
    * Find the next set bit in a memory region.
    */
    -unsigned long __find_next_bit(const unsigned long *addr,
    - unsigned long size, unsigned long offset)
    +unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
    + unsigned long offset)
    {
    const unsigned long *p = addr + BITOP_WORD(offset);
    unsigned long result = offset & ~(BITS_PER_LONG-1);
    @@ -58,14 +58,14 @@ found_first:
    found_middle:
    return result + __ffs(tmp);
    }
    -EXPORT_SYMBOL(__find_next_bit);
    +EXPORT_SYMBOL(find_next_bit);

    /*
    * This implementation of find_{first,next}_zero_bit was stolen from
    * Linus' asm-alpha/bitops.h.
    */
    -unsigned long __find_next_zero_bit(const unsigned long *addr,
    - unsigned long size, unsigned long offset)
    +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
    + unsigned long offset)
    {
    const unsigned long *p = addr + BITOP_WORD(offset);
    unsigned long result = offset & ~(BITS_PER_LONG-1);
    @@ -102,15 +102,14 @@ found_first:
    found_middle:
    return result + ffz(tmp);
    }
    -EXPORT_SYMBOL(__find_next_zero_bit);
    +EXPORT_SYMBOL(find_next_zero_bit);
    #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */

    #ifdef CONFIG_GENERIC_FIND_FIRST_BIT
    /*
    * Find the first set bit in a memory region.
    */
    -unsigned long __find_first_bit(const unsigned long *addr,
    - unsigned long size)
    +unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
    {
    const unsigned long *p = addr;
    unsigned long result = 0;
    @@ -131,13 +130,12 @@ unsigned long __find_first_bit(const uns
    found:
    return result + __ffs(tmp);
    }
    -EXPORT_SYMBOL(__find_first_bit);
    +EXPORT_SYMBOL(find_first_bit);

    /*
    * Find the first cleared bit in a memory region.
    */
    -unsigned long __find_first_zero_bit(const unsigned long *addr,
    - unsigned long size)
    +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
    {
    const unsigned long *p = addr;
    unsigned long result = 0;
    @@ -158,7 +156,7 @@ unsigned long __find_first_zero_bit(cons
    found:
    return result + ffz(tmp);
    }
    -EXPORT_SYMBOL(__find_first_zero_bit);
    +EXPORT_SYMBOL(find_first_zero_bit);
    #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */

    #ifdef __BIG_ENDIAN
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [PATCH] bitops: remove "optimizations"

    From: David Miller
    Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT)

    > From: Thomas Gleixner
    > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)
    >
    > > Signed-off-by: Thomas Gleixner

    >
    > Acked-by: David S. Miller


    Ironically, I just bisected sparc64 bootup failures to the following
    changeset. It's very late here, and I haven't looked into the
    details, but it seems to wedge in free_area_init_nodes() on a non-NUMA
    system with CONFIG_NUMA disabled.

    Isn't it funny that this optimization not only was useless, but also
    broke things. :-/

    64970b68d2b3ed32b964b0b30b1b98518fde388e is first bad commit
    commit 64970b68d2b3ed32b964b0b30b1b98518fde388e
    Author: Alexander van Heukelum
    Date: Tue Mar 11 16:17:19 2008 +0100

    x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps

    This moves an optimization for searching constant-sized small
    bitmaps form x86_64-specific to generic code.

    On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
    changes with this applied. I have observed only four places where this
    optimization avoids a call into find_next_bit:

    In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
    and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
    In __next_cpu a call is avoided for a 32-bit bitmap. That's it.

    On x86_64, 52 locations are optimized with a minimal increase in
    code size:

    Current #testing defconfig:
    146 x bsf, 27 x find_next_*bit
    text data bss dec hex filename
    5392637 846592 724424 6963653 6a41c5 vmlinux

    After removing the x86_64 specific optimization for find_next_*bit:
    94 x bsf, 79 x find_next_*bit
    text data bss dec hex filename
    5392358 846592 724424 6963374 6a40ae vmlinux

    After this patch (making the optimization generic):
    146 x bsf, 27 x find_next_*bit
    text data bss dec hex filename
    5392396 846592 724424 6963412 6a40d4 vmlinux

    [ tglx@linutronix.de: build fixes ]

    Signed-off-by: Ingo Molnar

    :040000 040000 c0407f7df7dc5333c78c300780931a269ae0dedd 2f6ef634a35b6dd46e40ea70128c39a742e60501 M include
    :040000 040000 556ee6ccb15cdbb1aa5a690c732a5492d58dfe6f 937d05977f46056c12b5da64ca62959c06a912c0 M lib
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH] bitops: remove "optimizations"


    * David Miller wrote:

    > From: David Miller
    > Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT)
    >
    > > From: Thomas Gleixner
    > > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)
    > >
    > > > Signed-off-by: Thomas Gleixner

    > >
    > > Acked-by: David S. Miller

    >
    > Ironically, I just bisected sparc64 bootup failures to the following
    > changeset. It's very late here, and I haven't looked into the
    > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA
    > system with CONFIG_NUMA disabled.


    hm. I guess the next thing you plan to check is whether Thomas's patch
    fixes it?

    > Isn't it funny that this optimization not only was useless, but also
    > broke things. :-/


    yeah and sorry :-/ It would still be nice to understand why this
    happened, it looks like a weird (and unexpected) interaction.

    Ingo
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH] bitops: remove "optimizations"

    On Tue, 29 Apr 2008, David Miller wrote:
    > From: David Miller
    > Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT)
    >
    > > From: Thomas Gleixner
    > > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)
    > >
    > > > Signed-off-by: Thomas Gleixner

    > >
    > > Acked-by: David S. Miller

    >
    > Ironically, I just bisected sparc64 bootup failures to the following
    > changeset. It's very late here, and I haven't looked into the
    > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA
    > system with CONFIG_NUMA disabled.
    >
    > Isn't it funny that this optimization not only was useless, but also
    > broke things. :-/


    The question is, whether it broke things or just unearthed some bug
    hidden elsewhere.

    Thanks,
    tglx
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH] bitops: remove "optimizations"

    From: Ingo Molnar
    Date: Tue, 29 Apr 2008 16:20:39 +0200

    >
    > * David Miller wrote:
    >
    > > Ironically, I just bisected sparc64 bootup failures to the following
    > > changeset. It's very late here, and I haven't looked into the
    > > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA
    > > system with CONFIG_NUMA disabled.

    >
    > hm. I guess the next thing you plan to check is whether Thomas's patch
    > fixes it?


    Yes, I just needed some sleep first, which I just obtained :-)
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH] bitops: remove "optimizations"

    From: Thomas Gleixner
    Date: Tue, 29 Apr 2008 18:51:51 +0200 (CEST)

    > The question is, whether it broke things or just unearthed some bug
    > hidden elsewhere.


    So I did a quick test, just #if 0'ing out the optimization inline
    portions of the find_first_bit() code in linux/bitops.h, and forcing
    it to always unconditionally call __find_first_bit() fixes the
    regression.

    Given that others who tested could not find one case where the
    optimization cases actually applied, and it's breaking things for me,
    my theory is that it's triggering for some obscure case on sparc64 and
    thus showing a bug in these optimizations since in practice I'm the
    only person to actually test this new code.
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH] bitops: remove "optimizations"

    From: David Miller
    Date: Tue, 29 Apr 2008 15:58:24 -0700 (PDT)

    > Given that others who tested could not find one case where the
    > optimization cases actually applied, and it's breaking things for me,
    > my theory is that it's triggering for some obscure case on sparc64 and
    > thus showing a bug in these optimizations since in practice I'm the
    > only person to actually test this new code.


    Ok, I think I see the problem.

    The core issue is that (X << N) is undefined when N is >= the
    word size, but that's exactly what the find_next_bit() inline
    optimizations do.

    This optimization code will trigger on 64-bit if NR_CPUS is set
    to 64, and you actually have 64 cpus. It should also occur
    on 32-bit if NR_CPUS=32 and you have 32 cpus.

    The bogus expansion occurs in lib/cpumask.c:__next_cpu()

    After processing cpu 63, we'll use offset==64 and thus try
    to make the undefined shift I described above, causing the
    caller's cpumask iteration loop to run forever.

    This code was really not tested very well at all.
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread
Page 2 of 2 FirstFirst 1 2