[PATCH] x86: create array based interface to change page attribute - Kernel

This is a discussion on [PATCH] x86: create array based interface to change page attribute - Kernel ; When cpa was refactored to the new set_memory_ interfaces, it removed a special case fast path for AGP systems, which did a lot of page by page attribute changing and held the flushes until they were finished. The DRM memory ...

+ Reply to Thread
Page 1 of 2 1 2 LastLast
Results 1 to 20 of 39

Thread: [PATCH] x86: create array based interface to change page attribute

  1. [PATCH] x86: create array based interface to change page attribute


    When cpa was refactored to the new set_memory_ interfaces, it removed
    a special case fast path for AGP systems, which did a lot of page by page
    attribute changing and held the flushes until they were finished. The
    DRM memory manager also required this to get useable speeds.

    This introduces a new interface, which accepts an array of memory addresses
    to have attributes changed on and to flush once finished.

    Further changes to the AGP stack to actually use this interface will be
    published later.

    Signed-off-by: Dave Airlie
    ---
    arch/x86/mm/pageattr-test.c | 12 ++-
    arch/x86/mm/pageattr.c | 164 +++++++++++++++++++++++++++++++-----------
    include/asm-x86/cacheflush.h | 3 +
    3 files changed, 132 insertions(+), 47 deletions(-)

    diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
    index 75f1b10..22c1496 100644
    --- a/arch/x86/mm/pageattr-test.c
    +++ b/arch/x86/mm/pageattr-test.c
    @@ -111,6 +111,7 @@ static int pageattr_test(void)
    unsigned int level;
    int i, k;
    int err;
    + unsigned long test_addr;

    if (print)
    printk(KERN_INFO "CPA self-test:\n");
    @@ -165,8 +166,10 @@ static int pageattr_test(void)
    continue;
    }

    - err = change_page_attr_clear(addr[i], len[i],
    - __pgprot(_PAGE_GLOBAL));
    +
    + test_addr = addr[i];
    + err = change_page_attr_clear(&test_addr, len[i],
    + __pgprot(_PAGE_GLOBAL), 0);
    if (err < 0) {
    printk(KERN_ERR "CPA %d failed %d\n", i, err);
    failed++;
    @@ -198,8 +201,9 @@ static int pageattr_test(void)
    failed++;
    continue;
    }
    - err = change_page_attr_set(addr[i], len[i],
    - __pgprot(_PAGE_GLOBAL));
    + test_addr = addr[i];
    + err = change_page_attr_set(&test_addr, len[i],
    + __pgprot(_PAGE_GLOBAL), 0);
    if (err < 0) {
    printk(KERN_ERR "CPA reverting failed: %d\n", err);
    failed++;
    diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
    index 7b79f6b..6124726 100644
    --- a/arch/x86/mm/pageattr.c
    +++ b/arch/x86/mm/pageattr.c
    @@ -22,14 +22,18 @@
    * The current flushing context - we pass it instead of 5 arguments:
    */
    struct cpa_data {
    - unsigned long vaddr;
    + unsigned long *vaddr;
    pgprot_t mask_set;
    pgprot_t mask_clr;
    int numpages;
    - int flushtlb;
    + int flags;
    unsigned long pfn;
    + int curaddr;
    };

    +#define CPA_FLUSHTLB 1
    +#define CPA_ARRAY 2
    +
    #ifdef CONFIG_X86_64

    static inline unsigned long highmap_start_pfn(void)
    @@ -145,6 +149,35 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
    }
    }

    +static void cpa_flush_array(unsigned long *start, int numpages, int cache)
    +{
    + unsigned int i, level;
    + unsigned long *addr;
    +
    + BUG_ON(irqs_disabled());
    +
    + on_each_cpu(__cpa_flush_range, NULL, 1, 1);
    +
    + if (!cache)
    + return;
    +
    + /*
    + * We only need to flush on one CPU,
    + * clflush is a MESI-coherent instruction that
    + * will cause all other CPUs to flush the same
    + * cachelines:
    + */
    + for (i = 0, addr = start; i < numpages; i++, addr++) {
    + pte_t *pte = lookup_address(*addr, &level);
    +
    + /*
    + * Only flush present addresses:
    + */
    + if (pte && (pte_val(*pte) & _PAGE_PRESENT))
    + clflush_cache_range((void *) *addr, PAGE_SIZE);
    + }
    +}
    +
    /*
    * Certain areas of memory on x86 require very specific protection flags,
    * for example the BIOS area or kernel text. Callers don't always get this
    @@ -349,7 +382,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
    */
    new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
    __set_pmd_pte(kpte, address, new_pte);
    - cpa->flushtlb = 1;
    + cpa->flags |= CPA_FLUSHTLB;
    do_split = 0;
    }

    @@ -527,11 +560,13 @@ out_unlock:

    static int __change_page_attr(struct cpa_data *cpa, int primary)
    {
    - unsigned long address = cpa->vaddr;
    + unsigned long address;
    int do_split, err;
    unsigned int level;
    pte_t *kpte, old_pte;

    + address = cpa->vaddr[cpa->curaddr];
    +
    repeat:
    kpte = lookup_address(address, &level);
    if (!kpte)
    @@ -543,7 +578,7 @@ repeat:
    return 0;
    printk(KERN_WARNING "CPA: called for zero pte. "
    "vaddr = %lx cpa->vaddr = %lx\n", address,
    - cpa->vaddr);
    + *cpa->vaddr);
    WARN_ON(1);
    return -EINVAL;
    }
    @@ -570,7 +605,7 @@ repeat:
    */
    if (pte_val(old_pte) != pte_val(new_pte)) {
    set_pte_atomic(kpte, new_pte);
    - cpa->flushtlb = 1;
    + cpa->flags |= CPA_FLUSHTLB;
    }
    cpa->numpages = 1;
    return 0;
    @@ -594,7 +629,7 @@ repeat:
    */
    err = split_large_page(kpte, address);
    if (!err) {
    - cpa->flushtlb = 1;
    + cpa->flags |= CPA_FLUSHTLB;
    goto repeat;
    }

    @@ -607,6 +642,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
    {
    struct cpa_data alias_cpa;
    int ret = 0;
    + unsigned long temp_cpa_vaddr, vaddr;

    if (cpa->pfn > max_pfn_mapped)
    return 0;
    @@ -615,11 +651,16 @@ static int cpa_process_alias(struct cpa_data *cpa)
    * No need to redo, when the primary call touched the direct
    * mapping already:
    */
    - if (!within(cpa->vaddr, PAGE_OFFSET,
    + vaddr = cpa->vaddr[cpa->curaddr];
    +
    + if (!within(vaddr, PAGE_OFFSET,
    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {

    alias_cpa = *cpa;
    - alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
    + temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
    + alias_cpa.vaddr = &temp_cpa_vaddr;
    + alias_cpa.curaddr = 0;
    + alias_cpa.flags &= ~CPA_ARRAY;

    ret = __change_page_attr_set_clr(&alias_cpa, 0);
    }
    @@ -631,7 +672,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
    * No need to redo, when the primary call touched the high
    * mapping already:
    */
    - if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
    + if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
    return 0;

    /*
    @@ -642,8 +683,11 @@ static int cpa_process_alias(struct cpa_data *cpa)
    return 0;

    alias_cpa = *cpa;
    - alias_cpa.vaddr =
    - (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
    + temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
    + __START_KERNEL_map - phys_base;
    + alias_cpa.vaddr = &temp_cpa_vaddr;
    + alias_cpa.curaddr = 0;
    + alias_cpa.flags &= ~CPA_ARRAY;

    /*
    * The high mapping range is imprecise, so ignore the return value.
    @@ -681,7 +725,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
    */
    BUG_ON(cpa->numpages > numpages);
    numpages -= cpa->numpages;
    - cpa->vaddr += cpa->numpages * PAGE_SIZE;
    + if (cpa->flags & CPA_ARRAY)
    + cpa->curaddr++;
    + else
    + *cpa->vaddr += cpa->numpages * PAGE_SIZE;
    }
    return 0;
    }
    @@ -692,8 +739,9 @@ static inline int cache_attr(pgprot_t attr)
    (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
    }

    -static int change_page_attr_set_clr(unsigned long addr, int numpages,
    - pgprot_t mask_set, pgprot_t mask_clr)
    +static int change_page_attr_set_clr(unsigned long *addr, int numpages,
    + pgprot_t mask_set, pgprot_t mask_clr,
    + int array)
    {
    struct cpa_data cpa;
    int ret, cache, checkalias;
    @@ -708,19 +756,25 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
    return 0;

    /* Ensure we are PAGE_SIZE aligned */
    - if (addr & ~PAGE_MASK) {
    - addr &= PAGE_MASK;
    - /*
    - * People should not be passing in unaligned addresses:
    - */
    - WARN_ON_ONCE(1);
    + if (!array) {
    + if (*addr & ~PAGE_MASK) {
    + *addr &= PAGE_MASK;
    + /*
    + * People should not be passing in unaligned addresses:
    + */
    + WARN_ON_ONCE(1);
    + }
    }

    cpa.vaddr = addr;
    cpa.numpages = numpages;
    cpa.mask_set = mask_set;
    cpa.mask_clr = mask_clr;
    - cpa.flushtlb = 0;
    + cpa.flags = 0;
    + cpa.curaddr = 0;
    +
    + if (array)
    + cpa.flags |= CPA_ARRAY;

    /* No alias checking for _NX bit modifications */
    checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
    @@ -730,7 +784,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
    /*
    * Check whether we really changed something:
    */
    - if (!cpa.flushtlb)
    + if (!(cpa.flags & CPA_FLUSHTLB))
    goto out;

    /*
    @@ -746,7 +800,10 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
    * wbindv):
    */
    if (!ret && cpu_has_clflush)
    - cpa_flush_range(addr, numpages, cache);
    + if (cpa.flags & CPA_ARRAY)
    + cpa_flush_array(addr, numpages, cache);
    + else
    + cpa_flush_range(*addr, numpages, cache);
    else
    cpa_flush_all(cache);

    @@ -756,57 +813,74 @@ out:
    return ret;
    }

    -static inline int change_page_attr_set(unsigned long addr, int numpages,
    - pgprot_t mask)
    +static inline int change_page_attr_set(unsigned long *addr, int numpages,
    + pgprot_t mask, int array)
    {
    - return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
    + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0),
    + array);
    }

    -static inline int change_page_attr_clear(unsigned long addr, int numpages,
    - pgprot_t mask)
    +static inline int change_page_attr_clear(unsigned long *addr, int numpages,
    + pgprot_t mask, int array)
    {
    - return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
    + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask,
    + array);
    }

    int set_memory_uc(unsigned long addr, int numpages)
    {
    - return change_page_attr_set(addr, numpages,
    - __pgprot(_PAGE_PCD));
    + return change_page_attr_set(&addr, numpages,
    + __pgprot(_PAGE_PCD), 0);
    }
    EXPORT_SYMBOL(set_memory_uc);

    +int set_memory_array_uc(unsigned long *addr, int addrinarray)
    +{
    + return change_page_attr_set(addr, addrinarray,
    + __pgprot(_PAGE_PCD), 1);
    +}
    +EXPORT_SYMBOL(set_memory_array_uc);
    +
    int set_memory_wb(unsigned long addr, int numpages)
    {
    - return change_page_attr_clear(addr, numpages,
    - __pgprot(_PAGE_PCD | _PAGE_PWT));
    + return change_page_attr_clear(&addr, numpages,
    + __pgprot(_PAGE_PCD | _PAGE_PWT), 0);
    }
    EXPORT_SYMBOL(set_memory_wb);

    +int set_memory_array_wb(unsigned long *addr, int addrinarray)
    +{
    + return change_page_attr_clear(addr, addrinarray,
    + __pgprot(_PAGE_PCD | _PAGE_PWT), 1);
    +}
    +EXPORT_SYMBOL(set_memory_array_wb);
    +
    int set_memory_x(unsigned long addr, int numpages)
    {
    - return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
    + return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
    }
    EXPORT_SYMBOL(set_memory_x);

    int set_memory_nx(unsigned long addr, int numpages)
    {
    - return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
    + return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
    }
    EXPORT_SYMBOL(set_memory_nx);

    int set_memory_ro(unsigned long addr, int numpages)
    {
    - return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
    + return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
    }

    int set_memory_rw(unsigned long addr, int numpages)
    {
    - return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
    + return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
    }

    int set_memory_np(unsigned long addr, int numpages)
    {
    - return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
    + return change_page_attr_clear(&addr, numpages,
    + __pgprot(_PAGE_PRESENT), 0);
    }

    int set_pages_uc(struct page *page, int numpages)
    @@ -859,20 +933,24 @@ int set_pages_rw(struct page *page, int numpages)

    static int __set_pages_p(struct page *page, int numpages)
    {
    - struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
    + unsigned long tempaddr = (unsigned long)page_address(page);
    + struct cpa_data cpa = { .vaddr = &tempaddr,
    .numpages = numpages,
    .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
    - .mask_clr = __pgprot(0)};
    + .mask_clr = __pgprot(0),
    + .flags = 0};

    return __change_page_attr_set_clr(&cpa, 1);
    }

    static int __set_pages_np(struct page *page, int numpages)
    {
    - struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
    + unsigned long tempaddr = (unsigned long)page_address(page);
    + struct cpa_data cpa = { .vaddr = &tempaddr,
    .numpages = numpages,
    .mask_set = __pgprot(0),
    - .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
    + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
    + .flags = 0};

    return __change_page_attr_set_clr(&cpa, 1);
    }
    diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
    index 5396c21..5646584 100644
    --- a/include/asm-x86/cacheflush.h
    +++ b/include/asm-x86/cacheflush.h
    @@ -42,6 +42,9 @@ int set_memory_ro(unsigned long addr, int numpages);
    int set_memory_rw(unsigned long addr, int numpages);
    int set_memory_np(unsigned long addr, int numpages);

    +int set_memory_array_uc(unsigned long *addr, int addrinarray);
    +int set_memory_array_wb(unsigned long *addr, int addrinarray);
    +
    void clflush_cache_range(void *addr, unsigned int size);

    void cpa_init(void);




    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [PATCH] x86: create array based interface to change page attribute

    Dave Airlie wrote:
    > When cpa was refactored to the new set_memory_ interfaces, it removed
    > a special case fast path for AGP systems, which did a lot of page by page
    > attribute changing and held the flushes until they were finished. The
    > DRM memory manager also required this to get useable speeds.
    >
    > This introduces a new interface, which accepts an array of memory addresses
    > to have attributes changed on and to flush once finished.
    >
    > Further changes to the AGP stack to actually use this interface will be
    > published later.
    >
    > Signed-off-by: Dave Airlie
    > ---
    > arch/x86/mm/pageattr-test.c | 12 ++-
    > arch/x86/mm/pageattr.c | 164 +++++++++++++++++++++++++++++++-----------
    > include/asm-x86/cacheflush.h | 3 +
    > 3 files changed, 132 insertions(+), 47 deletions(-)
    >

    ....
    Dave,
    Nice work, but how do we handle highmem pages? I know that they don't
    need any attribute change since they're not in the kernel map, but both
    the AGP module and the DRM memory manager typically hold highmem
    addresses in their arrays, so the code should presumably detect those
    and avoid them?

    Since this is an AGPgart and DRM fastpath, the interface should ideally
    be adapted to match the data structures used by those callers. The
    AGPgart module uses an array of addresses, which effectively stops it
    from using pages beyond the DMA32 limit. The DRM memory manager uses an
    array of struct page pointers, but using that was, If I understand
    things correctly, rejected.

    So, if we, at some point, want to have an AGPgart module capable of
    using anything beyond the DMA32 limit we will end up with an interface
    that doesn't match neither AGPgart nor DRM, for which users the fastpath
    was originally intended.

    /Thomas



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH] x86: create array based interface to change page attribute

    Dave Airlie writes:
    >
    > +#define CPA_FLUSHTLB 1
    > +#define CPA_ARRAY 2


    I don't think CPA_ARRAY should be a separate case. Rather single
    page flushing should be an array with only a single entry. pageattr
    is already very complex, no need to make add more special cases.
    > +
    > + /*
    > + * Only flush present addresses:
    > + */
    > + if (pte && (pte_val(*pte) & _PAGE_PRESENT))
    > + clflush_cache_range((void *) *addr, PAGE_SIZE);


    Also it is doubtful clflush really makes sense on a large array. Just
    doing wbinvd might be faster then. Or perhaps better supporting Self-Snoop
    should be revisited, that would at least eliminate it on most Intel
    CPUs.

    -Andi
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH] x86: create array based interface to change page attribute

    Andi Kleen wrote:
    > Dave Airlie writes:
    >
    >>
    >> +#define CPA_FLUSHTLB 1
    >> +#define CPA_ARRAY 2
    >>

    >
    > I don't think CPA_ARRAY should be a separate case. Rather single
    > page flushing should be an array with only a single entry. pageattr
    > is already very complex, no need to make add more special cases.
    >
    >> +
    >> + /*
    >> + * Only flush present addresses:
    >> + */
    >> + if (pte && (pte_val(*pte) & _PAGE_PRESENT))
    >> + clflush_cache_range((void *) *addr, PAGE_SIZE);
    >>

    >
    > Also it is doubtful clflush really makes sense on a large array. Just
    > doing wbinvd might be faster then. Or perhaps better supporting Self-Snoop
    > should be revisited, that would at least eliminate it on most Intel
    > CPUs.
    >
    >

    I agree that wbinvd() seems to be faster on large arrays on the
    processors I've tested. But isn't there a severe latency problem with
    that instruction, that makes people really want to avoid it in all
    possible cases?

    Also I think we need to clarify the semantics of the c_p_a
    functionality. Right now both AGP and DRM relies on c_p_a doing an
    explicit cache flush. Otherwise the data won't appear on the device side
    of the aperture.
    If we use self-snoop, the AGP and DRM drivers can't rely on this flush
    being performed, and they have to do the flush themselves, and for
    non-self-snooping processors, the flush needs to be done twice?

    /Thomas

    > -Andi
    >




    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH] x86: create array based interface to change page attribute

    > Also I think we need to clarify the semantics of the c_p_a
    > functionality. Right now both AGP and DRM relies on c_p_a doing an
    > explicit cache flush. Otherwise the data won't appear on the device side
    > of the aperture.


    But surely not in cpa I hope ? Or are you saying you first write data
    and then do cpa? If true that would be quite an abuse of CPA
    I would say and you should fix it ASAP.

    > If we use self-snoop, the AGP and DRM drivers can't rely on this flush
    > being performed, and they have to do the flush themselves, and for


    They definitely should flush themselves if they want data to reach
    the device. That is obviously required any time they reuse a page
    too.

    -Andi
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH] x86: create array based interface to change page attribute

    Andi Kleen wrote:
    >> Also I think we need to clarify the semantics of the c_p_a
    >> functionality. Right now both AGP and DRM relies on c_p_a doing an
    >> explicit cache flush. Otherwise the data won't appear on the device side
    >> of the aperture.
    >>

    >
    > But surely not in cpa I hope ? Or are you saying you first write data
    > and then do cpa? If true that would be quite an abuse of CPA
    > I would say and you should fix it ASAP.
    >
    >

    As AGP buffers are moved in- and out of AGP, the caching policy changes,
    so yes, there may be writes to cache coherent memory that needs to be
    flushed at some point. Since CPA has been doing that up to now, and the
    codepaths involved are quite time-critical, a double cache-flush is a
    no-no, so if this is left to the caller, we must be able to tell CPA
    that any needed cache-flush has already been performed.
    >> If we use self-snoop, the AGP and DRM drivers can't rely on this flush
    >> being performed, and they have to do the flush themselves, and for
    >>

    >
    > They definitely should flush themselves if they want data to reach
    > the device. That is obviously required any time they reuse a page
    > too.
    >

    Understood,
    but then we *must* really find a way to say "don't flush the cache
    again", perhaps part of Dave's array function?

    /Thomas
    > -Andi
    >




    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH] x86: create array based interface to change page attribute

    On Mon, Mar 31, 2008 at 11:06:16AM +0200, Thomas Hellström wrote:
    > Andi Kleen wrote:
    > >>Also I think we need to clarify the semantics of the c_p_a
    > >>functionality. Right now both AGP and DRM relies on c_p_a doing an
    > >>explicit cache flush. Otherwise the data won't appear on the device side
    > >>of the aperture.
    > >>

    > >
    > >But surely not in cpa I hope ? Or are you saying you first write data
    > >and then do cpa? If true that would be quite an abuse of CPA
    > >I would say and you should fix it ASAP.
    > >
    > >

    > As AGP buffers are moved in- and out of AGP, the caching policy changes,
    > so yes, there may be writes to cache coherent memory that needs to be
    > flushed at some point. Since CPA has been doing that up to now, and the
    > codepaths involved are quite time-critical, a double cache-flush is a


    That doesn't make sense. You shouldn't be using CPA in any
    time critical code path. It will always be slow.

    For anything time critical you should keep a pool of uncached pages
    once set up using CPA and reuse them.

    CPA really should only be used on initialization or for
    larger setup changes which are ok to go somewhat slower.

    > no-no, so if this is left to the caller, we must be able to tell CPA
    > that any needed cache-flush has already been performed.


    Sounds like a bad design.

    > >>If we use self-snoop, the AGP and DRM drivers can't rely on this flush
    > >>being performed, and they have to do the flush themselves, and for
    > >>

    > >
    > >They definitely should flush themselves if they want data to reach
    > >the device. That is obviously required any time they reuse a page
    > >too.
    > >

    > Understood,
    > but then we *must* really find a way to say "don't flush the cache
    > again", perhaps part of Dave's array function?


    The cache must be flushed in CPA, there is no way around it.

    If you write data into the buffers before you do CPA on them
    you could avoid it, but I don't think you should do that anywhere
    near time critical code, so it actually shouldn't matter.

    -Andi

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. Re: [PATCH] x86: create array based interface to change page attribute

    Thomas Hellström wrote:
    > Dave Airlie wrote:
    >> When cpa was refactored to the new set_memory_ interfaces, it removed
    >> a special case fast path for AGP systems, which did a lot of page by page
    >> attribute changing and held the flushes until they were finished. The
    >> DRM memory manager also required this to get useable speeds.
    >>
    >> This introduces a new interface, which accepts an array of memory
    >> addresses
    >> to have attributes changed on and to flush once finished.
    >>
    >> Further changes to the AGP stack to actually use this interface will be
    >> published later.
    >>
    >> Signed-off-by: Dave Airlie
    >> ---
    >> arch/x86/mm/pageattr-test.c | 12 ++-
    >> arch/x86/mm/pageattr.c | 164
    >> +++++++++++++++++++++++++++++++-----------
    >> include/asm-x86/cacheflush.h | 3 +
    >> 3 files changed, 132 insertions(+), 47 deletions(-)
    >>

    > ...
    > Dave,
    > Nice work, but how do we handle highmem pages?


    Cache attributes fundamentally work on a mapping and not on physical memory.
    (MTRR's are special there, they do work on physical memory, but that's a special case and not relevant here)

    So to be honest, your question doesn't make sense; because all I can do is ask "which mapping of these pages".

    Even the old interfaces prior to 2.6.24 only managed to deal with SOME of the mappings of a page.
    And if/when future CPUs don't require all mappings to be in sync, clearly the kernel will only change
    the mapping that is requested as well.



    > Since this is an AGPgart and DRM fastpath, the interface should ideally
    > be adapted to match the data structures used by those callers.


    Actually, the interface has to make sense conceptually convey the right information,
    the implementation should not have to second guess internals of AGP/DRM because that
    would just be a recipe for disaster.
    >The
    > AGPgart module uses an array of addresses, which effectively stops it
    > from using pages beyond the DMA32 limit. The DRM memory manager uses an
    > array of struct page pointers, but using that was, If I understand
    > things correctly, rejected.


    yes because simply put, if you pass a struct page to such a function, you're not telling it which
    mapping or mappings you want changed....
    (And if you say "only the 1:1 mapping, so why doesn't the other side calculate that"... there's no speed gain in doing
    the calculation for that on the other side of an interface, and that makes it zero reason to misdesign the interface
    to only have the "which mapping" information implicit)

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. Re: [PATCH] x86: create array based interface to change page attribute

    Andi Kleen wrote:

    > Also it is doubtful clflush really makes sense on a large array. Just
    > doing wbinvd might be faster then.


    wbinvd is rarely a good idea; think about it... it'll flush 12Mb of cache *per socket* in one instruction.
    (on a modern Intel consumer grade CPU, more on the enterprise ones)
    This doesn't only impact the current logical thread, but ALL of the threads in the system, since the cache
    coherency needs to be preserved, all have to go empty at the same time.
    Forget real time... this can take really long even for non-realtime users

    At least clflush breaks this up into smaller pieces so total latency won't suck entirely.


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  10. Re: [PATCH] x86: create array based interface to change page attribute

    Arjan van de Ven wrote:
    > Thomas Hellström wrote:
    >> Dave Airlie wrote:
    >>> When cpa was refactored to the new set_memory_ interfaces, it removed
    >>> a special case fast path for AGP systems, which did a lot of page by
    >>> page
    >>> attribute changing and held the flushes until they were finished. The
    >>> DRM memory manager also required this to get useable speeds.
    >>>
    >>> This introduces a new interface, which accepts an array of memory
    >>> addresses
    >>> to have attributes changed on and to flush once finished.
    >>>
    >>> Further changes to the AGP stack to actually use this interface will be
    >>> published later.
    >>>
    >>> Signed-off-by: Dave Airlie
    >>> ---
    >>> arch/x86/mm/pageattr-test.c | 12 ++-
    >>> arch/x86/mm/pageattr.c | 164
    >>> +++++++++++++++++++++++++++++++-----------
    >>> include/asm-x86/cacheflush.h | 3 +
    >>> 3 files changed, 132 insertions(+), 47 deletions(-)
    >>>

    >> ...
    >> Dave,
    >> Nice work, but how do we handle highmem pages?

    >
    > Cache attributes fundamentally work on a mapping and not on physical
    > memory.
    > (MTRR's are special there, they do work on physical memory, but that's
    > a special case and not relevant here)
    >
    > So to be honest, your question doesn't make sense; because all I can
    > do is ask "which mapping of these pages".
    >
    > Even the old interfaces prior to 2.6.24 only managed to deal with SOME
    > of the mappings of a page.
    > And if/when future CPUs don't require all mappings to be in sync,
    > clearly the kernel will only change
    > the mapping that is requested as well.
    >
    >
    >
    >> Since this is an AGPgart and DRM fastpath, the interface should
    >> ideally be adapted to match the data structures used by those callers.

    >
    > Actually, the interface has to make sense conceptually convey the
    > right information,
    > the implementation should not have to second guess internals of
    > AGP/DRM because that
    > would just be a recipe for disaster.
    >> The AGPgart module uses an array of addresses, which effectively
    >> stops it from using pages beyond the DMA32 limit. The DRM memory
    >> manager uses an array of struct page pointers, but using that was, If
    >> I understand things correctly, rejected.

    >
    > yes because simply put, if you pass a struct page to such a function,
    > you're not telling it which
    > mapping or mappings you want changed....
    > (And if you say "only the 1:1 mapping, so why doesn't the other side
    > calculate that"... there's no speed gain in doing
    > the calculation for that on the other side of an interface, and that
    > makes it zero reason to misdesign the interface
    > to only have the "which mapping" information implicit)
    >

    Hmm. I get the point. There should be ways to do reasonably efficient
    workarounds in the drivers.

    /Thomas



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. Re: [PATCH] x86: create array based interface to change page attribute

    Andi Kleen wrote:
    > On Mon, Mar 31, 2008 at 11:06:16AM +0200, Thomas Hellström wrote:
    >
    >> Andi Kleen wrote:
    >>
    >>>> Also I think we need to clarify the semantics of the c_p_a
    >>>> functionality. Right now both AGP and DRM relies on c_p_a doing an
    >>>> explicit cache flush. Otherwise the data won't appear on the device side
    >>>> of the aperture.
    >>>>
    >>>>
    >>> But surely not in cpa I hope ? Or are you saying you first write data
    >>> and then do cpa? If true that would be quite an abuse of CPA
    >>> I would say and you should fix it ASAP.
    >>>
    >>>
    >>>

    >> As AGP buffers are moved in- and out of AGP, the caching policy changes,
    >> so yes, there may be writes to cache coherent memory that needs to be
    >> flushed at some point. Since CPA has been doing that up to now, and the
    >> codepaths involved are quite time-critical, a double cache-flush is a
    >>

    >
    > That doesn't make sense. You shouldn't be using CPA in any
    > time critical code path. It will always be slow.
    >
    > For anything time critical you should keep a pool of uncached pages
    > once set up using CPA and reuse them.
    >
    > CPA really should only be used on initialization or for
    > larger setup changes which are ok to go somewhat slower.
    >

    Let me rehprase. Not really time-critical but it is of some importance
    that CPA is done quickly.
    We're dealing with the tradeoff of reading from uncached device memory
    vs taking the pages out of
    AGP, setting up a cache-coherent mapping, read and then change back.
    What we'd really would like to set up is a pool of completely unmapped
    (like highmem) pages. Then we could, to a large extent, avoid the CPA calls.

    >
    >
    > The cache must be flushed in CPA, there is no way around it.
    >
    > If you write data into the buffers before you do CPA on them
    > you could avoid it, but I don't think you should do that anywhere
    > near time critical code, so it actually shouldn't matter.
    >
    > -Andi
    >

    But then we wouldn't really be discussing SS either? For DRM purposes,
    the more performance we can squeeze out of CPA, the better.

    /Thomas




    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. Re: [PATCH] x86: create array based interface to change page attribute

    On Mon, Mar 31, 2008 at 5:25 PM, Andi Kleen wrote:
    > Dave Airlie writes:
    > >
    > > +#define CPA_FLUSHTLB 1
    > > +#define CPA_ARRAY 2

    >
    > I don't think CPA_ARRAY should be a separate case. Rather single
    > page flushing should be an array with only a single entry. pageattr
    > is already very complex, no need to make add more special cases.


    I thought about this but the current interface takes a start address
    and number of pages from that point to cpa,
    the array interface takes an array of page sized pages.

    I don't really think we need to generate an array in the first case
    with all the pages in it..

    Dave.
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  13. Re: [PATCH] x86: create array based interface to change page attribute

    On Mon, Mar 31, 2008 at 09:21:19PM +1000, Dave Airlie wrote:
    > On Mon, Mar 31, 2008 at 5:25 PM, Andi Kleen wrote:
    > > Dave Airlie writes:
    > > >
    > > > +#define CPA_FLUSHTLB 1
    > > > +#define CPA_ARRAY 2

    > >
    > > I don't think CPA_ARRAY should be a separate case. Rather single
    > > page flushing should be an array with only a single entry. pageattr
    > > is already very complex, no need to make add more special cases.

    >
    > I thought about this but the current interface takes a start address
    > and number of pages from that point to cpa,
    > the array interface takes an array of page sized pages.
    >
    > I don't really think we need to generate an array in the first case
    > with all the pages in it..


    Just put the length into the array members too.

    -Andi
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  14. Re: [PATCH] x86: create array based interface to change page attribute

    Thomas Hellström wrote:

    > Let me rehprase. Not really time-critical but it is of some importance
    > that CPA is done quickly.
    > We're dealing with the tradeoff of reading from uncached device memory


    uncached or write combining ?

    > vs taking the pages out of
    > AGP, setting up a cache-coherent mapping, read and then change back.
    > What we'd really would like to set up is a pool of completely unmapped
    > (like highmem) pages. Then we could, to a large extent, avoid the CPA
    > calls.


    changing attributes by nature means a tlb flush and a bunch of expensive cache work.
    That's never going to be cheap, I guess it all depends on how much work you do
    on the memory for it to pay off or not...
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  15. Re: [PATCH] x86: create array based interface to change page attribute

    Arjan van de Ven wrote:
    > Thomas Hellström wrote:
    >
    >> Let me rehprase. Not really time-critical but it is of some
    >> importance that CPA is done quickly.
    >> We're dealing with the tradeoff of reading from uncached device memory

    >
    > uncached or write combining ?

    The user-space mappings (the ones that we really use) are usually
    write-combined, whereas the kernel mappings are uncached. (I think this
    is OK since both mapping types implies no cache coherency). Even if
    (IIRC) write combining is theoretically prefetchable, some devices give
    read speeds around 9MB/s.
    >
    >> vs taking the pages out of
    >> AGP, setting up a cache-coherent mapping, read and then change back.
    >> What we'd really would like to set up is a pool of completely
    >> unmapped (like highmem) pages. Then we could, to a large extent,
    >> avoid the CPA calls.

    >
    > changing attributes by nature means a tlb flush and a bunch of
    > expensive cache work.
    > That's never going to be cheap, I guess it all depends on how much
    > work you do
    > on the memory for it to pay off or not...

    Indeed. Actually with the new non-wbinvd() CPA, We seem to benefit
    already if the buffer is a single page, though it's probably hard to
    measure the impact of repopulating the tlb.

    /Thomas



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  16. Re: [PATCH] x86: create array based interface to change page attribute

    Thomas Hellström wrote:
    > Arjan van de Ven wrote:
    >> Thomas Hellström wrote:
    >>
    >>> Let me rehprase. Not really time-critical but it is of some
    >>> importance that CPA is done quickly.
    >>> We're dealing with the tradeoff of reading from uncached device memory

    >>
    >> uncached or write combining ?

    > The user-space mappings (the ones that we really use) are usually
    > write-combined, whereas the kernel mappings are uncached. (I think this
    > is OK since both mapping types implies no cache coherency).


    This is not officially allowed and may tripple fault your cpu..
    To comply with the spec one needs to have ALL mappings the same unfortunately.
    (And yes, this is a hard problem)

    >Even if
    > (IIRC) write combining is theoretically prefetchable, some devices give
    > read speeds around 9MB/s.
    >>
    >>> vs taking the pages out of
    >>> AGP, setting up a cache-coherent mapping, read and then change back.
    >>> What we'd really would like to set up is a pool of completely
    >>> unmapped (like highmem) pages. Then we could, to a large extent,
    >>> avoid the CPA calls.

    >>
    >> changing attributes by nature means a tlb flush and a bunch of
    >> expensive cache work.
    >> That's never going to be cheap, I guess it all depends on how much
    >> work you do
    >> on the memory for it to pay off or not...

    > Indeed. Actually with the new non-wbinvd() CPA, We seem to benefit
    > already if the buffer is a single page, though it's probably hard to
    > measure the impact of repopulating the tlb.
    >
    > /Thomas
    >
    >
    >


    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  17. Re: [PATCH] x86: create array based interface to change page attribute

    Arjan van de Ven wrote:
    > Thomas Hellström wrote:
    >> Arjan van de Ven wrote:
    >>> Thomas Hellström wrote:
    >>>
    >>>> Let me rehprase. Not really time-critical but it is of some
    >>>> importance that CPA is done quickly.
    >>>> We're dealing with the tradeoff of reading from uncached device memory
    >>>
    >>> uncached or write combining ?

    >> The user-space mappings (the ones that we really use) are usually
    >> write-combined, whereas the kernel mappings are uncached. (I think
    >> this is OK since both mapping types implies no cache coherency).

    >
    > This is not officially allowed and may tripple fault your cpu..
    > To comply with the spec one needs to have ALL mappings the same
    > unfortunately.
    > (And yes, this is a hard problem)
    >

    Hmm...

    Given this problem, the previously mentioned use-case, and the fact that
    we mostly really use user-space mappings,
    Is there a possibility we could add the following functions to Dave's
    patch (provided they would work as intended, of course, namely
    invalidate / bring back the kernel mapping).
    Then we can set up a pool of unmapped pages, avoid frequent use of CPA
    and everybody's happy.

    int set_memory_array_np(unsigned long *addr, int addrinarray)
    {
    return change_page_attr_clr(addr, addrinarray,
    __pgprot(_PAGE_PRESENT | _PAGE_RW), 1);
    }
    EXPORT_SYMBOL(set_memory_array_np);

    int set_memory_array_p(unsigned long *addr, int addrinarray)
    {
    return change_page_attr_set(addr, addrinarray,
    __pgprot(_PAGE_PRESENT | _PAGE_RW), 1);
    }
    EXPORT_SYMBOL(set_memory_array_p);

    /Thomas



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  18. Re: [PATCH] x86: create array based interface to change page attribute

    Dave Airlie wrote:
    > When cpa was refactored to the new set_memory_ interfaces, it removed
    > a special case fast path for AGP systems, which did a lot of page by page
    > attribute changing and held the flushes until they were finished. The
    > DRM memory manager also required this to get useable speeds.
    >
    > This introduces a new interface, which accepts an array of memory addresses
    > to have attributes changed on and to flush once finished.
    >
    > Further changes to the AGP stack to actually use this interface will be
    > published later.


    Acked-by: Arjan van de Ven
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  19. Re: [PATCH] x86: create array based interface to change page attribute

    Thomas Hellström wrote:
    > Given this problem, the previously mentioned use-case, and the fact that
    > we mostly really use user-space mappings,
    > Is there a possibility we could add the following functions to Dave's
    > patch (provided they would work as intended, of course, namely
    > invalidate / bring back the kernel mapping).


    sadly there are multiple mappings, both in theory and practice.
    Especially the _np / _p functions specifically work on only the mapping you specify.

    For this to work we would need to somehow make a "mark all mappings NP, but please only do the kernel ones" kind of thing.
    The semantics of that are... lets say messy at best.
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  20. Re: [PATCH] x86: create array based interface to change page attribute

    Arjan van de Ven wrote:
    > Thomas Hellström wrote:
    >> Given this problem, the previously mentioned use-case, and the fact
    >> that we mostly really use user-space mappings,
    >> Is there a possibility we could add the following functions to Dave's
    >> patch (provided they would work as intended, of course, namely
    >> invalidate / bring back the kernel mapping).

    >
    > sadly there are multiple mappings, both in theory and practice.
    > Especially the _np / _p functions specifically work on only the
    > mapping you specify.
    >
    > For this to work we would need to somehow make a "mark all mappings
    > NP, but please only do the kernel ones" kind of thing.
    > The semantics of that are... lets say messy at best.

    Hmm, I'm not sure I follow you here. Are you saying that it's illegal to
    have an NP mapping of a page (which, If I understand it correctly, means
    no mapping at all) at the same time as you have a, say user-space WC
    mapping pointing to the same physical page?

    Or are you saying that it's very hard to keep track of all mappings to a
    page, and change only the ones you really want to change?

    If you mean the latter, for the DRM case, the DRM memory manager has
    already made sure all user-space mappings of a page are killed before
    calling CPA on it, (using unmap_mapping_range()) and they are faulted
    back once CPA is done.

    I was under the impression that calling CPA on the kernel mapping of
    that page would do the rest?

    /Thomas




    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread
Page 1 of 2 1 2 LastLast