[PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining - Kernel

This is a discussion on [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining - Kernel ; The Xen balloon driver needs to separate the process of hot-installing memory into two phases: one to allocate the page structures and configure the zones, and another to actually online the pages of newly installed memory. This patch splits up ...

+ Reply to Thread
Results 1 to 7 of 7

Thread: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining

  1. [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining

    The Xen balloon driver needs to separate the process of hot-installing
    memory into two phases: one to allocate the page structures and
    configure the zones, and another to actually online the pages of newly
    installed memory.

    This patch splits up the innards of online_pages() into two pieces
    which correspond to these two phases. The behaviour of online_pages()
    itself is unchanged.

    Signed-off-by: Jeremy Fitzhardinge
    Cc: KAMEZAWA Hiroyuki
    Cc: Yasunori Goto
    Cc: Christoph Lameter
    Cc: Dave Hansen
    ---
    include/linux/memory_hotplug.h | 3 +
    mm/memory_hotplug.c | 102 ++++++++++++++++++++++++++--------------
    2 files changed, 71 insertions(+), 34 deletions(-)

    diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
    --- a/include/linux/memory_hotplug.h
    +++ b/include/linux/memory_hotplug.h
    @@ -57,7 +57,10 @@
    /* need some defines for these for archs that don't support it */
    extern void online_page(struct page *page);
    /* VM interface that may be used by firmware interface */
    +extern int prepare_online_pages(unsigned long pfn, unsigned long nr_pages);
    +extern unsigned long mark_pages_onlined(unsigned long pfn, unsigned long nr_pages);
    extern int online_pages(unsigned long, unsigned long);
    +
    extern void __offline_isolated_pages(unsigned long, unsigned long);
    extern int offline_pages(unsigned long, unsigned long, unsigned long);

    diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
    --- a/mm/memory_hotplug.c
    +++ b/mm/memory_hotplug.c
    @@ -134,10 +134,32 @@
    }
    EXPORT_SYMBOL_GPL(__add_pages);

    -static void grow_zone_span(struct zone *zone,
    +static void grow_pgdat_span(struct pglist_data *pgdat,
    unsigned long start_pfn, unsigned long end_pfn)
    {
    + unsigned long old_pgdat_end_pfn =
    + pgdat->node_start_pfn + pgdat->node_spanned_pages;
    +
    + if (start_pfn < pgdat->node_start_pfn)
    + pgdat->node_start_pfn = start_pfn;
    +
    + pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
    + pgdat->node_start_pfn;
    +}
    +
    +static void grow_zone_span(unsigned long start_pfn, unsigned long end_pfn)
    +{
    unsigned long old_zone_end_pfn;
    + struct zone *zone;
    + unsigned long flags;
    +
    + /*
    + * This doesn't need a lock to do pfn_to_page().
    + * The section can't be removed here because of the
    + * memory_block->state_sem.
    + */
    + zone = page_zone(pfn_to_page(start_pfn));
    + pgdat_resize_lock(zone->zone_pgdat, &flags);

    zone_span_writelock(zone);

    @@ -149,19 +171,9 @@
    zone->zone_start_pfn;

    zone_span_writeunlock(zone);
    -}

    -static void grow_pgdat_span(struct pglist_data *pgdat,
    - unsigned long start_pfn, unsigned long end_pfn)
    -{
    - unsigned long old_pgdat_end_pfn =
    - pgdat->node_start_pfn + pgdat->node_spanned_pages;
    -
    - if (start_pfn < pgdat->node_start_pfn)
    - pgdat->node_start_pfn = start_pfn;
    -
    - pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
    - pgdat->node_start_pfn;
    + grow_pgdat_span(zone->zone_pgdat, start_pfn, end_pfn);
    + pgdat_resize_unlock(zone->zone_pgdat, &flags);
    }

    static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
    @@ -180,16 +192,12 @@
    return 0;
    }

    -
    -int online_pages(unsigned long pfn, unsigned long nr_pages)
    +/* Tell anyone who's interested that we're onlining some memory */
    +static int notify_going_online(unsigned long pfn, unsigned long nr_pages)
    {
    - unsigned long flags;
    - unsigned long onlined_pages = 0;
    - struct zone *zone;
    - int need_zonelists_rebuild = 0;
    + struct memory_notify arg;
    int nid;
    int ret;
    - struct memory_notify arg;

    arg.start_pfn = pfn;
    arg.nr_pages = nr_pages;
    @@ -201,20 +209,18 @@

    ret = memory_notify(MEM_GOING_ONLINE, &arg);
    ret = notifier_to_errno(ret);
    - if (ret) {
    + if (ret)
    memory_notify(MEM_CANCEL_ONLINE, &arg);
    - return ret;
    - }
    - /*
    - * This doesn't need a lock to do pfn_to_page().
    - * The section can't be removed here because of the
    - * memory_block->state_sem.
    - */
    - zone = page_zone(pfn_to_page(pfn));
    - pgdat_resize_lock(zone->zone_pgdat, &flags);
    - grow_zone_span(zone, pfn, pfn + nr_pages);
    - grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
    - pgdat_resize_unlock(zone->zone_pgdat, &flags);
    +
    + return ret;
    +}
    +
    +/* Mark a set of pages as online */
    +unsigned long mark_pages_onlined(unsigned long pfn, unsigned long nr_pages)
    +{
    + struct zone *zone = page_zone(pfn_to_page(pfn));
    + unsigned long onlined_pages = 0;
    + int need_zonelists_rebuild = 0;

    /*
    * If this zone is not populated, then it is not in zonelist.
    @@ -240,10 +246,38 @@
    vm_total_pages = nr_free_pagecache_pages();
    writeback_set_ratelimit();

    - if (onlined_pages)
    + if (onlined_pages) {
    + struct memory_notify arg;
    +
    + arg.start_pfn = pfn; /* ? */
    + arg.nr_pages = onlined_pages;
    + arg.status_change_nid = -1; /* ? */
    +
    memory_notify(MEM_ONLINE, &arg);
    + }

    + return onlined_pages;
    +}
    +
    +int prepare_online_pages(unsigned long pfn, unsigned long nr_pages)
    +{
    + int ret = notify_going_online(pfn, nr_pages);
    + if (ret)
    + return ret;
    +
    + grow_zone_span(pfn, pfn+nr_pages);
    return 0;
    +}
    +
    +int online_pages(unsigned long pfn, unsigned long nr_pages)
    +{
    + int ret;
    +
    + ret = prepare_online_pages(pfn, nr_pages);
    + if (ret == 0)
    + mark_pages_onlined(pfn, nr_pages);
    +
    + return ret;
    }
    #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */



    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining


    On Thu, 2008-04-03 at 17:05 -0700, Jeremy Fitzhardinge wrote:
    >
    > +int prepare_online_pages(unsigned long pfn, unsigned long nr_pages)
    > +{
    > + int ret = notify_going_online(pfn, nr_pages);
    > + if (ret)
    > + return ret;
    > +
    > + grow_zone_span(pfn, pfn+nr_pages);
    > return 0;
    > +}


    OK, after seeing this used in the Xen driver, I'm even less a fan of the
    name. Mostly because it doesn't actually prepare *pages* to be onlined.
    It prepares the zones/pgdats and notifies that pages might soon be
    online. Can you think of any better names?

    grow_and_notify...??

    It's also a bit funky because you're calling the online notifiers, but
    you're not actually onlining the pages, yet. Does that have any
    repercussions?

    -- Dave

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining


    On Thu, 2008-04-03 at 17:05 -0700, Jeremy Fitzhardinge wrote:
    >
    > +static void grow_zone_span(unsigned long start_pfn, unsigned long
    > end_pfn)
    > +{
    > unsigned long old_zone_end_pfn;
    > + struct zone *zone;
    > + unsigned long flags;
    > +
    > + /*
    > + * This doesn't need a lock to do pfn_to_page().
    > + * The section can't be removed here because of the
    > + * memory_block->state_sem.
    > + */
    > + zone = page_zone(pfn_to_page(start_pfn));
    > + pgdat_resize_lock(zone->zone_pgdat, &flags);
    >
    > zone_span_writelock(zone);
    >
    > @@ -149,19 +171,9 @@
    > zone->zone_start_pfn;
    >
    > zone_span_writeunlock(zone);
    > -}
    >
    > -static void grow_pgdat_span(struct pglist_data *pgdat,
    > - unsigned long start_pfn, unsigned long end_pfn)
    > -{
    > - unsigned long old_pgdat_end_pfn =
    > - pgdat->node_start_pfn + pgdat->node_spanned_pages;
    > -
    > - if (start_pfn < pgdat->node_start_pfn)
    > - pgdat->node_start_pfn = start_pfn;
    > -
    > - pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
    > - pgdat->node_start_pfn;
    > + grow_pgdat_span(zone->zone_pgdat, start_pfn, end_pfn);
    > + pgdat_resize_unlock(zone->zone_pgdat, &flags);
    > }
    >
    > static int online_pages_range(unsigned long start_pfn, unsigned long
    > nr_pages,
    > @@ -180,16 +192,12 @@
    > return 0;
    > }


    I don't particularly like this change to have grow_pgdat_span() called
    *from* grow_zone_span(). Seems backwards to me. But, this diff look
    funny (not your fault) so I may just be seeing things.

    -- Dave

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining

    Dave Hansen wrote:
    > On Thu, 2008-04-03 at 17:05 -0700, Jeremy Fitzhardinge wrote:
    >
    >> +static void grow_zone_span(unsigned long start_pfn, unsigned long
    >> end_pfn)
    >> +{
    >> unsigned long old_zone_end_pfn;
    >> + struct zone *zone;
    >> + unsigned long flags;
    >> +
    >> + /*
    >> + * This doesn't need a lock to do pfn_to_page().
    >> + * The section can't be removed here because of the
    >> + * memory_block->state_sem.
    >> + */
    >> + zone = page_zone(pfn_to_page(start_pfn));
    >> + pgdat_resize_lock(zone->zone_pgdat, &flags);
    >>
    >> zone_span_writelock(zone);
    >>
    >> @@ -149,19 +171,9 @@
    >> zone->zone_start_pfn;
    >>
    >> zone_span_writeunlock(zone);
    >> -}
    >>
    >> -static void grow_pgdat_span(struct pglist_data *pgdat,
    >> - unsigned long start_pfn, unsigned long end_pfn)
    >> -{
    >> - unsigned long old_pgdat_end_pfn =
    >> - pgdat->node_start_pfn + pgdat->node_spanned_pages;
    >> -
    >> - if (start_pfn < pgdat->node_start_pfn)
    >> - pgdat->node_start_pfn = start_pfn;
    >> -
    >> - pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
    >> - pgdat->node_start_pfn;
    >> + grow_pgdat_span(zone->zone_pgdat, start_pfn, end_pfn);
    >> + pgdat_resize_unlock(zone->zone_pgdat, &flags);
    >> }
    >>
    >> static int online_pages_range(unsigned long start_pfn, unsigned long
    >> nr_pages,
    >> @@ -180,16 +192,12 @@
    >> return 0;
    >> }
    >>

    >
    > I don't particularly like this change to have grow_pgdat_span() called
    > *from* grow_zone_span(). Seems backwards to me. But, this diff look
    > funny (not your fault) so I may just be seeing things.
    >


    Last time I posted this patch you complained about my name
    "online_pages_zone", suggesting "grow_zone_span". Since that already
    existed, I took that as a hint to fold the two functions together. Is
    that not what you meant?

    J
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining

    Dave Hansen wrote:
    > On Thu, 2008-04-03 at 17:05 -0700, Jeremy Fitzhardinge wrote:
    >
    >> +int prepare_online_pages(unsigned long pfn, unsigned long nr_pages)
    >> +{
    >> + int ret = notify_going_online(pfn, nr_pages);
    >> + if (ret)
    >> + return ret;
    >> +
    >> + grow_zone_span(pfn, pfn+nr_pages);
    >> return 0;
    >> +}
    >>

    >
    > OK, after seeing this used in the Xen driver, I'm even less a fan of the
    > name. Mostly because it doesn't actually prepare *pages* to be onlined.
    > It prepares the zones/pgdats and notifies that pages might soon be
    > online. Can you think of any better names?
    >
    > grow_and_notify...??
    >


    Does it even need to be a separately visible function? Could it just be
    part of add_memory()? Is there any reason delay doing it until
    online_pages()?

    That way online_pages() can return to being the one-stop function to
    online all the pages, making mark_pages_online() redundant.

    > It's also a bit funky because you're calling the online notifiers, but
    > you're not actually onlining the pages, yet. Does that have any
    > repercussions?


    No. It will always call the GOING_ONLINE notifier, but it will only
    call the ONLINE notifier if it actually bulk-onlines all the pages. In
    my page-by-page case, it will never end up calling the ONLINE notifier.
    I could call it repeatedly for each page, but I'm not sure how useful
    that is (the lack of any users of the ONLINE notifier makes it hard to
    judge).

    J
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining

    On Thu, 2008-04-03 at 18:20 -0700, Jeremy Fitzhardinge wrote:
    > Last time I posted this patch you complained about my name
    > "online_pages_zone", suggesting "grow_zone_span". Since that already
    > existed, I took that as a hint to fold the two functions together. Is
    > that not what you meant?


    I'm obviously a doofus for suggesting a name that already existed.

    You probably just want a name that lets the caller know that you're
    growing both the pgdats and the zones somehow.

    -- Dave

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH 1 of 6] hotplug-memory: refactor online_pages to separate zone growth from page onlining

    On Thu, 2008-04-03 at 18:32 -0700, Jeremy Fitzhardinge wrote:
    > Dave Hansen wrote:
    > > On Thu, 2008-04-03 at 17:05 -0700, Jeremy Fitzhardinge wrote:
    > >> +int prepare_online_pages(unsigned long pfn, unsigned long nr_pages)
    > >> +{
    > >> + int ret = notify_going_online(pfn, nr_pages);
    > >> + if (ret)
    > >> + return ret;
    > >> +
    > >> + grow_zone_span(pfn, pfn+nr_pages);
    > >> return 0;
    > >> +}
    > >>

    > >
    > > OK, after seeing this used in the Xen driver, I'm even less a fan of the
    > > name. Mostly because it doesn't actually prepare *pages* to be onlined.
    > > It prepares the zones/pgdats and notifies that pages might soon be
    > > online. Can you think of any better names?
    > >
    > > grow_and_notify...??

    >
    > Does it even need to be a separately visible function? Could it just be
    > part of add_memory()? Is there any reason delay doing it until
    > online_pages()?


    Yeah, the add_memory() itself is supposed to be easily able to be rolled
    back. Say the machine didn't actually need the memory, it could just
    give the memory back. No danger of fragmentation or anything.

    But, at the same time, we don't want to have this memory sitting there
    being unused, but still being accounted for in the VM as real memory.
    It's not in use, and making it look in use (by growing the zone spans)
    could upset the VM watermarks.

    > > It's also a bit funky because you're calling the online notifiers, but
    > > you're not actually onlining the pages, yet. Does that have any
    > > repercussions?

    >
    > No. It will always call the GOING_ONLINE notifier, but it will only
    > call the ONLINE notifier if it actually bulk-onlines all the pages. In
    > my page-by-page case, it will never end up calling the ONLINE notifier.
    > I could call it repeatedly for each page, but I'm not sure how useful
    > that is (the lack of any users of the ONLINE notifier makes it hard to
    > judge).


    Ahhh. You're completely right. I was confused by ONLINE and
    GOING_ONLINE. Thanks for the reminder!

    -- Dave

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread