[PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms - Kernel

This is a discussion on [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms - Kernel ; Memcgroups up against their limit were OOMing unjustifiably on x86_32 with highmem: pages ripe for freeing were not being found. It's because the zonelist changes in vmscan.c now derive zone from gfp_mask, and the gfp mask being passed into try_to_free_mem_cgroup_pages ...

+ Reply to Thread
Results 1 to 7 of 7

Thread: [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms

  1. [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms

    Memcgroups up against their limit were OOMing unjustifiably on x86_32
    with highmem: pages ripe for freeing were not being found.

    It's because the zonelist changes in vmscan.c now derive zone from gfp_mask,
    and the gfp mask being passed into try_to_free_mem_cgroup_pages is atuned
    to allocating a struct page_cgroup (__GFP_HIGHMEM masked off), whereas here
    we need to be scanning highmem zones.

    Frankenstein a gfp_mask from GFP_HIGHUSER_MOVABLE and the mask passed in.

    Signed-off-by: Hugh Dickins
    ---
    Same error was in -rc3-mm1, sorry for not sending fix in time for -rc5-mm1.
    The error comes 1 or 4 patches earlier, but this patch atop 2.6.25-rc5-mm1
    fits best just after
    mm-have-zonelist-contains-structs-with-both-a-zone-pointer-and-zone_idx.patch

    I've other GFP masking fixes or cleanups to come another day: this fixes
    just one clear bug, peculiar to the -mm tree, orthogonal to the rest.
    But two little do_try_to_free_pages cleanups follow on from this...

    mm/vmscan.c | 6 +++---
    1 file changed, 3 insertions(+), 3 deletions(-)

    --- 2.6.25-rc5-mm1/mm/vmscan.c 2008-03-11 14:19:36.000000000 +0000
    +++ linux/mm/vmscan.c 2008-03-11 20:09:05.000000000 +0000
    @@ -1444,7 +1444,6 @@ unsigned long try_to_free_mem_cgroup_pag
    gfp_t gfp_mask)
    {
    struct scan_control sc = {
    - .gfp_mask = gfp_mask,
    .may_writepage = !laptop_mode,
    .may_swap = 1,
    .swap_cluster_max = SWAP_CLUSTER_MAX,
    @@ -1454,9 +1453,10 @@ unsigned long try_to_free_mem_cgroup_pag
    .isolate_pages = mem_cgroup_isolate_pages,
    };
    struct zonelist *zonelist;
    - int target_zone = gfp_zonelist(GFP_HIGHUSER_MOVABLE);

    - zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
    + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
    + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
    + zonelist = NODE_DATA(numa_node_id())->node_zonelists;
    if (do_try_to_free_pages(zonelist, sc.gfp_mask, &sc))
    return 1;
    return 0;
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. [PATCH rc5-mm1 3/3] do_try_to_free_pages gfp_mask redundant

    do_try_to_free_pages is taking both a gfp_mask and an sc->gfp_mask,
    sometimes one used and sometimes the other; at present they're always
    identical, but such duplication begs for trouble: remove gfp_mask arg.

    Signed-off-by: Hugh Dickins
    ---

    mm/vmscan.c | 10 +++++-----
    1 file changed, 5 insertions(+), 5 deletions(-)

    --- 2.6.25-rc5-mm1++/mm/vmscan.c 2008-03-11 20:15:47.000000000 +0000
    +++ linux/mm/vmscan.c 2008-03-11 20:21:14.000000000 +0000
    @@ -1324,7 +1324,7 @@ static unsigned long shrink_zones(int pr
    * allocation attempt will fail.
    */
    static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
    - gfp_t gfp_mask, struct scan_control *sc)
    + struct scan_control *sc)
    {
    int priority;
    int ret = 0;
    @@ -1334,7 +1334,7 @@ static unsigned long do_try_to_free_page
    unsigned long lru_pages = 0;
    struct zoneref *z;
    struct zone *zone;
    - enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    + enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);

    if (scan_global_lru(sc))
    count_vm_event(ALLOCSTALL);
    @@ -1363,7 +1363,7 @@ static unsigned long do_try_to_free_page
    * over limit cgroups
    */
    if (scan_global_lru(sc)) {
    - shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
    + shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
    if (reclaim_state) {
    nr_reclaimed += reclaim_state->reclaimed_slab;
    reclaim_state->reclaimed_slab = 0;
    @@ -1435,7 +1435,7 @@ unsigned long try_to_free_pages(struct z
    .isolate_pages = isolate_pages_global,
    };

    - return do_try_to_free_pages(zonelist, gfp_mask, &sc);
    + return do_try_to_free_pages(zonelist, &sc);
    }

    #ifdef CONFIG_CGROUP_MEM_RES_CTLR
    @@ -1457,7 +1457,7 @@ unsigned long try_to_free_mem_cgroup_pag
    sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
    (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
    zonelist = NODE_DATA(numa_node_id())->node_zonelists;
    - return do_try_to_free_pages(zonelist, sc.gfp_mask, &sc);
    + return do_try_to_free_pages(zonelist, &sc);
    }
    #endif

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms

    Hugh Dickins wrote:
    > Memcgroups up against their limit were OOMing unjustifiably on x86_32
    > with highmem: pages ripe for freeing were not being found.
    >
    > It's because the zonelist changes in vmscan.c now derive zone from gfp_mask,
    > and the gfp mask being passed into try_to_free_mem_cgroup_pages is atuned
    > to allocating a struct page_cgroup (__GFP_HIGHMEM masked off), whereas here
    > we need to be scanning highmem zones.
    >


    Very good catch. I see a high_zoneidx field being derived from gfp_mask -mm.
    I've been looking at mainline code too often.


    --
    Warm Regards,
    Balbir Singh
    Linux Technology Center
    IBM, ISTL

    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms

    On (11/03/08 21:12), Hugh Dickins didst pronounce:
    > Memcgroups up against their limit were OOMing unjustifiably on x86_32
    > with highmem: pages ripe for freeing were not being found.
    >
    > It's because the zonelist changes in vmscan.c now derive zone from gfp_mask,
    > and the gfp mask being passed into try_to_free_mem_cgroup_pages is atuned
    > to allocating a struct page_cgroup (__GFP_HIGHMEM masked off), whereas here
    > we need to be scanning highmem zones.
    >
    > Frankenstein a gfp_mask from GFP_HIGHUSER_MOVABLE and the mask passed in.
    >


    Well spotted, but I have one query below.

    > Signed-off-by: Hugh Dickins
    > ---
    > Same error was in -rc3-mm1, sorry for not sending fix in time for -rc5-mm1.
    > The error comes 1 or 4 patches earlier, but this patch atop 2.6.25-rc5-mm1
    > fits best just after
    > mm-have-zonelist-contains-structs-with-both-a-zone-pointer-and-zone_idx.patch
    >
    > I've other GFP masking fixes or cleanups to come another day: this fixes
    > just one clear bug, peculiar to the -mm tree, orthogonal to the rest.
    > But two little do_try_to_free_pages cleanups follow on from this...
    >
    > mm/vmscan.c | 6 +++---
    > 1 file changed, 3 insertions(+), 3 deletions(-)
    >
    > --- 2.6.25-rc5-mm1/mm/vmscan.c 2008-03-11 14:19:36.000000000 +0000
    > +++ linux/mm/vmscan.c 2008-03-11 20:09:05.000000000 +0000
    > @@ -1444,7 +1444,6 @@ unsigned long try_to_free_mem_cgroup_pag
    > gfp_t gfp_mask)
    > {
    > struct scan_control sc = {
    > - .gfp_mask = gfp_mask,
    > .may_writepage = !laptop_mode,
    > .may_swap = 1,
    > .swap_cluster_max = SWAP_CLUSTER_MAX,
    > @@ -1454,9 +1453,10 @@ unsigned long try_to_free_mem_cgroup_pag
    > .isolate_pages = mem_cgroup_isolate_pages,
    > };
    > struct zonelist *zonelist;
    > - int target_zone = gfp_zonelist(GFP_HIGHUSER_MOVABLE);
    >
    > - zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
    > + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
    > + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
    > + zonelist = NODE_DATA(numa_node_id())->node_zonelists;


    While it is clear you are setting the mask to include HIGHMEM-related flags,
    it's not as clear to me why you alter the zonelist as well. target_zone was
    already based on HIGHMEM so what are you fixing there?

    It should still work as ->node_zonelists[0] is a zonelist suitable for node
    fallback as opposed to node_zonelists[1] which is for GFP_THISNODE but
    maybe this was not quite what you intended?

    Probably something obvious that will hit me the second I push send

    > if (do_try_to_free_pages(zonelist, sc.gfp_mask, &sc))
    > return 1;
    > return 0;
    >


    --
    Mel Gorman
    Part-time Phd Student Linux Technology Center
    University of Limerick IBM Dublin Software Lab
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms

    On Wed, 12 Mar 2008, Mel Gorman wrote:
    > On (11/03/08 21:12), Hugh Dickins didst pronounce:
    > > @@ -1454,9 +1453,10 @@ unsigned long try_to_free_mem_cgroup_pag
    > > .isolate_pages = mem_cgroup_isolate_pages,
    > > };
    > > struct zonelist *zonelist;
    > > - int target_zone = gfp_zonelist(GFP_HIGHUSER_MOVABLE);
    > >
    > > - zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
    > > + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
    > > + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
    > > + zonelist = NODE_DATA(numa_node_id())->node_zonelists;

    >
    > While it is clear you are setting the mask to include HIGHMEM-related flags,
    > it's not as clear to me why you alter the zonelist as well. target_zone was
    > already based on HIGHMEM so what are you fixing there?
    >
    > It should still work as ->node_zonelists[0] is a zonelist suitable for node
    > fallback as opposed to node_zonelists[1] which is for GFP_THISNODE but
    > maybe this was not quite what you intended?
    >
    > Probably something obvious that will hit me the second I push send


    That bit wasn't a fix as such, it just came from not wanting to repeat
    GFP_HIGHUSER_MOVABLE in there: after I'd looked at gfp_zonelist, it
    appeared to be redundant in this context, so I preferred to cut out
    the target_zone, and let sc.gfp_mask handle it all. That worries you?

    Hugh
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [PATCH rc5-mm1 1/3] mm-have-zonelist: fix memcg ooms

    On (12/03/08 13:34), Hugh Dickins didst pronounce:
    > On Wed, 12 Mar 2008, Mel Gorman wrote:
    > > On (11/03/08 21:12), Hugh Dickins didst pronounce:
    > > > @@ -1454,9 +1453,10 @@ unsigned long try_to_free_mem_cgroup_pag
    > > > .isolate_pages = mem_cgroup_isolate_pages,
    > > > };
    > > > struct zonelist *zonelist;
    > > > - int target_zone = gfp_zonelist(GFP_HIGHUSER_MOVABLE);
    > > >
    > > > - zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
    > > > + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
    > > > + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
    > > > + zonelist = NODE_DATA(numa_node_id())->node_zonelists;

    > >
    > > While it is clear you are setting the mask to include HIGHMEM-related flags,
    > > it's not as clear to me why you alter the zonelist as well. target_zone was
    > > already based on HIGHMEM so what are you fixing there?
    > >
    > > It should still work as ->node_zonelists[0] is a zonelist suitable for node
    > > fallback as opposed to node_zonelists[1] which is for GFP_THISNODE but
    > > maybe this was not quite what you intended?
    > >
    > > Probably something obvious that will hit me the second I push send

    >
    > That bit wasn't a fix as such, it just came from not wanting to repeat
    > GFP_HIGHUSER_MOVABLE in there: after I'd looked at gfp_zonelist, it
    > appeared to be redundant in this context, so I preferred to cut out
    > the target_zone, and let sc.gfp_mask handle it all. That worries you?
    >


    Only a little. If the layout of node_zonelists[] changes so that [0] does not
    contain the node fallback list, it may cause a problem but I can't imagine why
    such a situation would occur either. My initial concern was because I couldn't
    see what difference the change made so assumed I must be missing something.

    --
    Mel Gorman
    Part-time Phd Student Linux Technology Center
    University of Limerick IBM Dublin Software Lab
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [PATCH rc5-mm1 3/3] do_try_to_free_pages gfp_mask redundant

    On (11/03/08 21:16), Hugh Dickins didst pronounce:
    > do_try_to_free_pages is taking both a gfp_mask and an sc->gfp_mask,
    > sometimes one used and sometimes the other; at present they're always
    > identical, but such duplication begs for trouble: remove gfp_mask arg.
    >
    > Signed-off-by: Hugh Dickins


    Acked-by: Mel Gorman

    > ---
    >
    > mm/vmscan.c | 10 +++++-----
    > 1 file changed, 5 insertions(+), 5 deletions(-)
    >
    > --- 2.6.25-rc5-mm1++/mm/vmscan.c 2008-03-11 20:15:47.000000000 +0000
    > +++ linux/mm/vmscan.c 2008-03-11 20:21:14.000000000 +0000
    > @@ -1324,7 +1324,7 @@ static unsigned long shrink_zones(int pr
    > * allocation attempt will fail.
    > */
    > static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
    > - gfp_t gfp_mask, struct scan_control *sc)
    > + struct scan_control *sc)
    > {
    > int priority;
    > int ret = 0;
    > @@ -1334,7 +1334,7 @@ static unsigned long do_try_to_free_page
    > unsigned long lru_pages = 0;
    > struct zoneref *z;
    > struct zone *zone;
    > - enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    > + enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
    >
    > if (scan_global_lru(sc))
    > count_vm_event(ALLOCSTALL);
    > @@ -1363,7 +1363,7 @@ static unsigned long do_try_to_free_page
    > * over limit cgroups
    > */
    > if (scan_global_lru(sc)) {
    > - shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
    > + shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
    > if (reclaim_state) {
    > nr_reclaimed += reclaim_state->reclaimed_slab;
    > reclaim_state->reclaimed_slab = 0;
    > @@ -1435,7 +1435,7 @@ unsigned long try_to_free_pages(struct z
    > .isolate_pages = isolate_pages_global,
    > };
    >
    > - return do_try_to_free_pages(zonelist, gfp_mask, &sc);
    > + return do_try_to_free_pages(zonelist, &sc);
    > }
    >
    > #ifdef CONFIG_CGROUP_MEM_RES_CTLR
    > @@ -1457,7 +1457,7 @@ unsigned long try_to_free_mem_cgroup_pag
    > sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
    > (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
    > zonelist = NODE_DATA(numa_node_id())->node_zonelists;
    > - return do_try_to_free_pages(zonelist, sc.gfp_mask, &sc);
    > + return do_try_to_free_pages(zonelist, &sc);
    > }
    > #endif
    >
    >


    --
    Mel Gorman
    Part-time Phd Student Linux Technology Center
    University of Limerick IBM Dublin Software Lab
    --
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread