From 7e96fb5710a806fd41d2eb6b41bac14a26a094e2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 6 Apr 2020 20:03:43 -0700 Subject: mm/vma: add missing VMA flag readable name for VM_SYNC Patch series "mm/vma: Use all available wrappers when possible", v2. Apart from adding a VMA flag readable name for trace purpose, this series does some open encoding replacements with availabe VMA specific wrappers. This skips VM_HUGETLB check in vma_migratable() as its already being done with another patch (https://patchwork.kernel.org/patch/11347831/) which is yet to be merged. This patch (of 4): This just adds the missing readable name for VM_SYNC. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Cc: Vlastimil Babka Cc: Steven Rostedt Cc: Ingo Molnar Cc: Alexander Viro Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Dave Hansen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Mel Gorman Cc: Michael Ellerman Cc: Nick Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/1582520593-30704-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/trace/events/mmflags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index a1675d43777e..5fb752034386 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -154,6 +154,7 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) {VM_ACCOUNT, "account" }, \ {VM_NORESERVE, "noreserve" }, \ {VM_HUGETLB, "hugetlb" }, \ + {VM_SYNC, "sync" }, \ __VM_ARCH_SPECIFIC_1 , \ {VM_WIPEONFORK, "wipeonfork" }, \ {VM_DONTDUMP, "dontdump" }, \ -- cgit v1.2.3 From 9de4f22a60f731943f050f4448bf2933ed3fa70b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 6 Apr 2020 20:04:41 -0700 Subject: mm: code cleanup for MADV_FREE Some comments for MADV_FREE is revised and added to help people understand the MADV_FREE code, especially the page flag, PG_swapbacked. This makes page_is_file_cache() isn't consistent with its comments. So the function is renamed to page_is_file_lru() to make them consistent again. All these are put in one patch as one logical change. Suggested-by: David Hildenbrand Suggested-by: Johannes Weiner Suggested-by: David Rientjes Signed-off-by: "Huang, Ying" Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: David Rientjes Acked-by: Michal Hocko Acked-by: Pankaj Gupta Acked-by: Vlastimil Babka Cc: Dave Hansen Cc: Mel Gorman Cc: Minchan Kim Cc: Hugh Dickins Cc: Rik van Riel Link: http://lkml.kernel.org/r/20200317100342.2730705-1-ying.huang@intel.com Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 15 ++++++++------- include/linux/page-flags.h | 5 +++++ include/trace/events/vmscan.h | 2 +- mm/compaction.c | 2 +- mm/gup.c | 2 +- mm/khugepaged.c | 4 ++-- mm/memory-failure.c | 2 +- mm/memory_hotplug.c | 2 +- mm/mempolicy.c | 2 +- mm/migrate.c | 16 ++++++++-------- mm/mprotect.c | 2 +- mm/swap.c | 16 ++++++++-------- mm/vmscan.c | 12 ++++++------ 13 files changed, 44 insertions(+), 38 deletions(-) (limited to 'include/trace') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 6f2fef7b0784..219bef41d87c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -6,19 +6,20 @@ #include /** - * page_is_file_cache - should the page be on a file LRU or anon LRU? + * page_is_file_lru - should the page be on a file LRU or anon LRU? * @page: the page to test * - * Returns 1 if @page is page cache page backed by a regular filesystem, - * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. - * Used by functions that manipulate the LRU lists, to sort a page - * onto the right LRU list. + * Returns 1 if @page is a regular filesystem backed page cache page or a lazily + * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal + * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by + * functions that manipulate the LRU lists, to sort a page onto the right LRU + * list. * * We would like to get this info without a page flag, but the state * needs to survive until the page is last deleted from the LRU, which * could be as far down as __page_cache_release. */ -static inline int page_is_file_cache(struct page *page) +static inline int page_is_file_lru(struct page *page) { return !PageSwapBacked(page); } @@ -75,7 +76,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, */ static inline enum lru_list page_lru_base_type(struct page *page) { - if (page_is_file_cache(page)) + if (page_is_file_lru(page)) return LRU_INACTIVE_FILE; return LRU_INACTIVE_ANON; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 77de28bfefb0..acf7988fd640 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -63,6 +63,11 @@ * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * + * PG_swapbacked is set when a page uses swap as a backing storage. This are + * usually PageAnon or shmem pages but please note that even anonymous pages + * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as + * a result of MADV_FREE). + * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index a5ab2973e8dc..74bb594ccb25 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -323,7 +323,7 @@ TRACE_EVENT(mm_vmscan_writepage, TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->reclaim_flags = trace_reclaim_flags( - page_is_file_cache(page)); + page_is_file_lru(page)); ), TP_printk("page=%p pfn=%lu flags=%s", diff --git a/mm/compaction.c b/mm/compaction.c index df3da2f76fdc..46af63eb8212 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -989,7 +989,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Successfully isolated */ del_page_from_lru_list(page, lruvec, page_lru(page)); mod_node_page_state(page_pgdat(page), - NR_ISOLATED_ANON + page_is_file_cache(page), + NR_ISOLATED_ANON + page_is_file_lru(page), hpage_nr_pages(page)); isolate_success: diff --git a/mm/gup.c b/mm/gup.c index 96af7e08db4b..b185377c38b7 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1677,7 +1677,7 @@ check_again: list_add_tail(&head->lru, &cma_page_list); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + - page_is_file_cache(head), + page_is_file_lru(head), hpage_nr_pages(head)); } } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b1d9a8e189b8..3afc1e2d7a55 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -511,7 +511,7 @@ void __khugepaged_exit(struct mm_struct *mm) static void release_pte_page(struct page *page) { - dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); + dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_lru(page)); unlock_page(page); putback_lru_page(page); } @@ -611,7 +611,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } inc_node_page_state(page, - NR_ISOLATED_ANON + page_is_file_cache(page)); + NR_ISOLATED_ANON + page_is_file_lru(page)); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageLRU(page), page); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 1c961cd26c0b..a96364be8ab4 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1810,7 +1810,7 @@ static int __soft_offline_page(struct page *page, int flags) */ if (!__PageMovable(page)) inc_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + page_is_file_lru(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 19389cdc16a5..005eab3411e5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1317,7 +1317,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) list_add_tail(&page->lru, &source); if (!__PageMovable(page)) inc_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + page_is_file_lru(page)); } else { pr_warn("failed to isolate pfn %lx\n", pfn); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b36926ba02e2..037e5f548118 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1022,7 +1022,7 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist, if (!isolate_lru_page(head)) { list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_cache(head), + NR_ISOLATED_ANON + page_is_file_lru(head), hpage_nr_pages(head)); } else if (flags & MPOL_MF_STRICT) { /* diff --git a/mm/migrate.c b/mm/migrate.c index 1a205503be3f..c1412e04975e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -193,7 +193,7 @@ void putback_movable_pages(struct list_head *l) put_page(page); } else { mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_cache(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -hpage_nr_pages(page)); putback_lru_page(page); } } @@ -1219,7 +1219,7 @@ out: */ if (likely(!__PageMovable(page))) mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_cache(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -hpage_nr_pages(page)); } /* @@ -1592,7 +1592,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, err = 1; list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_cache(head), + NR_ISOLATED_ANON + page_is_file_lru(head), hpage_nr_pages(head)); } out_putpage: @@ -1955,7 +1955,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) return 0; } - page_lru = page_is_file_cache(page); + page_lru = page_is_file_lru(page); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru, hpage_nr_pages(page)); @@ -1991,7 +1991,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, * Don't migrate file pages that are mapped in multiple processes * with execute permissions as they are probably shared libraries. */ - if (page_mapcount(page) != 1 && page_is_file_cache(page) && + if (page_mapcount(page) != 1 && page_is_file_lru(page) && (vma->vm_flags & VM_EXEC)) goto out; @@ -1999,7 +1999,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, * Also do not migrate dirty pages as not all filesystems can move * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles. */ - if (page_is_file_cache(page) && PageDirty(page)) + if (page_is_file_lru(page) && PageDirty(page)) goto out; isolated = numamigrate_isolate_page(pgdat, page); @@ -2014,7 +2014,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, if (!list_empty(&migratepages)) { list_del(&page->lru); dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + page_is_file_lru(page)); putback_lru_page(page); } isolated = 0; @@ -2044,7 +2044,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; struct page *new_page = NULL; - int page_lru = page_is_file_cache(page); + int page_lru = page_is_file_lru(page); unsigned long start = address & HPAGE_PMD_MASK; new_page = alloc_pages_node(node, diff --git a/mm/mprotect.c b/mm/mprotect.c index 311c0dadf71c..0fee14b39416 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -98,7 +98,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, * it cannot move them all from MIGRATE_ASYNC * context. */ - if (page_is_file_cache(page) && PageDirty(page)) + if (page_is_file_lru(page) && PageDirty(page)) continue; /* diff --git a/mm/swap.c b/mm/swap.c index a4af8c999963..18505990c3b1 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -276,7 +276,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, void *arg) { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { - int file = page_is_file_cache(page); + int file = page_is_file_lru(page); int lru = page_lru_base_type(page); del_page_from_lru_list(page, lruvec, lru); @@ -394,7 +394,7 @@ void mark_page_accessed(struct page *page) else __lru_cache_activate_page(page); ClearPageReferenced(page); - if (page_is_file_cache(page)) + if (page_is_file_lru(page)) workingset_activation(page); } if (page_is_idle(page)) @@ -515,7 +515,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, return; active = PageActive(page); - file = page_is_file_cache(page); + file = page_is_file_lru(page); lru = page_lru_base_type(page); del_page_from_lru_list(page, lruvec, lru + active); @@ -548,7 +548,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, void *arg) { if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { - int file = page_is_file_cache(page); + int file = page_is_file_lru(page); int lru = page_lru_base_type(page); del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); @@ -573,9 +573,9 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, ClearPageActive(page); ClearPageReferenced(page); /* - * lazyfree pages are clean anonymous pages. They have - * SwapBacked flag cleared to distinguish normal anonymous - * pages + * Lazyfree pages are clean anonymous pages. They have + * PG_swapbacked flag cleared, to distinguish them from normal + * anonymous pages */ ClearPageSwapBacked(page); add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); @@ -962,7 +962,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, if (page_evictable(page)) { lru = page_lru(page); - update_page_reclaim_stat(lruvec, page_is_file_cache(page), + update_page_reclaim_stat(lruvec, page_is_file_lru(page), PageActive(page)); if (was_unevictable) count_vm_event(UNEVICTABLE_PGRESCUED); diff --git a/mm/vmscan.c b/mm/vmscan.c index 2e8e690d2813..b06868fc4926 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -919,7 +919,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * exceptional entries and shadow exceptional entries in the * same address_space. */ - if (reclaimed && page_is_file_cache(page) && + if (reclaimed && page_is_file_lru(page) && !mapping_exiting(mapping) && !dax_mapping(mapping)) shadow = workingset_eviction(page, target_memcg); __delete_from_page_cache(page, shadow); @@ -1043,7 +1043,7 @@ static void page_check_dirty_writeback(struct page *page, * Anonymous pages are not handled by flushers and must be written * from reclaim context. Do not stall reclaim based on them */ - if (!page_is_file_cache(page) || + if (!page_is_file_lru(page) || (PageAnon(page) && !PageSwapBacked(page))) { *dirty = false; *writeback = false; @@ -1315,7 +1315,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * the rest of the LRU for clean pages and see * the same dirty pages again (PageReclaim). */ - if (page_is_file_cache(page) && + if (page_is_file_lru(page) && (!current_is_kswapd() || !PageReclaim(page) || !test_bit(PGDAT_DIRTY, &pgdat->flags))) { /* @@ -1459,7 +1459,7 @@ activate_locked: try_to_free_swap(page); VM_BUG_ON_PAGE(PageActive(page), page); if (!PageMlocked(page)) { - int type = page_is_file_cache(page); + int type = page_is_file_lru(page); SetPageActive(page); stat->nr_activate[type] += nr_pages; count_memcg_page_event(page, PGACTIVATE); @@ -1497,7 +1497,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, LIST_HEAD(clean_pages); list_for_each_entry_safe(page, next, page_list, lru) { - if (page_is_file_cache(page) && !PageDirty(page) && + if (page_is_file_lru(page) && !PageDirty(page) && !__PageMovable(page) && !PageUnevictable(page)) { ClearPageActive(page); list_move(&page->lru, &clean_pages); @@ -2053,7 +2053,7 @@ static void shrink_active_list(unsigned long nr_to_scan, * IO, plus JVM can create lots of anon VM_EXEC pages, * so we ignore them here. */ - if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) { + if ((vm_flags & VM_EXEC) && page_is_file_lru(page)) { list_add(&page->lru, &l_active); continue; } -- cgit v1.2.3 From e1e267c7928fe387e5e1cffeafb0de2d0473663a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 6 Apr 2020 20:06:04 -0700 Subject: khugepaged: skip collapse if uffd-wp detected Don't collapse the huge PMD if there is any userfault write protected small PTEs. The problem is that the write protection is in small page granularity and there's no way to keep all these write protection information if the small pages are going to be merged into a huge PMD. The same thing needs to be considered for swap entries and migration entries. So do the check as well disregarding khugepaged_max_ptes_swap. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-12-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/trace/events/huge_memory.h | 1 + mm/khugepaged.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index d82a0f4e824d..70e32ff096ec 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -13,6 +13,7 @@ EM( SCAN_PMD_NULL, "pmd_null") \ EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ + EM( SCAN_PTE_UFFD_WP, "pte_uffd_wp") \ EM( SCAN_PAGE_RO, "no_writable_page") \ EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \ EM( SCAN_PAGE_NULL, "page_null") \ diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3afc1e2d7a55..99d77ffb79c2 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -29,6 +29,7 @@ enum scan_result { SCAN_PMD_NULL, SCAN_EXCEED_NONE_PTE, SCAN_PTE_NON_PRESENT, + SCAN_PTE_UFFD_WP, SCAN_PAGE_RO, SCAN_LACK_REFERENCED_PAGE, SCAN_PAGE_NULL, @@ -1137,6 +1138,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, pte_t pteval = *_pte; if (is_swap_pte(pteval)) { if (++unmapped <= khugepaged_max_ptes_swap) { + /* + * Always be strict with uffd-wp + * enabled swap entries. Please see + * comment below for pte_uffd_wp(). + */ + if (pte_swp_uffd_wp(pteval)) { + result = SCAN_PTE_UFFD_WP; + goto out_unmap; + } continue; } else { result = SCAN_EXCEED_SWAP_PTE; @@ -1156,6 +1166,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, result = SCAN_PTE_NON_PRESENT; goto out_unmap; } + if (pte_uffd_wp(pteval)) { + /* + * Don't collapse the page if any of the small + * PTEs are armed with uffd write protection. + * Here we can also mark the new huge pmd as + * write protected if any of the small ones is + * marked but that could bring uknown + * userfault messages that falls outside of + * the registered range. So, just be simple. + */ + result = SCAN_PTE_UFFD_WP; + goto out_unmap; + } if (pte_write(pteval)) writable = true; -- cgit v1.2.3