Removed upstreamed:
	backport-5.15/883-v6.1-ca8210-Fix-crash-by-zero-initializing-data.patch[1]

All patches automatically rebased

1. https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.15.83&id=246bcd05ba6cc43b34ac0bb4bac3ea94a4efa07c

Build system: x86_64
Build-tested: bcm2711/RPi4B
Run-tested: bcm2711/RPi4B

Signed-off-by: John Audia <therealgraysky@proton.me>
From 05f366c941ae2bb8ba21c79fafcb747a5a6b967b Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Mon, 25 Jan 2021 21:12:33 -0700
Subject: [PATCH 04/10] mm: multigenerational lru: groundwork

For each lruvec, evictable pages are divided into multiple
generations. The youngest generation number is stored in
lrugen->max_seq for both anon and file types as they are aged on an
equal footing. The oldest generation numbers are stored in
lrugen->min_seq[] separately for anon and file types as clean file
pages can be evicted regardless of swap constraints. These three
variables are monotonically increasing. Generation numbers are
truncated into order_base_2(MAX_NR_GENS+1) bits in order to fit into
page->flags. The sliding window technique is used to prevent truncated
generation numbers from overlapping. Each truncated generation number
is an index to
lrugen->lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES].

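As a point of reference (this illustration is not part of the patch, and
the helper names gen_from_seq() and gen_from_page() are hypothetical),
the index math described above is condensed from lru_gen_from_seq() and
page_lru_gen() introduced further down:

  /* an offset into the sliding window that tracks MAX_NR_GENS generations */
  static int gen_from_seq(unsigned long seq)
  {
  	return seq % MAX_NR_GENS;
  }

  /*
   * page->flags stores gen + 1; 0 means the page is not on a
   * multigenerational lru list.
   */
  static int gen_from_page(struct page *page)
  {
  	return ((READ_ONCE(page->flags) & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
  }
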
The framework comprises two conceptually independent components: the
aging, which produces young generations, and the eviction, which
consumes old generations. Both can be invoked independently from user
space for the purpose of working set estimation and proactive reclaim.

The protection of hot pages and the selection of cold pages are based
on page access types and patterns. There are two access types: one via
page tables and the other via file descriptors. The protection of the
former type is by design stronger because:
  1) The uncertainty in determining the access patterns of the former
  type is higher due to the coalesced nature of the accessed bit.
  2) The cost of evicting the former type is higher due to the TLB
  flushes required and the likelihood of involving I/O.
  3) The penalty of under-protecting the former type is higher because
  applications usually do not prepare themselves for major faults like
  they do for blocked I/O. For example, client applications commonly
  dedicate blocked I/O to separate threads to avoid UI janks that
  negatively affect user experience.

There are also two access patterns: one with temporal locality and the
other without. The latter pattern, e.g., random and sequential, needs
to be explicitly excluded to avoid weakening the protection of the
former pattern. Generally the former type follows the former pattern
unless MADV_SEQUENTIAL is specified and the latter type follows the
latter pattern unless outlying refaults have been observed.

Upon faulting, a page is added to the youngest generation, which
provides the strongest protection as the eviction will not consider
this page before the aging has scanned it at least twice. The first
scan clears the accessed bit set during the initial fault. And the
second scan makes sure this page has not been used since the first
scan. A page from any other generations is brought back to the
youngest generation whenever the aging finds the accessed bit set on
any of the PTEs mapping this page.

Unmapped pages are initially added to the oldest generation and then
conditionally protected by tiers. This is done later [PATCH 07/10].

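For reference, the placement rules above map onto the generation
selection in lru_gen_add_page() added below; the following excerpt is a
simplified illustration, not part of the functional change:

  if (PageActive(page))			/* e.g., mapped upon fault */
  	gen = lru_gen_from_seq(lrugen->max_seq);		/* youngest */
  else if ((!type && !PageSwapCache(page)) ||
  	 (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))))
  	gen = lru_gen_from_seq(lrugen->min_seq[type] + 1);	/* second oldest */
  else
  	gen = lru_gen_from_seq(lrugen->min_seq[type]);		/* oldest */
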
Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Change-Id: I71de7cd15b8dfa6f9fdd838023474693c4fee0a7
---
 fs/fuse/dev.c                     |   3 +-
 include/linux/cgroup.h            |  15 +-
 include/linux/mm.h                |  36 ++++
 include/linux/mm_inline.h         | 182 ++++++++++++++++++++
 include/linux/mmzone.h            |  70 ++++++++
 include/linux/page-flags-layout.h |  19 ++-
 include/linux/page-flags.h        |   4 +-
 include/linux/sched.h             |   3 +
 kernel/bounds.c                   |   3 +
 kernel/cgroup/cgroup-internal.h   |   1 -
 mm/huge_memory.c                  |   3 +-
 mm/memcontrol.c                   |   1 +
 mm/memory.c                       |   7 +
 mm/mm_init.c                      |   6 +-
 mm/page_alloc.c                   |   1 +
 mm/swap.c                         |   9 +-
 mm/swapfile.c                     |   2 +
 mm/vmscan.c                       | 268 ++++++++++++++++++++++++++++++
 18 files changed, 618 insertions(+), 15 deletions(-)

--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -785,7 +785,8 @@ static int fuse_check_page(struct page *
 	       1 << PG_active |
 	       1 << PG_workingset |
 	       1 << PG_reclaim |
-	       1 << PG_waiters))) {
+	       1 << PG_waiters |
+	       LRU_GEN_MASK | LRU_REFS_MASK))) {
 		dump_page(page, "fuse: trying to steal weird page");
 		return 1;
 	}
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -433,6 +433,18 @@ static inline void cgroup_put(struct cgr
 	css_put(&cgrp->self);
 }
 
+extern struct mutex cgroup_mutex;
+
+static inline void cgroup_lock(void)
+{
+	mutex_lock(&cgroup_mutex);
+}
+
+static inline void cgroup_unlock(void)
+{
+	mutex_unlock(&cgroup_mutex);
+}
+
 /**
  * task_css_set_check - obtain a task's css_set with extra access conditions
  * @task: the task to obtain css_set for
@@ -447,7 +459,6 @@ static inline void cgroup_put(struct cgr
  * as locks used during the cgroup_subsys::attach() methods.
  */
 #ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
 extern spinlock_t css_set_lock;
 #define task_css_set_check(task, __c)					\
 	rcu_dereference_check((task)->cgroups,				\
@@ -708,6 +719,8 @@ struct cgroup;
 static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
 static inline void css_get(struct cgroup_subsys_state *css) {}
 static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
 					 struct task_struct *t) { return 0; }
 static inline int cgroupstats_build(struct cgroupstats *stats,
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1093,6 +1093,8 @@ vm_fault_t finish_mkwrite_fault(struct v
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
 #define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
 #define KASAN_TAG_PGOFF		(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+#define LRU_GEN_PGOFF		(KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
+#define LRU_REFS_PGOFF		(LRU_GEN_PGOFF - LRU_REFS_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -1807,6 +1809,40 @@ static inline void unmap_mapping_range(s
 		loff_t const holebegin, loff_t const holelen, int even_cows) { }
 #endif
 
+#ifdef CONFIG_LRU_GEN
+static inline void task_enter_nonseq_fault(void)
+{
+	WARN_ON(current->in_nonseq_fault);
+
+	current->in_nonseq_fault = 1;
+}
+
+static inline void task_exit_nonseq_fault(void)
+{
+	WARN_ON(!current->in_nonseq_fault);
+
+	current->in_nonseq_fault = 0;
+}
+
+static inline bool task_in_nonseq_fault(void)
+{
+	return current->in_nonseq_fault;
+}
+#else
+static inline void task_enter_nonseq_fault(void)
+{
+}
+
+static inline void task_exit_nonseq_fault(void)
+{
+}
+
+static inline bool task_in_nonseq_fault(void)
+{
+	return false;
+}
+#endif /* CONFIG_LRU_GEN */
+
 static inline void unmap_shared_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen)
 {
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -79,11 +79,187 @@ static __always_inline enum lru_list pag
 	return lru;
 }
 
+#ifdef CONFIG_LRU_GEN
+
+static inline bool lru_gen_enabled(void)
+{
+#ifdef CONFIG_LRU_GEN_ENABLED
+	DECLARE_STATIC_KEY_TRUE(lru_gen_static_key);
+
+	return static_branch_likely(&lru_gen_static_key);
+#else
+	DECLARE_STATIC_KEY_FALSE(lru_gen_static_key);
+
+	return static_branch_unlikely(&lru_gen_static_key);
+#endif
+}
+
+/* Return an index within the sliding window that tracks MAX_NR_GENS generations. */
+static inline int lru_gen_from_seq(unsigned long seq)
+{
+	return seq % MAX_NR_GENS;
+}
+
+/* The youngest and the second youngest generations are counted as active. */
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
+{
+	unsigned long max_seq = lruvec->evictable.max_seq;
+
+	VM_BUG_ON(gen >= MAX_NR_GENS);
+
+	return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
+}
+
+/* Update the sizes of the multigenerational lru lists. */
+static inline void lru_gen_update_size(struct page *page, struct lruvec *lruvec,
+				       int old_gen, int new_gen)
+{
+	int type = page_is_file_lru(page);
+	int zone = page_zonenum(page);
+	int delta = thp_nr_pages(page);
+	enum lru_list lru = type * LRU_FILE;
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	lockdep_assert_held(&lruvec->lru_lock);
+	VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS);
+	VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS);
+	VM_BUG_ON(old_gen == -1 && new_gen == -1);
+
+	if (old_gen >= 0)
+		WRITE_ONCE(lrugen->sizes[old_gen][type][zone],
+			   lrugen->sizes[old_gen][type][zone] - delta);
+	if (new_gen >= 0)
+		WRITE_ONCE(lrugen->sizes[new_gen][type][zone],
+			   lrugen->sizes[new_gen][type][zone] + delta);
+
+	if (old_gen < 0) {
+		if (lru_gen_is_active(lruvec, new_gen))
+			lru += LRU_ACTIVE;
+		update_lru_size(lruvec, lru, zone, delta);
+		return;
+	}
+
+	if (new_gen < 0) {
+		if (lru_gen_is_active(lruvec, old_gen))
+			lru += LRU_ACTIVE;
+		update_lru_size(lruvec, lru, zone, -delta);
+		return;
+	}
+
+	if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
+		update_lru_size(lruvec, lru, zone, -delta);
+		update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
+	}
+
+	VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
+}
+
+/* Add a page to one of the multigenerational lru lists. Return true on success. */
+static inline bool lru_gen_add_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
+{
+	int gen;
+	unsigned long old_flags, new_flags;
+	int type = page_is_file_lru(page);
+	int zone = page_zonenum(page);
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	if (PageUnevictable(page) || !lrugen->enabled[type])
+		return false;
+	/*
+	 * If a page shouldn't be considered for eviction, i.e., a page mapped
+	 * upon fault during which the accessed bit is set, add it to the
+	 * youngest generation.
+	 *
+	 * If a page can't be evicted immediately, i.e., an anon page not in
+	 * swap cache or a dirty page pending writeback, add it to the second
+	 * oldest generation.
+	 *
+	 * If a page could be evicted immediately, e.g., a clean page, add it to
+	 * the oldest generation.
+	 */
+	if (PageActive(page))
+		gen = lru_gen_from_seq(lrugen->max_seq);
+	else if ((!type && !PageSwapCache(page)) ||
+		 (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))))
+		gen = lru_gen_from_seq(lrugen->min_seq[type] + 1);
+	else
+		gen = lru_gen_from_seq(lrugen->min_seq[type]);
+
+	do {
+		new_flags = old_flags = READ_ONCE(page->flags);
+		VM_BUG_ON_PAGE(new_flags & LRU_GEN_MASK, page);
+
+		new_flags &= ~(LRU_GEN_MASK | BIT(PG_active));
+		new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
+	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+
+	lru_gen_update_size(page, lruvec, -1, gen);
+	/* for rotate_reclaimable_page() */
+	if (reclaiming)
+		list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
+	else
+		list_add(&page->lru, &lrugen->lists[gen][type][zone]);
+
+	return true;
+}
+
+/* Delete a page from one of the multigenerational lru lists. Return true on success. */
+static inline bool lru_gen_del_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
+{
+	int gen;
+	unsigned long old_flags, new_flags;
+
+	do {
+		new_flags = old_flags = READ_ONCE(page->flags);
+		if (!(new_flags & LRU_GEN_MASK))
+			return false;
+
+		VM_BUG_ON_PAGE(PageActive(page), page);
+		VM_BUG_ON_PAGE(PageUnevictable(page), page);
+
+		gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+
+		new_flags &= ~LRU_GEN_MASK;
+		/* for shrink_page_list() */
+		if (reclaiming)
+			new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
+		else if (lru_gen_is_active(lruvec, gen))
+			new_flags |= BIT(PG_active);
+	} while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+
+	lru_gen_update_size(page, lruvec, gen, -1);
+	list_del(&page->lru);
+
+	return true;
+}
+
+#else
+
+static inline bool lru_gen_enabled(void)
+{
+	return false;
+}
+
+static inline bool lru_gen_add_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
+{
+	return false;
+}
+
+static inline bool lru_gen_del_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
+{
+	return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
 static __always_inline void add_page_to_lru_list(struct page *page,
 				struct lruvec *lruvec)
 {
 	enum lru_list lru = page_lru(page);
 
+	if (lru_gen_add_page(page, lruvec, false))
+		return;
+
 	update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
 	list_add(&page->lru, &lruvec->lists[lru]);
 }
@@ -93,6 +269,9 @@ static __always_inline void add_page_to_
 {
 	enum lru_list lru = page_lru(page);
 
+	if (lru_gen_add_page(page, lruvec, true))
+		return;
+
 	update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
 	list_add_tail(&page->lru, &lruvec->lists[lru]);
 }
@@ -100,6 +279,9 @@ static __always_inline void add_page_to_
 static __always_inline void del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec)
 {
+	if (lru_gen_del_page(page, lruvec, false))
+		return;
+
 	list_del(&page->lru);
 	update_lru_size(lruvec, page_lru(page), page_zonenum(page),
 			-thp_nr_pages(page));
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -294,6 +294,72 @@ enum lruvec_flags {
 					 */
 };
 
+struct lruvec;
+
+#define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+#define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+
+#ifdef CONFIG_LRU_GEN
+
+/*
+ * For each lruvec, evictable pages are divided into multiple generations. The
+ * youngest and the oldest generation numbers, AKA max_seq and min_seq, are
+ * monotonically increasing. The sliding window technique is used to track at
+ * least MIN_NR_GENS and at most MAX_NR_GENS generations. An offset within the
+ * window, AKA gen, indexes an array of per-type and per-zone lists for the
+ * corresponding generation. The counter in page->flags stores gen+1 while a
+ * page is on one of the multigenerational lru lists. Otherwise, it stores 0.
+ *
+ * After a page is faulted in, the aging must check the accessed bit at least
+ * twice before the eviction would consider it. The first check clears the
+ * accessed bit set during the initial fault. The second check makes sure this
+ * page hasn't been used since then.
+ */
+#define MIN_NR_GENS		2
+#define MAX_NR_GENS		((unsigned int)CONFIG_NR_LRU_GENS)
+
+struct lrugen {
+	/* the aging increments the max generation number */
+	unsigned long max_seq;
+	/* the eviction increments the min generation numbers */
+	unsigned long min_seq[ANON_AND_FILE];
+	/* the birth time of each generation in jiffies */
+	unsigned long timestamps[MAX_NR_GENS];
+	/* the multigenerational lru lists */
+	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+	/* the sizes of the multigenerational lru lists in pages */
+	unsigned long sizes[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+	/* whether the multigenerational lru is enabled */
+	bool enabled[ANON_AND_FILE];
+};
+
+#define MAX_BATCH_SIZE		8192
+
+void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec);
+void lru_gen_change_state(bool enable, bool main, bool swap);
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
+#endif
+
+#else /* !CONFIG_LRU_GEN */
+
+static inline void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec)
+{
+}
+
+static inline void lru_gen_change_state(bool enable, bool main, bool swap)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{
+}
+#endif
+
+#endif /* CONFIG_LRU_GEN */
+
 struct lruvec {
 	struct list_head		lists[NR_LRU_LISTS];
 	/* per lruvec lru_lock for memcg */
@@ -311,6 +377,10 @@ struct lruvec {
 	unsigned long			refaults[ANON_AND_FILE];
 	/* Various lruvec state flags (enum lruvec_flags) */
 	unsigned long			flags;
+#ifdef CONFIG_LRU_GEN
+	/* unevictable pages are on LRU_UNEVICTABLE */
+	struct lrugen			evictable;
+#endif
 #ifdef CONFIG_MEMCG
 	struct pglist_data *pgdat;
 #endif
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -26,6 +26,14 @@
 
 #define ZONES_WIDTH		ZONES_SHIFT
 
+#ifdef CONFIG_LRU_GEN
+/* LRU_GEN_WIDTH is generated from order_base_2(CONFIG_NR_LRU_GENS + 1). */
+#define LRU_REFS_WIDTH		(CONFIG_TIERS_PER_GEN - 2)
+#else
+#define LRU_GEN_WIDTH		0
+#define LRU_REFS_WIDTH		0
+#endif /* CONFIG_LRU_GEN */
+
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
 #define SECTIONS_SHIFT	(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
@@ -55,7 +63,8 @@
 #define SECTIONS_WIDTH		0
 #endif
 
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
+	<= BITS_PER_LONG - NR_PAGEFLAGS
 #define NODES_WIDTH		NODES_SHIFT
 #elif defined(CONFIG_SPARSEMEM_VMEMMAP)
 #error "Vmemmap: No space for nodes field in page flags"
@@ -89,8 +98,8 @@
 #define LAST_CPUPID_SHIFT 0
 #endif
 
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
-	<= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+	KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
 #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
 #else
 #define LAST_CPUPID_WIDTH 0
@@ -100,8 +109,8 @@
 #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #endif
 
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
-	> BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+	KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
 #error "Not enough bits in page flags"
 #endif
 
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -845,7 +845,7 @@ static inline void ClearPageSlabPfmemall
 	 1UL << PG_private	| 1UL << PG_private_2	|	\
 	 1UL << PG_writeback	| 1UL << PG_reserved	|	\
 	 1UL << PG_slab		| 1UL << PG_active 	|	\
-	 1UL << PG_unevictable	| __PG_MLOCKED)
+	 1UL << PG_unevictable	| __PG_MLOCKED | LRU_GEN_MASK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
@@ -856,7 +856,7 @@ static inline void ClearPageSlabPfmemall
  * alloc-free cycle to prevent from reusing the page.
  */
 #define PAGE_FLAGS_CHECK_AT_PREP	\
-	(PAGEFLAGS_MASK & ~__PG_HWPOISON)
+	((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1UL << PG_private | 1UL << PG_private_2)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -911,6 +911,9 @@ struct task_struct {
 #ifdef CONFIG_MEMCG
 	unsigned			in_user_fault:1;
 #endif
+#ifdef CONFIG_LRU_GEN
+	unsigned			in_nonseq_fault:1;
+#endif
 #ifdef CONFIG_COMPAT_BRK
 	unsigned			brk_randomized:1;
 #endif
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -22,6 +22,9 @@ int main(void)
 	DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
 #endif
 	DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
+#ifdef CONFIG_LRU_GEN
+	DEFINE(LRU_GEN_WIDTH, order_base_2(CONFIG_NR_LRU_GENS + 1));
+#endif
 	/* End of constants */
 
 	return 0;
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -165,7 +165,6 @@ struct cgroup_mgctx {
 #define DEFINE_CGROUP_MGCTX(name)						\
 	struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
 
-extern struct mutex cgroup_mutex;
 extern spinlock_t css_set_lock;
 extern struct cgroup_subsys *cgroup_subsys[];
 extern struct list_head cgroup_roots;
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2364,7 +2364,8 @@ static void __split_huge_page_tail(struc
 #ifdef CONFIG_64BIT
 			 (1L << PG_arch_2) |
 #endif
-			 (1L << PG_dirty)));
+			 (1L << PG_dirty) |
+			 LRU_GEN_MASK | LRU_REFS_MASK));
 
 	/* ->mapping in first tail page is compound_mapcount */
 	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5237,6 +5237,7 @@ static struct mem_cgroup *mem_cgroup_all
 	memcg->deferred_split_queue.split_queue_len = 0;
 #endif
 	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+	lru_gen_init_memcg(memcg);
 	return memcg;
 fail:
 	mem_cgroup_id_remove(memcg);
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4788,6 +4788,7 @@ vm_fault_t handle_mm_fault(struct vm_are
 			   unsigned int flags, struct pt_regs *regs)
 {
 	vm_fault_t ret;
+	bool nonseq_fault = !(vma->vm_flags & VM_SEQ_READ);
 
 	__set_current_state(TASK_RUNNING);
 
@@ -4809,11 +4810,17 @@ vm_fault_t handle_mm_fault(struct vm_are
 	if (flags & FAULT_FLAG_USER)
 		mem_cgroup_enter_user_fault();
 
+	if (nonseq_fault)
+		task_enter_nonseq_fault();
+
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
 	else
 		ret = __handle_mm_fault(vma, address, flags);
 
+	if (nonseq_fault)
+		task_exit_nonseq_fault();
+
 	if (flags & FAULT_FLAG_USER) {
 		mem_cgroup_exit_user_fault();
 		/*
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
 
 	shift = 8 * sizeof(unsigned long);
 	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
-		- LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
+		- LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
-		"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
+		"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
 		SECTIONS_WIDTH,
 		NODES_WIDTH,
 		ZONES_WIDTH,
 		LAST_CPUPID_WIDTH,
 		KASAN_TAG_WIDTH,
+		LRU_GEN_WIDTH,
+		LRU_REFS_WIDTH,
 		NR_PAGEFLAGS);
 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
 		"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7456,6 +7456,7 @@ static void __meminit pgdat_init_interna
 
 	pgdat_page_ext_init(pgdat);
 	lruvec_init(&pgdat->__lruvec);
+	lru_gen_init_state(NULL, &pgdat->__lruvec);
 }
 
 static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -446,6 +446,11 @@ void lru_cache_add(struct page *page)
 	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
+	/* see the comment in lru_gen_add_page() */
+	if (lru_gen_enabled() && !PageUnevictable(page) &&
+	    task_in_nonseq_fault() && !(current->flags & PF_MEMALLOC))
+		SetPageActive(page);
+
 	get_page(page);
 	local_lock(&lru_pvecs.lock);
 	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
@@ -547,7 +552,7 @@ static void lru_deactivate_file_fn(struc
 
 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (PageActive(page) && !PageUnevictable(page)) {
+	if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
 		int nr_pages = thp_nr_pages(page);
 
 		del_page_from_lru_list(page, lruvec);
@@ -661,7 +666,7 @@ void deactivate_file_page(struct page *p
  */
 void deactivate_page(struct page *page)
 {
-	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+	if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
 		struct pagevec *pvec;
 
 		local_lock(&lru_pvecs.lock);
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2688,6 +2688,7 @@ SYSCALL_DEFINE1(swapoff, const char __us
 	err = 0;
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
+	lru_gen_change_state(false, false, true);
 
 out_dput:
 	filp_close(victim, NULL);
@@ -3349,6 +3350,7 @@ SYSCALL_DEFINE2(swapon, const char __use
 	mutex_unlock(&swapon_mutex);
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
+	lru_gen_change_state(true, false, true);
 
 	error = 0;
 	goto out;
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -50,6 +50,7 @@
 #include <linux/printk.h>
 #include <linux/dax.h>
 #include <linux/psi.h>
+#include <linux/memory.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2815,6 +2816,273 @@ static bool can_age_anon_pages(struct pg
 	return can_demote(pgdat->node_id, sc);
 }
 
+#ifdef CONFIG_LRU_GEN
+
+/******************************************************************************
+ *                          shorthand helpers
+ ******************************************************************************/
+
+#define for_each_gen_type_zone(gen, type, zone)				\
+	for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++)			\
+		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
+			for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+
+static int page_lru_gen(struct page *page)
+{
+	unsigned long flags = READ_ONCE(page->flags);
+
+	return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
+static struct lruvec *get_lruvec(int nid, struct mem_cgroup *memcg)
+{
+	struct pglist_data *pgdat = NODE_DATA(nid);
+
+#ifdef CONFIG_MEMCG
+	if (memcg) {
+		struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
+
+		if (lruvec->pgdat != pgdat)
+			lruvec->pgdat = pgdat;
+
+		return lruvec;
+	}
+#endif
+	return pgdat ? &pgdat->__lruvec : NULL;
+}
+
+static int get_nr_gens(struct lruvec *lruvec, int type)
+{
+	return lruvec->evictable.max_seq - lruvec->evictable.min_seq[type] + 1;
+}
+
+static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
+{
+	return get_nr_gens(lruvec, 1) >= MIN_NR_GENS &&
+	       get_nr_gens(lruvec, 1) <= get_nr_gens(lruvec, 0) &&
+	       get_nr_gens(lruvec, 0) <= MAX_NR_GENS;
+}
+
+/******************************************************************************
+ *                          state change
+ ******************************************************************************/
+
+#ifdef CONFIG_LRU_GEN_ENABLED
+DEFINE_STATIC_KEY_TRUE(lru_gen_static_key);
+#else
+DEFINE_STATIC_KEY_FALSE(lru_gen_static_key);
+#endif
+
+static int lru_gen_nr_swapfiles;
+
+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
+{
+	int gen, type, zone;
+	enum lru_list lru;
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	for_each_evictable_lru(lru) {
+		type = is_file_lru(lru);
+
+		if (lrugen->enabled[type] && !list_empty(&lruvec->lists[lru]))
+			return false;
+	}
+
+	for_each_gen_type_zone(gen, type, zone) {
+		if (!lrugen->enabled[type] && !list_empty(&lrugen->lists[gen][type][zone]))
+			return false;
+
+		/* unlikely but not a bug when reset_batch_size() is pending */
+		VM_WARN_ON(!lrugen->enabled[type] && lrugen->sizes[gen][type][zone]);
+	}
+
+	return true;
+}
+
+static bool fill_lists(struct lruvec *lruvec)
+{
+	enum lru_list lru;
+	int remaining = MAX_BATCH_SIZE;
+
+	for_each_evictable_lru(lru) {
+		int type = is_file_lru(lru);
+		bool active = is_active_lru(lru);
+		struct list_head *head = &lruvec->lists[lru];
+
+		if (!lruvec->evictable.enabled[type])
+			continue;
+
+		while (!list_empty(head)) {
+			bool success;
+			struct page *page = lru_to_page(head);
+
+			VM_BUG_ON_PAGE(PageTail(page), page);
+			VM_BUG_ON_PAGE(PageUnevictable(page), page);
+			VM_BUG_ON_PAGE(PageActive(page) != active, page);
+			VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
+			VM_BUG_ON_PAGE(page_lru_gen(page) < MAX_NR_GENS, page);
+
+			prefetchw_prev_lru_page(page, head, flags);
+
+			del_page_from_lru_list(page, lruvec);
+			success = lru_gen_add_page(page, lruvec, false);
+			VM_BUG_ON(!success);
+
+			if (!--remaining)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool drain_lists(struct lruvec *lruvec)
+{
+	int gen, type, zone;
+	int remaining = MAX_BATCH_SIZE;
+
+	for_each_gen_type_zone(gen, type, zone) {
+		struct list_head *head = &lruvec->evictable.lists[gen][type][zone];
+
+		if (lruvec->evictable.enabled[type])
+			continue;
+
+		while (!list_empty(head)) {
+			bool success;
+			struct page *page = lru_to_page(head);
+
+			VM_BUG_ON_PAGE(PageTail(page), page);
+			VM_BUG_ON_PAGE(PageUnevictable(page), page);
+			VM_BUG_ON_PAGE(PageActive(page), page);
+			VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
+			VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+
+			prefetchw_prev_lru_page(page, head, flags);
+
+			success = lru_gen_del_page(page, lruvec, false);
+			VM_BUG_ON(!success);
+			add_page_to_lru_list(page, lruvec);
+
+			if (!--remaining)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * For file page tracking, we enable/disable it according to the main switch.
+ * For anon page tracking, we only enabled it when the main switch is on and
+ * there is at least one swapfile; we disable it when there are no swapfiles
+ * regardless of the value of the main switch. Otherwise, we will eventually
+ * reach the max size of the sliding window and have to call inc_min_seq().
+ */
+void lru_gen_change_state(bool enable, bool main, bool swap)
+{
+	static DEFINE_MUTEX(state_mutex);
+
+	struct mem_cgroup *memcg;
+
+	mem_hotplug_begin();
+	cgroup_lock();
+	mutex_lock(&state_mutex);
+
+	if (swap) {
+		if (enable)
+			swap = !lru_gen_nr_swapfiles++;
+		else
+			swap = !--lru_gen_nr_swapfiles;
+	}
+
+	if (main && enable != lru_gen_enabled()) {
+		if (enable)
+			static_branch_enable(&lru_gen_static_key);
+		else
+			static_branch_disable(&lru_gen_static_key);
+	} else if (!swap || !lru_gen_enabled())
+		goto unlock;
+
+	memcg = mem_cgroup_iter(NULL, NULL, NULL);
+	do {
+		int nid;
+
+		for_each_node(nid) {
+			struct lruvec *lruvec = get_lruvec(nid, memcg);
+
+			if (!lruvec)
+				continue;
+
+			spin_lock_irq(&lruvec->lru_lock);
+
+			VM_BUG_ON(!seq_is_valid(lruvec));
+			VM_BUG_ON(!state_is_valid(lruvec));
+
+			lruvec->evictable.enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
+			lruvec->evictable.enabled[1] = lru_gen_enabled();
+
+			while (!(enable ? fill_lists(lruvec) : drain_lists(lruvec))) {
+				spin_unlock_irq(&lruvec->lru_lock);
+				cond_resched();
+				spin_lock_irq(&lruvec->lru_lock);
+			}
+
+			spin_unlock_irq(&lruvec->lru_lock);
+		}
+
+		cond_resched();
+	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+unlock:
+	mutex_unlock(&state_mutex);
+	cgroup_unlock();
+	mem_hotplug_done();
+}
+
+/******************************************************************************
+ *                          initialization
+ ******************************************************************************/
+
+void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec)
+{
+	int i;
+	int gen, type, zone;
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	lrugen->max_seq = MIN_NR_GENS + 1;
+	lrugen->enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
+	lrugen->enabled[1] = lru_gen_enabled();
+
+	for (i = 0; i <= MIN_NR_GENS + 1; i++)
+		lrugen->timestamps[i] = jiffies;
+
+	for_each_gen_type_zone(gen, type, zone)
+		INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
+}
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{
+	int nid;
+
+	for_each_node(nid) {
+		struct lruvec *lruvec = get_lruvec(nid, memcg);
+
+		lru_gen_init_state(memcg, lruvec);
+	}
+}
+#endif
+
+static int __init init_lru_gen(void)
+{
+	BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
+	BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+
+	return 0;
+};
+late_initcall(init_lru_gen);
+
+#endif /* CONFIG_LRU_GEN */
+
 static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];