From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:4.14 commit in: /
Date: Sun, 11 Jul 2021 14:46:39
Message-Id: 1626014783.97475e3deeb706adf19de4dc8380076168017fd8.mpagano@gentoo
commit: 97475e3deeb706adf19de4dc8380076168017fd8
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sun Jul 11 14:46:23 2021 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sun Jul 11 14:46:23 2021 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=97475e3d

Linux patch 4.14.239

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

0000_README | 4 +
1238_linux-4.14.239.patch | 872 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 876 insertions(+)

diff --git a/0000_README b/0000_README
index 487ae9d..a52d064 100644
--- a/0000_README
+++ b/0000_README
@@ -995,6 +995,10 @@ Patch: 1237_linux-4.14.238.patch
From: https://www.kernel.org
Desc: Linux 4.14.238

+Patch: 1238_linux-4.14.239.patch
+From: https://www.kernel.org
+Desc: Linux 4.14.239
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1238_linux-4.14.239.patch b/1238_linux-4.14.239.patch
new file mode 100644
index 0000000..214f7fe
--- /dev/null
+++ b/1238_linux-4.14.239.patch
@@ -0,0 +1,872 @@
+diff --git a/Makefile b/Makefile
+index 5442918651e00..3bb379664a96e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 4
+ PATCHLEVEL = 14
+-SUBLEVEL = 238
++SUBLEVEL = 239
+ EXTRAVERSION =
+ NAME = Petit Gorille
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
+index e427f80344c4d..a2d770acd10a9 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
+@@ -450,7 +450,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
+ struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
+ int i;
+
+- if (!ttm_dma)
++ if (!ttm_dma || !ttm_dma->dma_address)
+ return;
+
+ /* Don't waste time looping if the object is coherent */
+@@ -470,7 +470,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
+ struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
+ int i;
+
+- if (!ttm_dma)
++ if (!ttm_dma || !ttm_dma->dma_address)
+ return;
+
+ /* Don't waste time looping if the object is coherent */
+diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
+index 5be3d6b7991b4..a46fbe2d2ee63 100644
+--- a/drivers/scsi/sr.c
++++ b/drivers/scsi/sr.c
+@@ -216,6 +216,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
+ return DISK_EVENT_EJECT_REQUEST;
+ else if (med->media_event_code == 2)
+ return DISK_EVENT_MEDIA_CHANGE;
++ else if (med->media_event_code == 3)
++ return DISK_EVENT_EJECT_REQUEST;
+ return 0;
+ }
+
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index b370144682ed5..a2f8130e18fec 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -524,6 +524,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+ }
+
+ info->eoi_time = 0;
++
++ /* is_active hasn't been reset yet, do it now. */
++ smp_store_release(&info->is_active, 0);
+ do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
+ }
+
+@@ -1780,10 +1783,22 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
+
+- if (VALID_EVTCHN(evtchn)) {
+- do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+- ack_dynirq(data);
+- }
++ if (!VALID_EVTCHN(evtchn))
++ return;
++
++ do_mask(info, EVT_MASK_REASON_EOI_PENDING);
++
++ if (unlikely(irqd_is_setaffinity_pending(data)) &&
++ likely(!irqd_irq_disabled(data))) {
++ do_mask(info, EVT_MASK_REASON_TEMPORARY);
++
++ clear_evtchn(evtchn);
++
++ irq_move_masked_irq(data);
++
++ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
++ } else
++ clear_evtchn(evtchn);
+ }
+
+ static void lateeoi_mask_ack_dynirq(struct irq_data *data)
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index fe0ec0a29db7c..d2b5cc8ce54f9 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -467,17 +467,6 @@ static inline int hstate_index(struct hstate *h)
+ return h - hstates;
+ }
+
+-pgoff_t __basepage_index(struct page *page);
+-
+-/* Return page->index in PAGE_SIZE units */
+-static inline pgoff_t basepage_index(struct page *page)
+-{
+- if (!PageCompound(page))
+- return page->index;
+-
+- return __basepage_index(page);
+-}
+-
+ extern int dissolve_free_huge_page(struct page *page);
+ extern int dissolve_free_huge_pages(unsigned long start_pfn,
+ unsigned long end_pfn);
+@@ -572,11 +561,6 @@ static inline int hstate_index(struct hstate *h)
+ return 0;
+ }
+
+-static inline pgoff_t basepage_index(struct page *page)
+-{
+- return page->index;
+-}
+-
+ static inline int dissolve_free_huge_page(struct page *page)
+ {
+ return 0;
+diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
+index 41eb6fdf87a84..86b5fb08e96cd 100644
+--- a/include/linux/kfifo.h
++++ b/include/linux/kfifo.h
+@@ -113,7 +113,8 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void);
+ * array is a part of the structure and the fifo type where the array is
+ * outside of the fifo structure.
+ */
+-#define __is_kfifo_ptr(fifo) (sizeof(*fifo) == sizeof(struct __kfifo))
++#define __is_kfifo_ptr(fifo) \
++ (sizeof(*fifo) == sizeof(STRUCT_KFIFO_PTR(typeof(*(fifo)->type))))
+
+ /**
+ * DECLARE_KFIFO_PTR - macro to declare a fifo pointer object
+diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
+index 57b0030d38007..5d0767cb424aa 100644
+--- a/include/linux/mmdebug.h
++++ b/include/linux/mmdebug.h
+@@ -37,10 +37,22 @@ void dump_mm(const struct mm_struct *mm);
+ BUG(); \
+ } \
+ } while (0)
+-#define VM_WARN_ON(cond) WARN_ON(cond)
+-#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+-#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
+-#define VM_WARN(cond, format...) WARN(cond, format)
++#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \
++ static bool __section(".data.once") __warned; \
++ int __ret_warn_once = !!(cond); \
++ \
++ if (unlikely(__ret_warn_once && !__warned)) { \
++ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
++ __warned = true; \
++ WARN_ON(1); \
++ } \
++ unlikely(__ret_warn_once); \
++})
++
++#define VM_WARN_ON(cond) (void)WARN_ON(cond)
++#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
++#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
++#define VM_WARN(cond, format...) (void)WARN(cond, format)
+ #else
+ #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
+ #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
+ #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
+ #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
++#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #endif
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index e08b5339023c0..84c7fc7f63e73 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -399,7 +399,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
+ }
+
+ /*
+- * Get index of the page with in radix-tree
++ * Get index of the page within radix-tree (but not for hugetlb pages).
+ * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
+ */
+ static inline pgoff_t page_to_index(struct page *page)
+@@ -418,15 +418,16 @@ static inline pgoff_t page_to_index(struct page *page)
+ return pgoff;
+ }
+
++extern pgoff_t hugetlb_basepage_index(struct page *page);
++
+ /*
+- * Get the offset in PAGE_SIZE.
+- * (TODO: hugepage should have ->index in PAGE_SIZE)
++ * Get the offset in PAGE_SIZE (even for hugetlb pages).
++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
+ */
+ static inline pgoff_t page_to_pgoff(struct page *page)
+ {
+- if (unlikely(PageHeadHuge(page)))
+- return page->index << compound_order(page);
+-
++ if (unlikely(PageHuge(page)))
++ return hugetlb_basepage_index(page);
+ return page_to_index(page);
+ }
+
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index d7d6d4eb17949..91ccae9467164 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -98,7 +98,8 @@ enum ttu_flags {
+ * do a final flush if necessary */
+ TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
+ * caller holds it */
+- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
++ TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
++ TTU_SYNC = 0x200, /* avoid racy checks with PVMW_SYNC */
+ };
+
+ #ifdef CONFIG_MMU
+diff --git a/kernel/futex.c b/kernel/futex.c
+index af1d9a9939887..e282c083df59d 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -719,7 +719,7 @@ again:
+
+ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+ key->shared.i_seq = get_inode_sequence_number(inode);
+- key->shared.pgoff = basepage_index(tail);
++ key->shared.pgoff = page_to_pgoff(tail);
+ rcu_read_unlock();
+ }
+
+diff --git a/kernel/kthread.c b/kernel/kthread.c
+index fd6f9322312aa..7dd2c8a797d7a 100644
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -979,8 +979,38 @@ void kthread_flush_work(struct kthread_work *work)
+ EXPORT_SYMBOL_GPL(kthread_flush_work);
+
+ /*
+- * This function removes the work from the worker queue. Also it makes sure
+- * that it won't get queued later via the delayed work's timer.
++ * Make sure that the timer is neither set nor running and could
++ * not manipulate the work list_head any longer.
++ *
++ * The function is called under worker->lock. The lock is temporary
++ * released but the timer can't be set again in the meantime.
++ */
++static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
++ unsigned long *flags)
++{
++ struct kthread_delayed_work *dwork =
++ container_of(work, struct kthread_delayed_work, work);
++ struct kthread_worker *worker = work->worker;
++
++ /*
++ * del_timer_sync() must be called to make sure that the timer
++ * callback is not running. The lock must be temporary released
++ * to avoid a deadlock with the callback. In the meantime,
++ * any queuing is blocked by setting the canceling counter.
++ */
++ work->canceling++;
++ spin_unlock_irqrestore(&worker->lock, *flags);
++ del_timer_sync(&dwork->timer);
++ spin_lock_irqsave(&worker->lock, *flags);
++ work->canceling--;
++}
++
++/*
++ * This function removes the work from the worker queue.
++ *
++ * It is called under worker->lock. The caller must make sure that
++ * the timer used by delayed work is not running, e.g. by calling
++ * kthread_cancel_delayed_work_timer().
+ *
+ * The work might still be in use when this function finishes. See the
+ * current_work proceed by the worker.
+@@ -988,28 +1018,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work);
+ * Return: %true if @work was pending and successfully canceled,
+ * %false if @work was not pending
+ */
+-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+- unsigned long *flags)
++static bool __kthread_cancel_work(struct kthread_work *work)
+ {
+- /* Try to cancel the timer if exists. */
+- if (is_dwork) {
+- struct kthread_delayed_work *dwork =
+- container_of(work, struct kthread_delayed_work, work);
+- struct kthread_worker *worker = work->worker;
+-
+- /*
+- * del_timer_sync() must be called to make sure that the timer
+- * callback is not running. The lock must be temporary released
+- * to avoid a deadlock with the callback. In the meantime,
+- * any queuing is blocked by setting the canceling counter.
+- */
+- work->canceling++;
+- spin_unlock_irqrestore(&worker->lock, *flags);
+- del_timer_sync(&dwork->timer);
+- spin_lock_irqsave(&worker->lock, *flags);
+- work->canceling--;
+- }
+-
+ /*
+ * Try to remove the work from a worker list. It might either
+ * be from worker->work_list or from worker->delayed_work_list.
+@@ -1062,11 +1072,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
+ /* Work must not be used with >1 worker, see kthread_queue_work() */
+ WARN_ON_ONCE(work->worker != worker);
+
+- /* Do not fight with another command that is canceling this work. */
++ /*
++ * Temporary cancel the work but do not fight with another command
++ * that is canceling the work as well.
++ *
++ * It is a bit tricky because of possible races with another
++ * mod_delayed_work() and cancel_delayed_work() callers.
++ *
++ * The timer must be canceled first because worker->lock is released
++ * when doing so. But the work can be removed from the queue (list)
++ * only when it can be queued again so that the return value can
++ * be used for reference counting.
++ */
++ kthread_cancel_delayed_work_timer(work, &flags);
+ if (work->canceling)
+ goto out;
++ ret = __kthread_cancel_work(work);
+
+- ret = __kthread_cancel_work(work, true, &flags);
+ fast_queue:
+ __kthread_queue_delayed_work(worker, dwork, delay);
+ out:
+@@ -1088,7 +1110,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
+ /* Work must not be used with >1 worker, see kthread_queue_work(). */
+ WARN_ON_ONCE(work->worker != worker);
+
+- ret = __kthread_cancel_work(work, is_dwork, &flags);
++ if (is_dwork)
++ kthread_cancel_delayed_work_timer(work, &flags);
++
++ ret = __kthread_cancel_work(work);
+
+ if (worker->current_work != work)
+ goto out_fast;
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 513f0cf173ad5..972893908bcda 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2324,16 +2324,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ static void unmap_page(struct page *page)
+ {
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+- bool unmap_success;
++ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
+
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+
+ if (PageAnon(page))
+ ttu_flags |= TTU_SPLIT_FREEZE;
+
+- unmap_success = try_to_unmap(page, ttu_flags);
+- VM_BUG_ON_PAGE(!unmap_success, page);
++ try_to_unmap(page, ttu_flags);
++
++ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
+ }
+
+ static void remap_page(struct page *page)
+@@ -2586,7 +2586,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+ struct anon_vma *anon_vma = NULL;
+ struct address_space *mapping = NULL;
+- int count, mapcount, extra_pins, ret;
++ int extra_pins, ret;
+ bool mlocked;
+ unsigned long flags;
+ pgoff_t end;
+@@ -2648,7 +2648,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+
+ mlocked = PageMlocked(page);
+ unmap_page(head);
+- VM_BUG_ON_PAGE(compound_mapcount(head), head);
+
+ /* Make sure the page is not on per-CPU pagevec as it takes pin */
+ if (mlocked)
+@@ -2674,9 +2673,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+
+ /* Prevent deferred_split_scan() touching ->_refcount */
+ spin_lock(&pgdata->split_queue_lock);
+- count = page_count(head);
+- mapcount = total_mapcount(head);
+- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
++ if (page_ref_freeze(head, 1 + extra_pins)) {
+ if (!list_empty(page_deferred_list(head))) {
+ pgdata->split_queue_len--;
+ list_del(page_deferred_list(head));
+@@ -2692,16 +2689,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ } else
+ ret = 0;
+ } else {
+- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
+- pr_alert("total_mapcount: %u, page_count(): %u\n",
+- mapcount, count);
+- if (PageTail(page))
+- dump_page(head, NULL);
+- dump_page(page, "total_mapcount(head) > 0");
+- BUG();
+- }
+ spin_unlock(&pgdata->split_queue_lock);
+-fail: if (mapping)
++fail:
++ if (mapping)
+ spin_unlock(&mapping->tree_lock);
+ spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+ remap_page(head);
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 0dc181290d1fb..c765fd01f0aa4 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1403,15 +1403,12 @@ int PageHeadHuge(struct page *page_head)
+ return get_compound_page_dtor(page_head) == free_huge_page;
+ }
+
+-pgoff_t __basepage_index(struct page *page)
++pgoff_t hugetlb_basepage_index(struct page *page)
+ {
+ struct page *page_head = compound_head(page);
+ pgoff_t index = page_index(page_head);
+ unsigned long compound_idx;
+
+- if (!PageHuge(page_head))
+- return page_index(page);
+-
+ if (compound_order(page_head) >= MAX_ORDER)
+ compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
+ else
+diff --git a/mm/internal.h b/mm/internal.h
+index a182506242c43..97c8e896cd2f6 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -330,27 +330,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+ extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+
+ /*
+- * At what user virtual address is page expected in @vma?
++ * At what user virtual address is page expected in vma?
++ * Returns -EFAULT if all of the page is outside the range of vma.
++ * If page is a compound head, the entire compound page is considered.
+ */
+ static inline unsigned long
+-__vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address(struct page *page, struct vm_area_struct *vma)
+ {
+- pgoff_t pgoff = page_to_pgoff(page);
+- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++ pgoff_t pgoff;
++ unsigned long address;
++
++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
++ pgoff = page_to_pgoff(page);
++ if (pgoff >= vma->vm_pgoff) {
++ address = vma->vm_start +
++ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++ /* Check for address beyond vma (or wrapped through 0?) */
++ if (address < vma->vm_start || address >= vma->vm_end)
++ address = -EFAULT;
++ } else if (PageHead(page) &&
++ pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) {
++ /* Test above avoids possibility of wrap to 0 on 32-bit */
++ address = vma->vm_start;
++ } else {
++ address = -EFAULT;
++ }
++ return address;
+ }
+
++/*
++ * Then at what user virtual address will none of the page be found in vma?
++ * Assumes that vma_address() already returned a good starting address.
++ * If page is a compound head, the entire compound page is considered.
++ */
+ static inline unsigned long
+-vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address_end(struct page *page, struct vm_area_struct *vma)
+ {
+- unsigned long start, end;
+-
+- start = __vma_address(page, vma);
+- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+-
+- /* page should be within @vma mapping range */
+- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
+-
+- return max(start, vma->vm_start);
++ pgoff_t pgoff;
++ unsigned long address;
++
++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
++ pgoff = page_to_pgoff(page) + (1UL << compound_order(page));
++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++ /* Check for address beyond vma (or wrapped through 0?) */
++ if (address < vma->vm_start || address > vma->vm_end)
++ address = vma->vm_end;
++ return address;
+ }
+
+ #else /* !CONFIG_MMU */
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index e00d985a51c56..a612daef5f009 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -110,6 +110,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
+ return true;
+ }
+
++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
++{
++ pvmw->address = (pvmw->address + size) & ~(size - 1);
++ if (!pvmw->address)
++ pvmw->address = ULONG_MAX;
++}
++
+ /**
+ * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * @pvmw->address
+@@ -138,6 +145,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+ {
+ struct mm_struct *mm = pvmw->vma->vm_mm;
+ struct page *page = pvmw->page;
++ unsigned long end;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+@@ -147,10 +155,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+ if (pvmw->pmd && !pvmw->pte)
+ return not_found(pvmw);
+
+- if (pvmw->pte)
+- goto next_pte;
++ if (unlikely(PageHuge(page))) {
++ /* The only possible mapping was handled on last iteration */
++ if (pvmw->pte)
++ return not_found(pvmw);
+
+- if (unlikely(PageHuge(pvmw->page))) {
+ /* when pud is not present, pte will be NULL */
+ pvmw->pte = huge_pte_offset(mm, pvmw->address,
+ PAGE_SIZE << compound_order(page));
+@@ -163,78 +172,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+ return not_found(pvmw);
+ return true;
+ }
+-restart:
+- pgd = pgd_offset(mm, pvmw->address);
+- if (!pgd_present(*pgd))
+- return false;
+- p4d = p4d_offset(pgd, pvmw->address);
+- if (!p4d_present(*p4d))
+- return false;
+- pud = pud_offset(p4d, pvmw->address);
+- if (!pud_present(*pud))
+- return false;
+- pvmw->pmd = pmd_offset(pud, pvmw->address);
++
+ /*
+- * Make sure the pmd value isn't cached in a register by the
+- * compiler and used as a stale value after we've observed a
+- * subsequent update.
++ * Seek to next pte only makes sense for THP.
++ * But more important than that optimization, is to filter out
++ * any PageKsm page: whose page->index misleads vma_address()
++ * and vma_address_end() to disaster.
+ */
+- pmde = READ_ONCE(*pvmw->pmd);
+- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+- if (likely(pmd_trans_huge(*pvmw->pmd))) {
+- if (pvmw->flags & PVMW_MIGRATION)
+- return not_found(pvmw);
+- if (pmd_page(*pvmw->pmd) != page)
+- return not_found(pvmw);
+- return true;
+- } else if (!pmd_present(*pvmw->pmd)) {
+- if (thp_migration_supported()) {
+- if (!(pvmw->flags & PVMW_MIGRATION))
++ end = PageTransCompound(page) ?
++ vma_address_end(page, pvmw->vma) :
++ pvmw->address + PAGE_SIZE;
++ if (pvmw->pte)
++ goto next_pte;
+restart:
++ do {
++ pgd = pgd_offset(mm, pvmw->address);
++ if (!pgd_present(*pgd)) {
++ step_forward(pvmw, PGDIR_SIZE);
++ continue;
++ }
++ p4d = p4d_offset(pgd, pvmw->address);
++ if (!p4d_present(*p4d)) {
++ step_forward(pvmw, P4D_SIZE);
++ continue;
++ }
++ pud = pud_offset(p4d, pvmw->address);
++ if (!pud_present(*pud)) {
++ step_forward(pvmw, PUD_SIZE);
++ continue;
++ }
++
++ pvmw->pmd = pmd_offset(pud, pvmw->address);
++ /*
++ * Make sure the pmd value isn't cached in a register by the
++ * compiler and used as a stale value after we've observed a
++ * subsequent update.
++ */
++ pmde = READ_ONCE(*pvmw->pmd);
++
++ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
++ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
++ pmde = *pvmw->pmd;
++ if (likely(pmd_trans_huge(pmde))) {
++ if (pvmw->flags & PVMW_MIGRATION)
++ return not_found(pvmw);
++ if (pmd_page(pmde) != page)
+ return not_found(pvmw);
+- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
+- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
++ return true;
++ }
++ if (!pmd_present(pmde)) {
++ swp_entry_t entry;
+
+- if (migration_entry_to_page(entry) != page)
+- return not_found(pvmw);
+- return true;
+- }
++ if (!thp_migration_supported() ||
++ !(pvmw->flags & PVMW_MIGRATION))
++ return not_found(pvmw);
++ entry = pmd_to_swp_entry(pmde);
++ if (!is_migration_entry(entry) ||
++ migration_entry_to_page(entry) != page)
++ return not_found(pvmw);
++ return true;
+ }
+- return not_found(pvmw);
+- } else {
+ /* THP pmd was split under us: handle on pte level */
+ spin_unlock(pvmw->ptl);
+ pvmw->ptl = NULL;
++ } else if (!pmd_present(pmde)) {
++ /*
++ * If PVMW_SYNC, take and drop THP pmd lock so that we
++ * cannot return prematurely, while zap_huge_pmd() has
++ * cleared *pmd but not decremented compound_mapcount().
++ */
++ if ((pvmw->flags & PVMW_SYNC) &&
++ PageTransCompound(page)) {
++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++
++ spin_unlock(ptl);
++ }
++ step_forward(pvmw, PMD_SIZE);
++ continue;
+ }
+- } else if (!pmd_present(pmde)) {
+- return false;
+- }
+- if (!map_pte(pvmw))
+- goto next_pte;
+- while (1) {
++ if (!map_pte(pvmw))
++ goto next_pte;
++this_pte:
+ if (check_pte(pvmw))
+ return true;
+ next_pte:
+- /* Seek to next pte only makes sense for THP */
+- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+- return not_found(pvmw);
+ do {
+ pvmw->address += PAGE_SIZE;
+- if (pvmw->address >= pvmw->vma->vm_end ||
+- pvmw->address >=
+- __vma_address(pvmw->page, pvmw->vma) +
+- hpage_nr_pages(pvmw->page) * PAGE_SIZE)
++ if (pvmw->address >= end)
+ return not_found(pvmw);
+ /* Did we cross page table boundary? */
+- if (pvmw->address % PMD_SIZE == 0) {
+- pte_unmap(pvmw->pte);
++ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
+ if (pvmw->ptl) {
+ spin_unlock(pvmw->ptl);
+ pvmw->ptl = NULL;
+ }
++ pte_unmap(pvmw->pte);
++ pvmw->pte = NULL;
+ goto restart;
+- } else {
+- pvmw->pte++;
++ }
++ pvmw->pte++;
++ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
++ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
++ spin_lock(pvmw->ptl);
+ }
+ } while (pte_none(*pvmw->pte));
+
+@@ -242,7 +281,10 @@ next_pte:
+ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
+ }
+- }
++ goto this_pte;
++ } while (pvmw->address < end);
++
++ return false;
+ }
+
+ /**
+@@ -261,14 +303,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+ .vma = vma,
+ .flags = PVMW_SYNC,
+ };
+- unsigned long start, end;
+-
+- start = __vma_address(page, vma);
+- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+
+- if (unlikely(end < vma->vm_start || start >= vma->vm_end))
++ pvmw.address = vma_address(page, vma);
++ if (pvmw.address == -EFAULT)
+ return 0;
+- pvmw.address = max(start, vma->vm_start);
+ if (!page_vma_mapped_walk(&pvmw))
+ return 0;
+ page_vma_mapped_walk_done(&pvmw);
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 8bd2ddd8febd5..8ed8ec113d5a9 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -686,7 +686,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+ */
+ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+ {
+- unsigned long address;
+ if (PageAnon(page)) {
+ struct anon_vma *page__anon_vma = page_anon_vma(page);
+ /*
+@@ -696,15 +695,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+ if (!vma->anon_vma || !page__anon_vma ||
+ vma->anon_vma->root != page__anon_vma->root)
+ return -EFAULT;
+- } else if (page->mapping) {
+- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
+- return -EFAULT;
+- } else
++ } else if (!vma->vm_file) {
+ return -EFAULT;
+- address = __vma_address(page, vma);
+- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
++ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
+ return -EFAULT;
+- return address;
++ }
++
++ return vma_address(page, vma);
+ }
+
+ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
+@@ -896,7 +893,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+ * We have to assume the worse case ie pmd for invalidation. Note that
+ * the page can not be free from this function.
+ */
+- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
++ end = vma_address_end(page, vma);
+ mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+
+ while (page_vma_mapped_walk(&pvmw)) {
+@@ -1344,6 +1341,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+ unsigned long start = address, end;
+ enum ttu_flags flags = (enum ttu_flags)arg;
+
++ /*
++ * When racing against e.g. zap_pte_range() on another cpu,
++ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++ * try_to_unmap() may return false when it is about to become true,
++ * if page table locking is skipped: use TTU_SYNC to wait for that.
++ */
++ if (flags & TTU_SYNC)
++ pvmw.flags = PVMW_SYNC;
++
+ /* munlock has nothing to gain from examining un-locked vmas */
+ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
+ return true;
+@@ -1365,7 +1371,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+ * Note that the page can not be free in this function as call of
+ * try_to_unmap() must hold a reference on the page.
+ */
+- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
++ end = PageKsm(page) ?
++ address + PAGE_SIZE : vma_address_end(page, vma);
+ if (PageHuge(page)) {
+ /*
+ * If sharing is possible, start and end will be adjusted
+@@ -1624,9 +1631,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
+ return is_vma_temporary_stack(vma);
+ }
+
+-static int page_mapcount_is_zero(struct page *page)
++static int page_not_mapped(struct page *page)
+ {
+- return !total_mapcount(page);
++ return !page_mapped(page);
+ }
+
+ /**
+@@ -1644,7 +1651,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+ struct rmap_walk_control rwc = {
+ .rmap_one = try_to_unmap_one,
+ .arg = (void *)flags,
+- .done = page_mapcount_is_zero,
++ .done = page_not_mapped,
+ .anon_lock = page_lock_anon_vma_read,
+ };
+
+@@ -1665,14 +1672,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+ else
+ rmap_walk(page, &rwc);
+
+- return !page_mapcount(page) ? true : false;
++ /*
++ * When racing against e.g. zap_pte_range() on another cpu,
++ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++ * try_to_unmap() may return false when it is about to become true,
++ * if page table locking is skipped: use TTU_SYNC to wait for that.
++ */
++ return !page_mapcount(page);
+ }
+
+-static int page_not_mapped(struct page *page)
+-{
+- return !page_mapped(page);
+-};
+-
+ /**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+@@ -1767,6 +1775,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
+ struct vm_area_struct *vma = avc->vma;
+ unsigned long address = vma_address(page, vma);
+
++ VM_BUG_ON_VMA(address == -EFAULT, vma);
+ cond_resched();
+
+ if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+@@ -1821,6 +1830,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
+ pgoff_start, pgoff_end) {
+ unsigned long address = vma_address(page, vma);
+
++ VM_BUG_ON_VMA(address == -EFAULT, vma);
+ cond_resched();
+
+ if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))