From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.12 commit in: /
Date: Wed, 30 Jun 2021 14:22:53
Message-Id: 1625062901.fcca3bfe3f19c1750be503fe947954344ceecfbd.mpagano@gentoo
1 commit: fcca3bfe3f19c1750be503fe947954344ceecfbd
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Wed Jun 30 14:21:41 2021 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Wed Jun 30 14:21:41 2021 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=fcca3bfe
7
8 Linux patch 5.12.14
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 +
13 1013_linux-5.12.14.patch | 4590 ++++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 4594 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index 34c90d1..96f1ac7 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -95,6 +95,10 @@ Patch: 1012_linux-5.12.13.patch
21 From: http://www.kernel.org
22 Desc: Linux 5.12.13
23
24 +Patch: 1013_linux-5.12.14.patch
25 +From: http://www.kernel.org
26 +Desc: Linux 5.12.14
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1013_linux-5.12.14.patch b/1013_linux-5.12.14.patch
33 new file mode 100644
34 index 0000000..6e18a21
35 --- /dev/null
36 +++ b/1013_linux-5.12.14.patch
37 @@ -0,0 +1,4590 @@
38 +diff --git a/Makefile b/Makefile
39 +index d2fe36db78aed..433f164f9ee0f 100644
40 +--- a/Makefile
41 ++++ b/Makefile
42 +@@ -1,7 +1,7 @@
43 + # SPDX-License-Identifier: GPL-2.0
44 + VERSION = 5
45 + PATCHLEVEL = 12
46 +-SUBLEVEL = 13
47 ++SUBLEVEL = 14
48 + EXTRAVERSION =
49 + NAME = Frozen Wasteland
50 +
51 +diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
52 +index 1a5edf562e85e..73ca7797b92f6 100644
53 +--- a/arch/arm/kernel/setup.c
54 ++++ b/arch/arm/kernel/setup.c
55 +@@ -545,9 +545,11 @@ void notrace cpu_init(void)
56 + * In Thumb-2, msr with an immediate value is not allowed.
57 + */
58 + #ifdef CONFIG_THUMB2_KERNEL
59 +-#define PLC "r"
60 ++#define PLC_l "l"
61 ++#define PLC_r "r"
62 + #else
63 +-#define PLC "I"
64 ++#define PLC_l "I"
65 ++#define PLC_r "I"
66 + #endif
67 +
68 + /*
69 +@@ -569,15 +571,15 @@ void notrace cpu_init(void)
70 + "msr cpsr_c, %9"
71 + :
72 + : "r" (stk),
73 +- PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
74 ++ PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
75 + "I" (offsetof(struct stack, irq[0])),
76 +- PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
77 ++ PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
78 + "I" (offsetof(struct stack, abt[0])),
79 +- PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
80 ++ PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE),
81 + "I" (offsetof(struct stack, und[0])),
82 +- PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
83 ++ PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
84 + "I" (offsetof(struct stack, fiq[0])),
85 +- PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
86 ++ PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
87 + : "r14");
88 + #endif
89 + }
90 +diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
91 +index 5243bf2327c02..a5ee34117321d 100644
92 +--- a/arch/riscv/Makefile
93 ++++ b/arch/riscv/Makefile
94 +@@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
95 + CC_FLAGS_FTRACE := -fpatchable-function-entry=8
96 + endif
97 +
98 +-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
99 ++ifeq ($(CONFIG_CMODEL_MEDLOW),y)
100 + KBUILD_CFLAGS_MODULE += -mcmodel=medany
101 + endif
102 +
103 +diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
104 +index eeb4f8c3e0e72..d0d206cdb9990 100644
105 +--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
106 ++++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
107 +@@ -272,7 +272,7 @@
108 + cache-size = <2097152>;
109 + cache-unified;
110 + interrupt-parent = <&plic0>;
111 +- interrupts = <19 20 21 22>;
112 ++ interrupts = <19 21 22 20>;
113 + reg = <0x0 0x2010000 0x0 0x1000>;
114 + };
115 + gpio: gpio@10060000 {
116 +diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
117 +index 2b543163d90a0..76c6034428be8 100644
118 +--- a/arch/s390/include/asm/stacktrace.h
119 ++++ b/arch/s390/include/asm/stacktrace.h
120 +@@ -91,12 +91,16 @@ struct stack_frame {
121 + CALL_ARGS_4(arg1, arg2, arg3, arg4); \
122 + register unsigned long r4 asm("6") = (unsigned long)(arg5)
123 +
124 +-#define CALL_FMT_0 "=&d" (r2) :
125 +-#define CALL_FMT_1 "+&d" (r2) :
126 +-#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
127 +-#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
128 +-#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
129 +-#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
130 ++/*
131 ++ * To keep this simple mark register 2-6 as being changed (volatile)
132 ++ * by the called function, even though register 6 is saved/nonvolatile.
133 ++ */
134 ++#define CALL_FMT_0 "=&d" (r2)
135 ++#define CALL_FMT_1 "+&d" (r2)
136 ++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
137 ++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
138 ++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
139 ++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
140 +
141 + #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
142 + #define CALL_CLOBBER_4 CALL_CLOBBER_5
143 +@@ -118,7 +122,7 @@ struct stack_frame {
144 + " brasl 14,%[_fn]\n" \
145 + " la 15,0(%[_prev])\n" \
146 + : [_prev] "=&a" (prev), CALL_FMT_##nr \
147 +- [_stack] "R" (stack), \
148 ++ : [_stack] "R" (stack), \
149 + [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
150 + [_frame] "d" (frame), \
151 + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \
152 +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
153 +index 9cc71ca9a88f9..e84f495e7eb29 100644
154 +--- a/arch/s390/kernel/entry.S
155 ++++ b/arch/s390/kernel/entry.S
156 +@@ -418,6 +418,7 @@ ENTRY(\name)
157 + xgr %r6,%r6
158 + xgr %r7,%r7
159 + xgr %r10,%r10
160 ++ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
161 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
162 + stmg %r8,%r9,__PT_PSW(%r11)
163 + tm %r8,0x0001 # coming from user space?
164 +diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
165 +index 90163e6184f5c..080e7aed181f4 100644
166 +--- a/arch/s390/kernel/signal.c
167 ++++ b/arch/s390/kernel/signal.c
168 +@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
169 +
170 + /* No handlers present - check for system call restart */
171 + clear_pt_regs_flag(regs, PIF_SYSCALL);
172 +- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
173 + if (current->thread.system_call) {
174 + regs->int_code = current->thread.system_call;
175 + switch (regs->gprs[2]) {
176 +diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
177 +index bfcc327acc6b2..26aa2614ee352 100644
178 +--- a/arch/s390/kernel/topology.c
179 ++++ b/arch/s390/kernel/topology.c
180 +@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
181 + {
182 + static cpumask_t mask;
183 +
184 +- cpumask_copy(&mask, cpumask_of(cpu));
185 ++ cpumask_clear(&mask);
186 ++ if (!cpu_online(cpu))
187 ++ goto out;
188 ++ cpumask_set_cpu(cpu, &mask);
189 + switch (topology_mode) {
190 + case TOPOLOGY_MODE_HW:
191 + while (info) {
192 +@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
193 + default:
194 + fallthrough;
195 + case TOPOLOGY_MODE_SINGLE:
196 +- cpumask_copy(&mask, cpumask_of(cpu));
197 + break;
198 + }
199 + cpumask_and(&mask, &mask, cpu_online_mask);
200 ++out:
201 + cpumask_copy(dst, &mask);
202 + }
203 +
204 +@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
205 + static cpumask_t mask;
206 + int i;
207 +
208 +- cpumask_copy(&mask, cpumask_of(cpu));
209 ++ cpumask_clear(&mask);
210 ++ if (!cpu_online(cpu))
211 ++ goto out;
212 ++ cpumask_set_cpu(cpu, &mask);
213 + if (topology_mode != TOPOLOGY_MODE_HW)
214 + goto out;
215 + cpu -= cpu % (smp_cpu_mtid + 1);
216 +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
217 +index 4efd39aacb9f2..8767dc53b5699 100644
218 +--- a/arch/x86/entry/common.c
219 ++++ b/arch/x86/entry/common.c
220 +@@ -127,8 +127,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
221 + /* User code screwed up. */
222 + regs->ax = -EFAULT;
223 +
224 +- instrumentation_end();
225 + local_irq_disable();
226 ++ instrumentation_end();
227 + irqentry_exit_to_user_mode(regs);
228 + return false;
229 + }
230 +@@ -266,15 +266,16 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
231 + irqentry_state_t state = irqentry_enter(regs);
232 + bool inhcall;
233 +
234 ++ instrumentation_begin();
235 + run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
236 +
237 + inhcall = get_and_clear_inhcall();
238 + if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
239 +- instrumentation_begin();
240 + irqentry_exit_cond_resched();
241 + instrumentation_end();
242 + restore_inhcall(inhcall);
243 + } else {
244 ++ instrumentation_end();
245 + irqentry_exit(regs, state);
246 + }
247 + }
248 +diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
249 +index 18df171296955..7050a9ebd73f1 100644
250 +--- a/arch/x86/events/core.c
251 ++++ b/arch/x86/events/core.c
252 +@@ -45,9 +45,11 @@
253 + #include "perf_event.h"
254 +
255 + struct x86_pmu x86_pmu __read_mostly;
256 ++static struct pmu pmu;
257 +
258 + DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
259 + .enabled = 1,
260 ++ .pmu = &pmu,
261 + };
262 +
263 + DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
264 +@@ -380,10 +382,12 @@ int x86_reserve_hardware(void)
265 + if (!atomic_inc_not_zero(&pmc_refcount)) {
266 + mutex_lock(&pmc_reserve_mutex);
267 + if (atomic_read(&pmc_refcount) == 0) {
268 +- if (!reserve_pmc_hardware())
269 ++ if (!reserve_pmc_hardware()) {
270 + err = -EBUSY;
271 +- else
272 ++ } else {
273 + reserve_ds_buffers();
274 ++ reserve_lbr_buffers();
275 ++ }
276 + }
277 + if (!err)
278 + atomic_inc(&pmc_refcount);
279 +@@ -724,16 +728,23 @@ void x86_pmu_enable_all(int added)
280 + }
281 + }
282 +
283 +-static struct pmu pmu;
284 +-
285 + static inline int is_x86_event(struct perf_event *event)
286 + {
287 + return event->pmu == &pmu;
288 + }
289 +
290 +-struct pmu *x86_get_pmu(void)
291 ++struct pmu *x86_get_pmu(unsigned int cpu)
292 + {
293 +- return &pmu;
294 ++ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
295 ++
296 ++ /*
297 ++ * All CPUs of the hybrid type have been offline.
298 ++ * The x86_get_pmu() should not be invoked.
299 ++ */
300 ++ if (WARN_ON_ONCE(!cpuc->pmu))
301 ++ return &pmu;
302 ++
303 ++ return cpuc->pmu;
304 + }
305 + /*
306 + * Event scheduler state:
307 +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
308 +index 4c18e7fb58f58..77fe4fece6798 100644
309 +--- a/arch/x86/events/intel/core.c
310 ++++ b/arch/x86/events/intel/core.c
311 +@@ -4879,7 +4879,7 @@ static void update_tfa_sched(void *ignored)
312 + * and if so force schedule out for all event types all contexts
313 + */
314 + if (test_bit(3, cpuc->active_mask))
315 +- perf_pmu_resched(x86_get_pmu());
316 ++ perf_pmu_resched(x86_get_pmu(smp_processor_id()));
317 + }
318 +
319 + static ssize_t show_sysctl_tfa(struct device *cdev,
320 +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
321 +index d32b302719fe5..72df2f392c863 100644
322 +--- a/arch/x86/events/intel/ds.c
323 ++++ b/arch/x86/events/intel/ds.c
324 +@@ -2192,7 +2192,7 @@ void __init intel_ds_init(void)
325 + PERF_SAMPLE_TIME;
326 + x86_pmu.flags |= PMU_FL_PEBS_ALL;
327 + pebs_qual = "-baseline";
328 +- x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
329 ++ x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
330 + } else {
331 + /* Only basic record supported */
332 + x86_pmu.large_pebs_flags &=
333 +@@ -2207,7 +2207,7 @@ void __init intel_ds_init(void)
334 +
335 + if (x86_pmu.intel_cap.pebs_output_pt_available) {
336 + pr_cont("PEBS-via-PT, ");
337 +- x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
338 ++ x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
339 + }
340 +
341 + break;
342 +diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
343 +index 21890dacfcfee..c9cd6ce0fa2ad 100644
344 +--- a/arch/x86/events/intel/lbr.c
345 ++++ b/arch/x86/events/intel/lbr.c
346 +@@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel)
347 +
348 + void intel_pmu_lbr_add(struct perf_event *event)
349 + {
350 +- struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
351 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
352 +
353 + if (!x86_pmu.lbr_nr)
354 +@@ -696,16 +695,11 @@ void intel_pmu_lbr_add(struct perf_event *event)
355 + perf_sched_cb_inc(event->ctx->pmu);
356 + if (!cpuc->lbr_users++ && !event->total_time_running)
357 + intel_pmu_lbr_reset();
358 +-
359 +- if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
360 +- kmem_cache && !cpuc->lbr_xsave &&
361 +- (cpuc->lbr_users != cpuc->lbr_pebs_users))
362 +- cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
363 + }
364 +
365 + void release_lbr_buffers(void)
366 + {
367 +- struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache;
368 ++ struct kmem_cache *kmem_cache;
369 + struct cpu_hw_events *cpuc;
370 + int cpu;
371 +
372 +@@ -714,6 +708,7 @@ void release_lbr_buffers(void)
373 +
374 + for_each_possible_cpu(cpu) {
375 + cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
376 ++ kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
377 + if (kmem_cache && cpuc->lbr_xsave) {
378 + kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
379 + cpuc->lbr_xsave = NULL;
380 +@@ -721,6 +716,27 @@ void release_lbr_buffers(void)
381 + }
382 + }
383 +
384 ++void reserve_lbr_buffers(void)
385 ++{
386 ++ struct kmem_cache *kmem_cache;
387 ++ struct cpu_hw_events *cpuc;
388 ++ int cpu;
389 ++
390 ++ if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
391 ++ return;
392 ++
393 ++ for_each_possible_cpu(cpu) {
394 ++ cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
395 ++ kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
396 ++ if (!kmem_cache || cpuc->lbr_xsave)
397 ++ continue;
398 ++
399 ++ cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
400 ++ GFP_KERNEL | __GFP_ZERO,
401 ++ cpu_to_node(cpu));
402 ++ }
403 ++}
404 ++
405 + void intel_pmu_lbr_del(struct perf_event *event)
406 + {
407 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
408 +@@ -1609,7 +1625,7 @@ void intel_pmu_lbr_init_hsw(void)
409 + x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
410 + x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
411 +
412 +- x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
413 ++ x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
414 +
415 + if (lbr_from_signext_quirk_needed())
416 + static_branch_enable(&lbr_from_quirk_key);
417 +@@ -1629,7 +1645,7 @@ __init void intel_pmu_lbr_init_skl(void)
418 + x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
419 + x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
420 +
421 +- x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
422 ++ x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
423 +
424 + /*
425 + * SW branch filter usage:
426 +@@ -1726,7 +1742,7 @@ static bool is_arch_lbr_xsave_available(void)
427 +
428 + void __init intel_pmu_arch_lbr_init(void)
429 + {
430 +- struct pmu *pmu = x86_get_pmu();
431 ++ struct pmu *pmu = x86_get_pmu(smp_processor_id());
432 + union cpuid28_eax eax;
433 + union cpuid28_ebx ebx;
434 + union cpuid28_ecx ecx;
435 +diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
436 +index 53b2b5fc23bca..35cdece5644fb 100644
437 +--- a/arch/x86/events/perf_event.h
438 ++++ b/arch/x86/events/perf_event.h
439 +@@ -327,6 +327,8 @@ struct cpu_hw_events {
440 + int n_pair; /* Large increment events */
441 +
442 + void *kfree_on_online[X86_PERF_KFREE_MAX];
443 ++
444 ++ struct pmu *pmu;
445 + };
446 +
447 + #define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \
448 +@@ -905,7 +907,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \
449 + .event_str_ht = ht, \
450 + }
451 +
452 +-struct pmu *x86_get_pmu(void);
453 ++struct pmu *x86_get_pmu(unsigned int cpu);
454 + extern struct x86_pmu x86_pmu __read_mostly;
455 +
456 + static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
457 +@@ -1135,6 +1137,8 @@ void reserve_ds_buffers(void);
458 +
459 + void release_lbr_buffers(void);
460 +
461 ++void reserve_lbr_buffers(void);
462 ++
463 + extern struct event_constraint bts_constraint;
464 + extern struct event_constraint vlbr_constraint;
465 +
466 +@@ -1282,6 +1286,10 @@ static inline void release_lbr_buffers(void)
467 + {
468 + }
469 +
470 ++static inline void reserve_lbr_buffers(void)
471 ++{
472 ++}
473 ++
474 + static inline int intel_pmu_init(void)
475 + {
476 + return 0;
477 +diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
478 +index fdee23ea4e173..16bf4d4a8159e 100644
479 +--- a/arch/x86/include/asm/fpu/internal.h
480 ++++ b/arch/x86/include/asm/fpu/internal.h
481 +@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
482 + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
483 + }
484 +
485 ++static inline void fxsave(struct fxregs_state *fx)
486 ++{
487 ++ if (IS_ENABLED(CONFIG_X86_32))
488 ++ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
489 ++ else
490 ++ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
491 ++}
492 ++
493 + /* These macros all use (%edi)/(%rdi) as the single memory argument. */
494 + #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
495 + #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
496 +@@ -268,28 +276,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
497 + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
498 + : "memory")
499 +
500 +-/*
501 +- * This function is called only during boot time when x86 caps are not set
502 +- * up and alternative can not be used yet.
503 +- */
504 +-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
505 +-{
506 +- u64 mask = xfeatures_mask_all;
507 +- u32 lmask = mask;
508 +- u32 hmask = mask >> 32;
509 +- int err;
510 +-
511 +- WARN_ON(system_state != SYSTEM_BOOTING);
512 +-
513 +- if (boot_cpu_has(X86_FEATURE_XSAVES))
514 +- XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
515 +- else
516 +- XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
517 +-
518 +- /* We should never fault when copying to a kernel buffer: */
519 +- WARN_ON_FPU(err);
520 +-}
521 +-
522 + /*
523 + * This function is called only during boot time when x86 caps are not set
524 + * up and alternative can not be used yet.
525 +diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
526 +index ec3ae30547920..b7b92cdf3add4 100644
527 +--- a/arch/x86/kernel/fpu/signal.c
528 ++++ b/arch/x86/kernel/fpu/signal.c
529 +@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state,
530 +
531 + if (use_xsave()) {
532 + /*
533 +- * Note: we don't need to zero the reserved bits in the
534 +- * xstate_header here because we either didn't copy them at all,
535 +- * or we checked earlier that they aren't set.
536 ++ * Clear all feature bits which are not set in
537 ++ * user_xfeatures and clear all extended features
538 ++ * for fx_only mode.
539 + */
540 ++ u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
541 +
542 + /*
543 +- * 'user_xfeatures' might have bits clear which are
544 +- * set in header->xfeatures. This represents features that
545 +- * were in init state prior to a signal delivery, and need
546 +- * to be reset back to the init state. Clear any user
547 +- * feature bits which are set in the kernel buffer to get
548 +- * them back to the init state.
549 +- *
550 +- * Supervisor state is unchanged by input from userspace.
551 +- * Ensure supervisor state bits stay set and supervisor
552 +- * state is not modified.
553 ++ * Supervisor state has to be preserved. The sigframe
554 ++ * restore can only modify user features, i.e. @mask
555 ++ * cannot contain them.
556 + */
557 +- if (fx_only)
558 +- header->xfeatures = XFEATURE_MASK_FPSSE;
559 +- else
560 +- header->xfeatures &= user_xfeatures |
561 +- xfeatures_mask_supervisor();
562 ++ header->xfeatures &= mask | xfeatures_mask_supervisor();
563 + }
564 +
565 + if (use_fxsr()) {
566 +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
567 +index 2ad57cc14b83f..451435d7ff413 100644
568 +--- a/arch/x86/kernel/fpu/xstate.c
569 ++++ b/arch/x86/kernel/fpu/xstate.c
570 +@@ -440,6 +440,25 @@ static void __init print_xstate_offset_size(void)
571 + }
572 + }
573 +
574 ++/*
575 ++ * All supported features have either init state all zeros or are
576 ++ * handled in setup_init_fpu() individually. This is an explicit
577 ++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
578 ++ * newly added supported features at build time and make people
579 ++ * actually look at the init state for the new feature.
580 ++ */
581 ++#define XFEATURES_INIT_FPSTATE_HANDLED \
582 ++ (XFEATURE_MASK_FP | \
583 ++ XFEATURE_MASK_SSE | \
584 ++ XFEATURE_MASK_YMM | \
585 ++ XFEATURE_MASK_OPMASK | \
586 ++ XFEATURE_MASK_ZMM_Hi256 | \
587 ++ XFEATURE_MASK_Hi16_ZMM | \
588 ++ XFEATURE_MASK_PKRU | \
589 ++ XFEATURE_MASK_BNDREGS | \
590 ++ XFEATURE_MASK_BNDCSR | \
591 ++ XFEATURE_MASK_PASID)
592 ++
593 + /*
594 + * setup the xstate image representing the init state
595 + */
596 +@@ -447,6 +466,10 @@ static void __init setup_init_fpu_buf(void)
597 + {
598 + static int on_boot_cpu __initdata = 1;
599 +
600 ++ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
601 ++ XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
602 ++ XFEATURES_INIT_FPSTATE_HANDLED);
603 ++
604 + WARN_ON_FPU(!on_boot_cpu);
605 + on_boot_cpu = 0;
606 +
607 +@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void)
608 + copy_kernel_to_xregs_booting(&init_fpstate.xsave);
609 +
610 + /*
611 +- * Dump the init state again. This is to identify the init state
612 +- * of any feature which is not represented by all zero's.
613 ++ * All components are now in init state. Read the state back so
614 ++ * that init_fpstate contains all non-zero init state. This only
615 ++ * works with XSAVE, but not with XSAVEOPT and XSAVES because
616 ++ * those use the init optimization which skips writing data for
617 ++ * components in init state.
618 ++ *
619 ++ * XSAVE could be used, but that would require to reshuffle the
620 ++ * data when XSAVES is available because XSAVES uses xstate
621 ++ * compaction. But doing so is a pointless exercise because most
622 ++ * components have an all zeros init state except for the legacy
623 ++ * ones (FP and SSE). Those can be saved with FXSAVE into the
624 ++ * legacy area. Adding new features requires to ensure that init
625 ++ * state is all zeroes or if not to add the necessary handling
626 ++ * here.
627 + */
628 +- copy_xregs_to_kernel_booting(&init_fpstate.xsave);
629 ++ fxsave(&init_fpstate.fxsave);
630 + }
631 +
632 + static int xfeature_uncompacted_offset(int xfeature_nr)
633 +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
634 +index dbc6214d69def..8f3b438f6fd3b 100644
635 +--- a/arch/x86/kvm/svm/sev.c
636 ++++ b/arch/x86/kvm/svm/sev.c
637 +@@ -143,9 +143,25 @@ static void sev_asid_free(int asid)
638 + mutex_unlock(&sev_bitmap_lock);
639 + }
640 +
641 +-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
642 ++static void sev_decommission(unsigned int handle)
643 + {
644 + struct sev_data_decommission *decommission;
645 ++
646 ++ if (!handle)
647 ++ return;
648 ++
649 ++ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
650 ++ if (!decommission)
651 ++ return;
652 ++
653 ++ decommission->handle = handle;
654 ++ sev_guest_decommission(decommission, NULL);
655 ++
656 ++ kfree(decommission);
657 ++}
658 ++
659 ++static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
660 ++{
661 + struct sev_data_deactivate *data;
662 +
663 + if (!handle)
664 +@@ -165,15 +181,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
665 +
666 + kfree(data);
667 +
668 +- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
669 +- if (!decommission)
670 +- return;
671 +-
672 +- /* decommission handle */
673 +- decommission->handle = handle;
674 +- sev_guest_decommission(decommission, NULL);
675 +-
676 +- kfree(decommission);
677 ++ sev_decommission(handle);
678 + }
679 +
680 + static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
681 +@@ -303,8 +311,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
682 +
683 + /* Bind ASID to this guest */
684 + ret = sev_bind_asid(kvm, start->handle, error);
685 +- if (ret)
686 ++ if (ret) {
687 ++ sev_decommission(start->handle);
688 + goto e_free_session;
689 ++ }
690 +
691 + /* return handle to userspace */
692 + params.handle = start->handle;
693 +diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
694 +index 0a0e168be1cbe..9b0e771302cee 100644
695 +--- a/arch/x86/pci/fixup.c
696 ++++ b/arch/x86/pci/fixup.c
697 +@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
698 + DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
699 + DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
700 +
701 ++#define RS690_LOWER_TOP_OF_DRAM2 0x30
702 ++#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
703 ++#define RS690_UPPER_TOP_OF_DRAM2 0x31
704 ++#define RS690_HTIU_NB_INDEX 0xA8
705 ++#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100
706 ++#define RS690_HTIU_NB_DATA 0xAC
707 ++
708 ++/*
709 ++ * Some BIOS implementations support RAM above 4GB, but do not configure the
710 ++ * PCI host to respond to bus master accesses for these addresses. These
711 ++ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
712 ++ * works as expected for addresses below 4GB.
713 ++ *
714 ++ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
715 ++ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
716 ++ */
717 ++static void rs690_fix_64bit_dma(struct pci_dev *pdev)
718 ++{
719 ++ u32 val = 0;
720 ++ phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
721 ++
722 ++ if (top_of_dram <= (1ULL << 32))
723 ++ return;
724 ++
725 ++ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
726 ++ RS690_LOWER_TOP_OF_DRAM2);
727 ++ pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
728 ++
729 ++ if (val)
730 ++ return;
731 ++
732 ++ pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
733 ++
734 ++ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
735 ++ RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
736 ++ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
737 ++
738 ++ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
739 ++ RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
740 ++ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
741 ++ top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
742 ++}
743 ++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
744 ++
745 + #endif
746 +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
747 +index 8183ddb3700c4..64db5852432e7 100644
748 +--- a/arch/x86/xen/enlighten_pv.c
749 ++++ b/arch/x86/xen/enlighten_pv.c
750 +@@ -592,8 +592,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
751 + DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
752 + {
753 + /* This should never happen and there is no way to handle it. */
754 ++ instrumentation_begin();
755 + pr_err("Unknown trap in Xen PV mode.");
756 + BUG();
757 ++ instrumentation_end();
758 + }
759 +
760 + #ifdef CONFIG_X86_MCE
761 +diff --git a/certs/Kconfig b/certs/Kconfig
762 +index c94e93d8bccf0..ab88d2a7f3c7f 100644
763 +--- a/certs/Kconfig
764 ++++ b/certs/Kconfig
765 +@@ -83,4 +83,21 @@ config SYSTEM_BLACKLIST_HASH_LIST
766 + wrapper to incorporate the list into the kernel. Each <hash> should
767 + be a string of hex digits.
768 +
769 ++config SYSTEM_REVOCATION_LIST
770 ++ bool "Provide system-wide ring of revocation certificates"
771 ++ depends on SYSTEM_BLACKLIST_KEYRING
772 ++ depends on PKCS7_MESSAGE_PARSER=y
773 ++ help
774 ++ If set, this allows revocation certificates to be stored in the
775 ++ blacklist keyring and implements a hook whereby a PKCS#7 message can
776 ++ be checked to see if it matches such a certificate.
777 ++
778 ++config SYSTEM_REVOCATION_KEYS
779 ++ string "X.509 certificates to be preloaded into the system blacklist keyring"
780 ++ depends on SYSTEM_REVOCATION_LIST
781 ++ help
782 ++ If set, this option should be the filename of a PEM-formatted file
783 ++ containing X.509 certificates to be included in the default blacklist
784 ++ keyring.
785 ++
786 + endmenu
787 +diff --git a/certs/Makefile b/certs/Makefile
788 +index f4c25b67aad90..b6db52ebf0beb 100644
789 +--- a/certs/Makefile
790 ++++ b/certs/Makefile
791 +@@ -3,8 +3,9 @@
792 + # Makefile for the linux kernel signature checking certificates.
793 + #
794 +
795 +-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
796 +-obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o
797 ++obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o
798 ++obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
799 ++obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
800 + ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"")
801 + obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
802 + else
803 +@@ -29,7 +30,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF
804 + $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS))
805 + endif # CONFIG_SYSTEM_TRUSTED_KEYRING
806 +
807 +-clean-files := x509_certificate_list .x509.list
808 ++clean-files := x509_certificate_list .x509.list x509_revocation_list
809 +
810 + ifeq ($(CONFIG_MODULE_SIG),y)
811 + ###############################################################################
812 +@@ -104,3 +105,17 @@ targets += signing_key.x509
813 + $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE
814 + $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY))
815 + endif # CONFIG_MODULE_SIG
816 ++
817 ++ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y)
818 ++
819 ++$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS))
820 ++
821 ++$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list
822 ++
823 ++quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2))
824 ++ cmd_extract_certs = scripts/extract-cert $(2) $@
825 ++
826 ++targets += x509_revocation_list
827 ++$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE
828 ++ $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS))
829 ++endif
830 +diff --git a/certs/blacklist.c b/certs/blacklist.c
831 +index bffe4c6f4a9e2..c9a435b15af40 100644
832 +--- a/certs/blacklist.c
833 ++++ b/certs/blacklist.c
834 +@@ -17,9 +17,15 @@
835 + #include <linux/uidgid.h>
836 + #include <keys/system_keyring.h>
837 + #include "blacklist.h"
838 ++#include "common.h"
839 +
840 + static struct key *blacklist_keyring;
841 +
842 ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST
843 ++extern __initconst const u8 revocation_certificate_list[];
844 ++extern __initconst const unsigned long revocation_certificate_list_size;
845 ++#endif
846 ++
847 + /*
848 + * The description must be a type prefix, a colon and then an even number of
849 + * hex digits. The hash is kept in the description.
850 +@@ -145,6 +151,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len)
851 + }
852 + EXPORT_SYMBOL_GPL(is_binary_blacklisted);
853 +
854 ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST
855 ++/**
856 ++ * add_key_to_revocation_list - Add a revocation certificate to the blacklist
857 ++ * @data: The data blob containing the certificate
858 ++ * @size: The size of data blob
859 ++ */
860 ++int add_key_to_revocation_list(const char *data, size_t size)
861 ++{
862 ++ key_ref_t key;
863 ++
864 ++ key = key_create_or_update(make_key_ref(blacklist_keyring, true),
865 ++ "asymmetric",
866 ++ NULL,
867 ++ data,
868 ++ size,
869 ++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW),
870 ++ KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN);
871 ++
872 ++ if (IS_ERR(key)) {
873 ++ pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key));
874 ++ return PTR_ERR(key);
875 ++ }
876 ++
877 ++ return 0;
878 ++}
879 ++
880 ++/**
881 ++ * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked
882 ++ * @pkcs7: The PKCS#7 message to check
883 ++ */
884 ++int is_key_on_revocation_list(struct pkcs7_message *pkcs7)
885 ++{
886 ++ int ret;
887 ++
888 ++ ret = pkcs7_validate_trust(pkcs7, blacklist_keyring);
889 ++
890 ++ if (ret == 0)
891 ++ return -EKEYREJECTED;
892 ++
893 ++ return -ENOKEY;
894 ++}
895 ++#endif
896 ++
897 + /*
898 + * Initialise the blacklist
899 + */
900 +@@ -177,3 +226,18 @@ static int __init blacklist_init(void)
901 + * Must be initialised before we try and load the keys into the keyring.
902 + */
903 + device_initcall(blacklist_init);
904 ++
905 ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST
906 ++/*
907 ++ * Load the compiled-in list of revocation X.509 certificates.
908 ++ */
909 ++static __init int load_revocation_certificate_list(void)
910 ++{
911 ++ if (revocation_certificate_list_size)
912 ++ pr_notice("Loading compiled-in revocation X.509 certificates\n");
913 ++
914 ++ return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size,
915 ++ blacklist_keyring);
916 ++}
917 ++late_initcall(load_revocation_certificate_list);
918 ++#endif
919 +diff --git a/certs/blacklist.h b/certs/blacklist.h
920 +index 1efd6fa0dc608..51b320cf85749 100644
921 +--- a/certs/blacklist.h
922 ++++ b/certs/blacklist.h
923 +@@ -1,3 +1,5 @@
924 + #include <linux/kernel.h>
925 ++#include <linux/errno.h>
926 ++#include <crypto/pkcs7.h>
927 +
928 + extern const char __initconst *const blacklist_hashes[];
929 +diff --git a/certs/common.c b/certs/common.c
930 +new file mode 100644
931 +index 0000000000000..16a220887a53e
932 +--- /dev/null
933 ++++ b/certs/common.c
934 +@@ -0,0 +1,57 @@
935 ++// SPDX-License-Identifier: GPL-2.0-or-later
936 ++
937 ++#include <linux/kernel.h>
938 ++#include <linux/key.h>
939 ++#include "common.h"
940 ++
941 ++int load_certificate_list(const u8 cert_list[],
942 ++ const unsigned long list_size,
943 ++ const struct key *keyring)
944 ++{
945 ++ key_ref_t key;
946 ++ const u8 *p, *end;
947 ++ size_t plen;
948 ++
949 ++ p = cert_list;
950 ++ end = p + list_size;
951 ++ while (p < end) {
952 ++ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
953 ++ * than 256 bytes in size.
954 ++ */
955 ++ if (end - p < 4)
956 ++ goto dodgy_cert;
957 ++ if (p[0] != 0x30 &&
958 ++ p[1] != 0x82)
959 ++ goto dodgy_cert;
960 ++ plen = (p[2] << 8) | p[3];
961 ++ plen += 4;
962 ++ if (plen > end - p)
963 ++ goto dodgy_cert;
964 ++
965 ++ key = key_create_or_update(make_key_ref(keyring, 1),
966 ++ "asymmetric",
967 ++ NULL,
968 ++ p,
969 ++ plen,
970 ++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
971 ++ KEY_USR_VIEW | KEY_USR_READ),
972 ++ KEY_ALLOC_NOT_IN_QUOTA |
973 ++ KEY_ALLOC_BUILT_IN |
974 ++ KEY_ALLOC_BYPASS_RESTRICTION);
975 ++ if (IS_ERR(key)) {
976 ++ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
977 ++ PTR_ERR(key));
978 ++ } else {
979 ++ pr_notice("Loaded X.509 cert '%s'\n",
980 ++ key_ref_to_ptr(key)->description);
981 ++ key_ref_put(key);
982 ++ }
983 ++ p += plen;
984 ++ }
985 ++
986 ++ return 0;
987 ++
988 ++dodgy_cert:
989 ++ pr_err("Problem parsing in-kernel X.509 certificate list\n");
990 ++ return 0;
991 ++}
992 +diff --git a/certs/common.h b/certs/common.h
993 +new file mode 100644
994 +index 0000000000000..abdb5795936b7
995 +--- /dev/null
996 ++++ b/certs/common.h
997 +@@ -0,0 +1,9 @@
998 ++/* SPDX-License-Identifier: GPL-2.0-or-later */
999 ++
1000 ++#ifndef _CERT_COMMON_H
1001 ++#define _CERT_COMMON_H
1002 ++
1003 ++int load_certificate_list(const u8 cert_list[], const unsigned long list_size,
1004 ++ const struct key *keyring);
1005 ++
1006 ++#endif
1007 +diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S
1008 +new file mode 100644
1009 +index 0000000000000..f21aae8a8f0ef
1010 +--- /dev/null
1011 ++++ b/certs/revocation_certificates.S
1012 +@@ -0,0 +1,21 @@
1013 ++/* SPDX-License-Identifier: GPL-2.0 */
1014 ++#include <linux/export.h>
1015 ++#include <linux/init.h>
1016 ++
1017 ++ __INITRODATA
1018 ++
1019 ++ .align 8
1020 ++ .globl revocation_certificate_list
1021 ++revocation_certificate_list:
1022 ++__revocation_list_start:
1023 ++ .incbin "certs/x509_revocation_list"
1024 ++__revocation_list_end:
1025 ++
1026 ++ .align 8
1027 ++ .globl revocation_certificate_list_size
1028 ++revocation_certificate_list_size:
1029 ++#ifdef CONFIG_64BIT
1030 ++ .quad __revocation_list_end - __revocation_list_start
1031 ++#else
1032 ++ .long __revocation_list_end - __revocation_list_start
1033 ++#endif
1034 +diff --git a/certs/system_keyring.c b/certs/system_keyring.c
1035 +index 4b693da488f14..0c9a4795e847b 100644
1036 +--- a/certs/system_keyring.c
1037 ++++ b/certs/system_keyring.c
1038 +@@ -16,6 +16,7 @@
1039 + #include <keys/asymmetric-type.h>
1040 + #include <keys/system_keyring.h>
1041 + #include <crypto/pkcs7.h>
1042 ++#include "common.h"
1043 +
1044 + static struct key *builtin_trusted_keys;
1045 + #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
1046 +@@ -137,54 +138,10 @@ device_initcall(system_trusted_keyring_init);
1047 + */
1048 + static __init int load_system_certificate_list(void)
1049 + {
1050 +- key_ref_t key;
1051 +- const u8 *p, *end;
1052 +- size_t plen;
1053 +-
1054 + pr_notice("Loading compiled-in X.509 certificates\n");
1055 +
1056 +- p = system_certificate_list;
1057 +- end = p + system_certificate_list_size;
1058 +- while (p < end) {
1059 +- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
1060 +- * than 256 bytes in size.
1061 +- */
1062 +- if (end - p < 4)
1063 +- goto dodgy_cert;
1064 +- if (p[0] != 0x30 &&
1065 +- p[1] != 0x82)
1066 +- goto dodgy_cert;
1067 +- plen = (p[2] << 8) | p[3];
1068 +- plen += 4;
1069 +- if (plen > end - p)
1070 +- goto dodgy_cert;
1071 +-
1072 +- key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1),
1073 +- "asymmetric",
1074 +- NULL,
1075 +- p,
1076 +- plen,
1077 +- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
1078 +- KEY_USR_VIEW | KEY_USR_READ),
1079 +- KEY_ALLOC_NOT_IN_QUOTA |
1080 +- KEY_ALLOC_BUILT_IN |
1081 +- KEY_ALLOC_BYPASS_RESTRICTION);
1082 +- if (IS_ERR(key)) {
1083 +- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
1084 +- PTR_ERR(key));
1085 +- } else {
1086 +- pr_notice("Loaded X.509 cert '%s'\n",
1087 +- key_ref_to_ptr(key)->description);
1088 +- key_ref_put(key);
1089 +- }
1090 +- p += plen;
1091 +- }
1092 +-
1093 +- return 0;
1094 +-
1095 +-dodgy_cert:
1096 +- pr_err("Problem parsing in-kernel X.509 certificate list\n");
1097 +- return 0;
1098 ++ return load_certificate_list(system_certificate_list, system_certificate_list_size,
1099 ++ builtin_trusted_keys);
1100 + }
1101 + late_initcall(load_system_certificate_list);
1102 +
1103 +@@ -242,6 +199,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len,
1104 + pr_devel("PKCS#7 platform keyring is not available\n");
1105 + goto error;
1106 + }
1107 ++
1108 ++ ret = is_key_on_revocation_list(pkcs7);
1109 ++ if (ret != -ENOKEY) {
1110 ++ pr_devel("PKCS#7 platform key is on revocation list\n");
1111 ++ goto error;
1112 ++ }
1113 + }
1114 + ret = pkcs7_validate_trust(pkcs7, trusted_keys);
1115 + if (ret < 0) {
1116 +diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
1117 +index 88310ac9ce906..62c536f9d9258 100644
1118 +--- a/drivers/base/swnode.c
1119 ++++ b/drivers/base/swnode.c
1120 +@@ -1032,7 +1032,15 @@ int device_add_software_node(struct device *dev, const struct software_node *nod
1121 + }
1122 +
1123 + set_secondary_fwnode(dev, &swnode->fwnode);
1124 +- software_node_notify(dev, KOBJ_ADD);
1125 ++
1126 ++ /*
1127 ++ * If the device has been fully registered by the time this function is
1128 ++ * called, software_node_notify() must be called separately so that the
1129 ++ * symlinks get created and the reference count of the node is kept in
1130 ++ * balance.
1131 ++ */
1132 ++ if (device_is_registered(dev))
1133 ++ software_node_notify(dev, KOBJ_ADD);
1134 +
1135 + return 0;
1136 + }
1137 +@@ -1052,7 +1060,8 @@ void device_remove_software_node(struct device *dev)
1138 + if (!swnode)
1139 + return;
1140 +
1141 +- software_node_notify(dev, KOBJ_REMOVE);
1142 ++ if (device_is_registered(dev))
1143 ++ software_node_notify(dev, KOBJ_REMOVE);
1144 + set_secondary_fwnode(dev, NULL);
1145 + kobject_put(&swnode->kobj);
1146 + }
1147 +@@ -1106,8 +1115,7 @@ int software_node_notify(struct device *dev, unsigned long action)
1148 +
1149 + switch (action) {
1150 + case KOBJ_ADD:
1151 +- ret = sysfs_create_link_nowarn(&dev->kobj, &swnode->kobj,
1152 +- "software_node");
1153 ++ ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node");
1154 + if (ret)
1155 + break;
1156 +
1157 +diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
1158 +index 03b1b03349477..c42b17b76640e 100644
1159 +--- a/drivers/dma/Kconfig
1160 ++++ b/drivers/dma/Kconfig
1161 +@@ -690,6 +690,7 @@ config XILINX_ZYNQMP_DMA
1162 +
1163 + config XILINX_ZYNQMP_DPDMA
1164 + tristate "Xilinx DPDMA Engine"
1165 ++ depends on HAS_IOMEM && OF
1166 + select DMA_ENGINE
1167 + select DMA_VIRTUAL_CHANNELS
1168 + help
1169 +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
1170 +index 1d8a3876b7452..5ba8e8bc609fc 100644
1171 +--- a/drivers/dma/idxd/cdev.c
1172 ++++ b/drivers/dma/idxd/cdev.c
1173 +@@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
1174 + pasid = iommu_sva_get_pasid(sva);
1175 + if (pasid == IOMMU_PASID_INVALID) {
1176 + iommu_sva_unbind_device(sva);
1177 ++ rc = -EINVAL;
1178 + goto failed;
1179 + }
1180 +
1181 +diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
1182 +index 27c07350971dd..375e7e647df6b 100644
1183 +--- a/drivers/dma/mediatek/mtk-uart-apdma.c
1184 ++++ b/drivers/dma/mediatek/mtk-uart-apdma.c
1185 +@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
1186 +
1187 + static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
1188 + {
1189 +- struct dma_chan *chan = vd->tx.chan;
1190 +- struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
1191 +-
1192 +- kfree(c->desc);
1193 ++ kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
1194 + }
1195 +
1196 + static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
1197 +@@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
1198 +
1199 + static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
1200 + {
1201 +- struct mtk_uart_apdma_desc *d = c->desc;
1202 +-
1203 + mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
1204 + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
1205 + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
1206 +-
1207 +- list_del(&d->vd.node);
1208 +- vchan_cookie_complete(&d->vd);
1209 + }
1210 +
1211 + static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
1212 +@@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
1213 +
1214 + c->rx_status = d->avail_len - cnt;
1215 + mtk_uart_apdma_write(c, VFF_RPT, wg);
1216 ++}
1217 +
1218 +- list_del(&d->vd.node);
1219 +- vchan_cookie_complete(&d->vd);
1220 ++static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
1221 ++{
1222 ++ struct mtk_uart_apdma_desc *d = c->desc;
1223 ++
1224 ++ if (d) {
1225 ++ list_del(&d->vd.node);
1226 ++ vchan_cookie_complete(&d->vd);
1227 ++ c->desc = NULL;
1228 ++ }
1229 + }
1230 +
1231 + static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
1232 +@@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
1233 + mtk_uart_apdma_rx_handler(c);
1234 + else if (c->dir == DMA_MEM_TO_DEV)
1235 + mtk_uart_apdma_tx_handler(c);
1236 ++ mtk_uart_apdma_chan_complete_handler(c);
1237 + spin_unlock_irqrestore(&c->vc.lock, flags);
1238 +
1239 + return IRQ_HANDLED;
1240 +@@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
1241 + return NULL;
1242 +
1243 + /* Now allocate and setup the descriptor */
1244 +- d = kzalloc(sizeof(*d), GFP_ATOMIC);
1245 ++ d = kzalloc(sizeof(*d), GFP_NOWAIT);
1246 + if (!d)
1247 + return NULL;
1248 +
1249 +@@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
1250 + unsigned long flags;
1251 +
1252 + spin_lock_irqsave(&c->vc.lock, flags);
1253 +- if (vchan_issue_pending(&c->vc)) {
1254 ++ if (vchan_issue_pending(&c->vc) && !c->desc) {
1255 + vd = vchan_next_desc(&c->vc);
1256 + c->desc = to_mtk_uart_apdma_desc(&vd->tx);
1257 +
1258 +diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
1259 +index d530c1bf11d97..6885b3dcd7a97 100644
1260 +--- a/drivers/dma/sh/rcar-dmac.c
1261 ++++ b/drivers/dma/sh/rcar-dmac.c
1262 +@@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
1263 +
1264 + /* Enable runtime PM and initialize the device. */
1265 + pm_runtime_enable(&pdev->dev);
1266 +- ret = pm_runtime_get_sync(&pdev->dev);
1267 ++ ret = pm_runtime_resume_and_get(&pdev->dev);
1268 + if (ret < 0) {
1269 + dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
1270 + return ret;
1271 +diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
1272 +index 36ba8b43e78de..18cbd1e43c2e8 100644
1273 +--- a/drivers/dma/stm32-mdma.c
1274 ++++ b/drivers/dma/stm32-mdma.c
1275 +@@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
1276 + return -ENOMEM;
1277 + }
1278 +
1279 +- ret = pm_runtime_get_sync(dmadev->ddev.dev);
1280 ++ ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
1281 + if (ret < 0)
1282 + return ret;
1283 +
1284 +@@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev)
1285 + u32 ccr, id;
1286 + int ret;
1287 +
1288 +- ret = pm_runtime_get_sync(dev);
1289 ++ ret = pm_runtime_resume_and_get(dev);
1290 + if (ret < 0)
1291 + return ret;
1292 +
1293 +diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
1294 +index ff7dfb3fdeb47..6c709803203ad 100644
1295 +--- a/drivers/dma/xilinx/xilinx_dpdma.c
1296 ++++ b/drivers/dma/xilinx/xilinx_dpdma.c
1297 +@@ -113,6 +113,7 @@
1298 + #define XILINX_DPDMA_CH_VDO 0x020
1299 + #define XILINX_DPDMA_CH_PYLD_SZ 0x024
1300 + #define XILINX_DPDMA_CH_DESC_ID 0x028
1301 ++#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0)
1302 +
1303 + /* DPDMA descriptor fields */
1304 + #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5
1305 +@@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
1306 + * will be used, but it should be enough.
1307 + */
1308 + list_for_each_entry(sw_desc, &desc->descriptors, node)
1309 +- sw_desc->hw.desc_id = desc->vdesc.tx.cookie;
1310 ++ sw_desc->hw.desc_id = desc->vdesc.tx.cookie
1311 ++ & XILINX_DPDMA_CH_DESC_ID_MASK;
1312 +
1313 + sw_desc = list_first_entry(&desc->descriptors,
1314 + struct xilinx_dpdma_sw_desc, node);
1315 +@@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan)
1316 + if (!chan->running || !pending)
1317 + goto out;
1318 +
1319 +- desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID);
1320 ++ desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
1321 ++ & XILINX_DPDMA_CH_DESC_ID_MASK;
1322 +
1323 + /* If the retrigger raced with vsync, retry at the next frame. */
1324 + sw_desc = list_first_entry(&pending->descriptors,
1325 +diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
1326 +index d8419565b92cc..5fecf5aa6e858 100644
1327 +--- a/drivers/dma/xilinx/zynqmp_dma.c
1328 ++++ b/drivers/dma/xilinx/zynqmp_dma.c
1329 +@@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
1330 + struct zynqmp_dma_desc_sw *desc;
1331 + int i, ret;
1332 +
1333 +- ret = pm_runtime_get_sync(chan->dev);
1334 ++ ret = pm_runtime_resume_and_get(chan->dev);
1335 + if (ret < 0)
1336 + return ret;
1337 +
1338 +diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
1339 +index 1631727bf0da1..c7b5446d01fd2 100644
1340 +--- a/drivers/gpio/gpiolib-cdev.c
1341 ++++ b/drivers/gpio/gpiolib-cdev.c
1342 +@@ -1880,6 +1880,7 @@ static void gpio_v2_line_info_changed_to_v1(
1343 + struct gpio_v2_line_info_changed *lic_v2,
1344 + struct gpioline_info_changed *lic_v1)
1345 + {
1346 ++ memset(lic_v1, 0, sizeof(*lic_v1));
1347 + gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info);
1348 + lic_v1->timestamp = lic_v2->timestamp_ns;
1349 + lic_v1->event_type = lic_v2->event_type;
1350 +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
1351 +index 47e0b48dc26fd..1c4623d25a62a 100644
1352 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
1353 ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
1354 +@@ -214,9 +214,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
1355 + {
1356 + struct drm_gem_object *obj = attach->dmabuf->priv;
1357 + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
1358 ++ int r;
1359 +
1360 + /* pin buffer into GTT */
1361 +- return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
1362 ++ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
1363 ++ if (r)
1364 ++ return r;
1365 ++
1366 ++ if (bo->tbo.moving) {
1367 ++ r = dma_fence_wait(bo->tbo.moving, true);
1368 ++ if (r) {
1369 ++ amdgpu_bo_unpin(bo);
1370 ++ return r;
1371 ++ }
1372 ++ }
1373 ++ return 0;
1374 + }
1375 +
1376 + /**
1377 +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
1378 +index 72d23651501d4..2342c5d216f9b 100644
1379 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
1380 ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
1381 +@@ -6769,12 +6769,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
1382 + if (ring->use_doorbell) {
1383 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
1384 + (adev->doorbell_index.kiq * 2) << 2);
1385 +- /* If GC has entered CGPG, ringing doorbell > first page doesn't
1386 +- * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
1387 +- * this issue.
1388 +- */
1389 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
1390 +- (adev->doorbell.size - 4));
1391 ++ (adev->doorbell_index.userqueue_end * 2) << 2);
1392 + }
1393 +
1394 + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
1395 +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1396 +index 1fdfb7783404e..d2c020a91c0be 100644
1397 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1398 ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1399 +@@ -3623,12 +3623,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
1400 + if (ring->use_doorbell) {
1401 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
1402 + (adev->doorbell_index.kiq * 2) << 2);
1403 +- /* If GC has entered CGPG, ringing doorbell > first page doesn't
1404 +- * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
1405 +- * this issue.
1406 +- */
1407 + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
1408 +- (adev->doorbell.size - 4));
1409 ++ (adev->doorbell_index.userqueue_end * 2) << 2);
1410 + }
1411 +
1412 + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
1413 +diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
1414 +index f64e06e1067dd..96ea1a2c11dd6 100644
1415 +--- a/drivers/gpu/drm/kmb/kmb_drv.c
1416 ++++ b/drivers/gpu/drm/kmb/kmb_drv.c
1417 +@@ -137,6 +137,7 @@ static int kmb_hw_init(struct drm_device *drm, unsigned long flags)
1418 + /* Allocate LCD interrupt resources */
1419 + irq_lcd = platform_get_irq(pdev, 0);
1420 + if (irq_lcd < 0) {
1421 ++ ret = irq_lcd;
1422 + drm_err(&kmb->drm, "irq_lcd not found");
1423 + goto setup_fail;
1424 + }
1425 +diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
1426 +index 347488685f745..60019d0532fcf 100644
1427 +--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
1428 ++++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
1429 +@@ -93,7 +93,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj)
1430 + if (ret)
1431 + return -EINVAL;
1432 +
1433 +- return 0;
1434 ++ ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
1435 ++ if (ret)
1436 ++ goto error;
1437 ++
1438 ++ if (nvbo->bo.moving)
1439 ++ ret = dma_fence_wait(nvbo->bo.moving, true);
1440 ++
1441 ++ ttm_bo_unreserve(&nvbo->bo);
1442 ++ if (ret)
1443 ++ goto error;
1444 ++
1445 ++ return ret;
1446 ++
1447 ++error:
1448 ++ nouveau_bo_unpin(nvbo);
1449 ++ return ret;
1450 + }
1451 +
1452 + void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
1453 +diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
1454 +index 42a87948e28c5..4a90807351e72 100644
1455 +--- a/drivers/gpu/drm/radeon/radeon_prime.c
1456 ++++ b/drivers/gpu/drm/radeon/radeon_prime.c
1457 +@@ -77,9 +77,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
1458 +
1459 + /* pin buffer into GTT */
1460 + ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
1461 +- if (likely(ret == 0))
1462 +- bo->prime_shared_count++;
1463 +-
1464 ++ if (unlikely(ret))
1465 ++ goto error;
1466 ++
1467 ++ if (bo->tbo.moving) {
1468 ++ ret = dma_fence_wait(bo->tbo.moving, false);
1469 ++ if (unlikely(ret)) {
1470 ++ radeon_bo_unpin(bo);
1471 ++ goto error;
1472 ++ }
1473 ++ }
1474 ++
1475 ++ bo->prime_shared_count++;
1476 ++error:
1477 + radeon_bo_unreserve(bo);
1478 + return ret;
1479 + }
1480 +diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
1481 +index 1fda574579afc..8106b5634fe10 100644
1482 +--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
1483 ++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
1484 +@@ -159,6 +159,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
1485 + struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector);
1486 + bool connected = false;
1487 +
1488 ++ WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev));
1489 ++
1490 + if (vc4_hdmi->hpd_gpio) {
1491 + if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^
1492 + vc4_hdmi->hpd_active_low)
1493 +@@ -180,10 +182,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
1494 + }
1495 + }
1496 +
1497 ++ pm_runtime_put(&vc4_hdmi->pdev->dev);
1498 + return connector_status_connected;
1499 + }
1500 +
1501 + cec_phys_addr_invalidate(vc4_hdmi->cec_adap);
1502 ++ pm_runtime_put(&vc4_hdmi->pdev->dev);
1503 + return connector_status_disconnected;
1504 + }
1505 +
1506 +@@ -473,7 +477,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder,
1507 + HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
1508 +
1509 + clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock);
1510 +- clk_disable_unprepare(vc4_hdmi->hsm_clock);
1511 + clk_disable_unprepare(vc4_hdmi->pixel_clock);
1512 +
1513 + ret = pm_runtime_put(&vc4_hdmi->pdev->dev);
1514 +@@ -784,13 +787,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
1515 + return;
1516 + }
1517 +
1518 +- ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
1519 +- if (ret) {
1520 +- DRM_ERROR("Failed to turn on HSM clock: %d\n", ret);
1521 +- clk_disable_unprepare(vc4_hdmi->pixel_clock);
1522 +- return;
1523 +- }
1524 +-
1525 + vc4_hdmi_cec_update_clk_div(vc4_hdmi);
1526 +
1527 + /*
1528 +@@ -801,7 +797,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
1529 + (hsm_rate > VC4_HSM_MID_CLOCK ? 150000000 : 75000000));
1530 + if (ret) {
1531 + DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret);
1532 +- clk_disable_unprepare(vc4_hdmi->hsm_clock);
1533 + clk_disable_unprepare(vc4_hdmi->pixel_clock);
1534 + return;
1535 + }
1536 +@@ -809,7 +804,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
1537 + ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock);
1538 + if (ret) {
1539 + DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret);
1540 +- clk_disable_unprepare(vc4_hdmi->hsm_clock);
1541 + clk_disable_unprepare(vc4_hdmi->pixel_clock);
1542 + return;
1543 + }
1544 +@@ -1929,6 +1923,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi)
1545 + return 0;
1546 + }
1547 +
1548 ++#ifdef CONFIG_PM
1549 ++static int vc4_hdmi_runtime_suspend(struct device *dev)
1550 ++{
1551 ++ struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
1552 ++
1553 ++ clk_disable_unprepare(vc4_hdmi->hsm_clock);
1554 ++
1555 ++ return 0;
1556 ++}
1557 ++
1558 ++static int vc4_hdmi_runtime_resume(struct device *dev)
1559 ++{
1560 ++ struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
1561 ++ int ret;
1562 ++
1563 ++ ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
1564 ++ if (ret)
1565 ++ return ret;
1566 ++
1567 ++ return 0;
1568 ++}
1569 ++#endif
1570 ++
1571 + static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
1572 + {
1573 + const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev);
1574 +@@ -2165,11 +2182,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = {
1575 + {}
1576 + };
1577 +
1578 ++static const struct dev_pm_ops vc4_hdmi_pm_ops = {
1579 ++ SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend,
1580 ++ vc4_hdmi_runtime_resume,
1581 ++ NULL)
1582 ++};
1583 ++
1584 + struct platform_driver vc4_hdmi_driver = {
1585 + .probe = vc4_hdmi_dev_probe,
1586 + .remove = vc4_hdmi_dev_remove,
1587 + .driver = {
1588 + .name = "vc4_hdmi",
1589 + .of_match_table = vc4_hdmi_dt_match,
1590 ++ .pm = &vc4_hdmi_pm_ops,
1591 + },
1592 + };
1593 +diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
1594 +index f9e1c2ceaac05..04a1e38f2a6f0 100644
1595 +--- a/drivers/i2c/busses/i2c-i801.c
1596 ++++ b/drivers/i2c/busses/i2c-i801.c
1597 +@@ -978,6 +978,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
1598 + }
1599 +
1600 + out:
1601 ++ /* Unlock the SMBus device for use by BIOS/ACPI */
1602 ++ outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv));
1603 ++
1604 + pm_runtime_mark_last_busy(&priv->pci_dev->dev);
1605 + pm_runtime_put_autosuspend(&priv->pci_dev->dev);
1606 + mutex_unlock(&priv->acpi_lock);
1607 +diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c
1608 +index a39f7d0927973..66dfa211e736b 100644
1609 +--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c
1610 ++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c
1611 +@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
1612 + }
1613 + }
1614 +
1615 +- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
1616 ++ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
1617 + if (ret) {
1618 + dev_err(&adapter->dev, "failure sending STOP\n");
1619 + return -EREMOTEIO;
1620 +@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface,
1621 + * Set bus frequency. The frequency is:
1622 + * 120,000,000 / ( 16 + 2 * div * 4^prescale).
1623 + * Using dev = 52, prescale = 0 give 100KHz */
1624 +- ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
1625 ++ ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
1626 + NULL, 0);
1627 + if (ret) {
1628 + dev_err(&interface->dev, "failure sending bit rate");
1629 +diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
1630 +index 016a6106151a5..3f28eb4d17fe7 100644
1631 +--- a/drivers/mmc/host/meson-gx-mmc.c
1632 ++++ b/drivers/mmc/host/meson-gx-mmc.c
1633 +@@ -165,6 +165,7 @@ struct meson_host {
1634 +
1635 + unsigned int bounce_buf_size;
1636 + void *bounce_buf;
1637 ++ void __iomem *bounce_iomem_buf;
1638 + dma_addr_t bounce_dma_addr;
1639 + struct sd_emmc_desc *descs;
1640 + dma_addr_t descs_dma_addr;
1641 +@@ -745,6 +746,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg)
1642 + writel(start, host->regs + SD_EMMC_START);
1643 + }
1644 +
1645 ++/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */
1646 ++static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data,
1647 ++ size_t buflen, bool to_buffer)
1648 ++{
1649 ++ unsigned int sg_flags = SG_MITER_ATOMIC;
1650 ++ struct scatterlist *sgl = data->sg;
1651 ++ unsigned int nents = data->sg_len;
1652 ++ struct sg_mapping_iter miter;
1653 ++ unsigned int offset = 0;
1654 ++
1655 ++ if (to_buffer)
1656 ++ sg_flags |= SG_MITER_FROM_SG;
1657 ++ else
1658 ++ sg_flags |= SG_MITER_TO_SG;
1659 ++
1660 ++ sg_miter_start(&miter, sgl, nents, sg_flags);
1661 ++
1662 ++ while ((offset < buflen) && sg_miter_next(&miter)) {
1663 ++ unsigned int len;
1664 ++
1665 ++ len = min(miter.length, buflen - offset);
1666 ++
1667 ++ /* When dram_access_quirk, the bounce buffer is an iomem mapping */
1668 ++ if (host->dram_access_quirk) {
1669 ++ if (to_buffer)
1670 ++ memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len);
1671 ++ else
1672 ++ memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len);
1673 ++ } else {
1674 ++ if (to_buffer)
1675 ++ memcpy(host->bounce_buf + offset, miter.addr, len);
1676 ++ else
1677 ++ memcpy(miter.addr, host->bounce_buf + offset, len);
1678 ++ }
1679 ++
1680 ++ offset += len;
1681 ++ }
1682 ++
1683 ++ sg_miter_stop(&miter);
1684 ++}
1685 ++
1686 + static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
1687 + {
1688 + struct meson_host *host = mmc_priv(mmc);
1689 +@@ -788,8 +830,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
1690 + if (data->flags & MMC_DATA_WRITE) {
1691 + cmd_cfg |= CMD_CFG_DATA_WR;
1692 + WARN_ON(xfer_bytes > host->bounce_buf_size);
1693 +- sg_copy_to_buffer(data->sg, data->sg_len,
1694 +- host->bounce_buf, xfer_bytes);
1695 ++ meson_mmc_copy_buffer(host, data, xfer_bytes, true);
1696 + dma_wmb();
1697 + }
1698 +
1699 +@@ -958,8 +999,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
1700 + if (meson_mmc_bounce_buf_read(data)) {
1701 + xfer_bytes = data->blksz * data->blocks;
1702 + WARN_ON(xfer_bytes > host->bounce_buf_size);
1703 +- sg_copy_from_buffer(data->sg, data->sg_len,
1704 +- host->bounce_buf, xfer_bytes);
1705 ++ meson_mmc_copy_buffer(host, data, xfer_bytes, false);
1706 + }
1707 +
1708 + next_cmd = meson_mmc_get_next_command(cmd);
1709 +@@ -1179,7 +1219,7 @@ static int meson_mmc_probe(struct platform_device *pdev)
1710 + * instead of the DDR memory
1711 + */
1712 + host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN;
1713 +- host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
1714 ++ host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
1715 + host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF;
1716 + } else {
1717 + /* data bounce buffer */
1718 +diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
1719 +index 9f30748da4ab9..8c38f224becbc 100644
1720 +--- a/drivers/net/caif/caif_serial.c
1721 ++++ b/drivers/net/caif/caif_serial.c
1722 +@@ -350,6 +350,7 @@ static int ldisc_open(struct tty_struct *tty)
1723 + rtnl_lock();
1724 + result = register_netdevice(dev);
1725 + if (result) {
1726 ++ tty_kref_put(tty);
1727 + rtnl_unlock();
1728 + free_netdev(dev);
1729 + return -ENODEV;
1730 +diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
1731 +index 17d5b649eb36b..e81dd34a3cac2 100644
1732 +--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
1733 ++++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
1734 +@@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
1735 + p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;
1736 +
1737 + p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled;
1738 ++ BUILD_BUG_ON(sizeof(dcbx_info->operational.params) !=
1739 ++ sizeof(p_hwfn->p_dcbx_info->set.config.params));
1740 + memcpy(&p_hwfn->p_dcbx_info->set.config.params,
1741 + &dcbx_info->operational.params,
1742 +- sizeof(struct qed_dcbx_admin_params));
1743 ++ sizeof(p_hwfn->p_dcbx_info->set.config.params));
1744 + p_hwfn->p_dcbx_info->set.config.valid = true;
1745 +
1746 + memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));
1747 +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
1748 +index 1df2c002c9f64..f7a56e05ec8a4 100644
1749 +--- a/drivers/net/ethernet/realtek/r8169_main.c
1750 ++++ b/drivers/net/ethernet/realtek/r8169_main.c
1751 +@@ -1673,7 +1673,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1752 + {
1753 + switch(stringset) {
1754 + case ETH_SS_STATS:
1755 +- memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings));
1756 ++ memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings));
1757 + break;
1758 + }
1759 + }
1760 +diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
1761 +index f029c7c03804f..393cf99856ed3 100644
1762 +--- a/drivers/net/ethernet/renesas/sh_eth.c
1763 ++++ b/drivers/net/ethernet/renesas/sh_eth.c
1764 +@@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
1765 + {
1766 + switch (stringset) {
1767 + case ETH_SS_STATS:
1768 +- memcpy(data, *sh_eth_gstrings_stats,
1769 ++ memcpy(data, sh_eth_gstrings_stats,
1770 + sizeof(sh_eth_gstrings_stats));
1771 + break;
1772 + }
1773 +diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
1774 +index 01bb36e7cff0a..6bd3a389d389c 100644
1775 +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
1776 ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
1777 +@@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev)
1778 + stat = be32_to_cpu(cur_p->app0);
1779 +
1780 + while (stat & STS_CTRL_APP0_CMPLT) {
1781 ++ /* Make sure that the other fields are read after bd is
1782 ++ * released by dma
1783 ++ */
1784 ++ rmb();
1785 + dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
1786 + be32_to_cpu(cur_p->len), DMA_TO_DEVICE);
1787 + skb = (struct sk_buff *)ptr_from_txbd(cur_p);
1788 + if (skb)
1789 + dev_consume_skb_irq(skb);
1790 +- cur_p->app0 = 0;
1791 + cur_p->app1 = 0;
1792 + cur_p->app2 = 0;
1793 + cur_p->app3 = 0;
1794 +@@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev)
1795 + ndev->stats.tx_packets++;
1796 + ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);
1797 +
1798 ++ /* app0 must be visible last, as it is used to flag
1799 ++ * availability of the bd
1800 ++ */
1801 ++ smp_mb();
1802 ++ cur_p->app0 = 0;
1803 ++
1804 + lp->tx_bd_ci++;
1805 + if (lp->tx_bd_ci >= lp->tx_bd_num)
1806 + lp->tx_bd_ci = 0;
1807 +@@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag)
1808 + if (cur_p->app0)
1809 + return NETDEV_TX_BUSY;
1810 +
1811 ++ /* Make sure to read next bd app0 after this one */
1812 ++ rmb();
1813 ++
1814 + tail++;
1815 + if (tail >= lp->tx_bd_num)
1816 + tail = 0;
1817 +@@ -930,6 +942,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
1818 + wmb();
1819 + lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
1820 +
1821 ++ if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
1822 ++ netdev_info(ndev, "%s -> netif_stop_queue\n", __func__);
1823 ++ netif_stop_queue(ndev);
1824 ++ }
1825 ++
1826 + return NETDEV_TX_OK;
1827 + }
1828 +
1829 +diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
1830 +index 9bd9a5c0b1db3..6bbc81ad295fb 100644
1831 +--- a/drivers/net/phy/dp83867.c
1832 ++++ b/drivers/net/phy/dp83867.c
1833 +@@ -826,16 +826,12 @@ static int dp83867_phy_reset(struct phy_device *phydev)
1834 + {
1835 + int err;
1836 +
1837 +- err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
1838 ++ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
1839 + if (err < 0)
1840 + return err;
1841 +
1842 + usleep_range(10, 20);
1843 +
1844 +- /* After reset FORCE_LINK_GOOD bit is set. Although the
1845 +- * default value should be unset. Disable FORCE_LINK_GOOD
1846 +- * for the phy to work properly.
1847 +- */
1848 + return phy_modify(phydev, MII_DP83867_PHYCTRL,
1849 + DP83867_PHYCR_FORCE_LINK_GOOD, 0);
1850 + }
1851 +diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
1852 +index 6700f1970b240..bc55ec739af90 100644
1853 +--- a/drivers/net/usb/qmi_wwan.c
1854 ++++ b/drivers/net/usb/qmi_wwan.c
1855 +@@ -575,7 +575,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
1856 +
1857 + if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
1858 + skb->protocol = htons(ETH_P_MAP);
1859 +- return (netif_rx(skb) == NET_RX_SUCCESS);
1860 ++ return 1;
1861 + }
1862 +
1863 + switch (skb->data[0] & 0xf0) {
1864 +diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
1865 +index 20fb5638ac653..23fae943a1192 100644
1866 +--- a/drivers/net/usb/r8152.c
1867 ++++ b/drivers/net/usb/r8152.c
1868 +@@ -6078,7 +6078,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1869 + {
1870 + switch (stringset) {
1871 + case ETH_SS_STATS:
1872 +- memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings));
1873 ++ memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings));
1874 + break;
1875 + }
1876 + }
1877 +diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
1878 +index fa7d4c20dc13a..30b39cb4056a3 100644
1879 +--- a/drivers/net/wireless/mac80211_hwsim.c
1880 ++++ b/drivers/net/wireless/mac80211_hwsim.c
1881 +@@ -1693,8 +1693,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw)
1882 + static void mac80211_hwsim_stop(struct ieee80211_hw *hw)
1883 + {
1884 + struct mac80211_hwsim_data *data = hw->priv;
1885 ++
1886 + data->started = false;
1887 + hrtimer_cancel(&data->beacon_timer);
1888 ++
1889 ++ while (!skb_queue_empty(&data->pending))
1890 ++ ieee80211_free_txskb(hw, skb_dequeue(&data->pending));
1891 ++
1892 + wiphy_dbg(hw->wiphy, "%s\n", __func__);
1893 + }
1894 +
1895 +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
1896 +index e4d4e399004b4..16a17215f633d 100644
1897 +--- a/drivers/pci/pci.c
1898 ++++ b/drivers/pci/pci.c
1899 +@@ -1870,11 +1870,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
1900 + int err;
1901 + int i, bars = 0;
1902 +
1903 +- if (atomic_inc_return(&dev->enable_cnt) > 1) {
1904 +- pci_update_current_state(dev, dev->current_state);
1905 +- return 0; /* already enabled */
1906 ++ /*
1907 ++ * Power state could be unknown at this point, either due to a fresh
1908 ++ * boot or a device removal call. So get the current power state
1909 ++ * so that things like MSI message writing will behave as expected
1910 ++ * (e.g. if the device really is in D0 at enable time).
1911 ++ */
1912 ++ if (dev->pm_cap) {
1913 ++ u16 pmcsr;
1914 ++ pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
1915 ++ dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
1916 + }
1917 +
1918 ++ if (atomic_inc_return(&dev->enable_cnt) > 1)
1919 ++ return 0; /* already enabled */
1920 ++
1921 + bridge = pci_upstream_bridge(dev);
1922 + if (bridge)
1923 + pci_enable_bridge(bridge);
1924 +diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c
1925 +index c12fa57ebd12c..165cb7a597155 100644
1926 +--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c
1927 ++++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c
1928 +@@ -845,8 +845,10 @@ static int microchip_sgpio_probe(struct platform_device *pdev)
1929 + i = 0;
1930 + device_for_each_child_node(dev, fwnode) {
1931 + ret = microchip_sgpio_register_bank(dev, priv, fwnode, i++);
1932 +- if (ret)
1933 ++ if (ret) {
1934 ++ fwnode_handle_put(fwnode);
1935 + return ret;
1936 ++ }
1937 + }
1938 +
1939 + if (priv->in.gpio.ngpio != priv->out.gpio.ngpio) {
1940 +diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
1941 +index 7d9bdedcd71bb..3af4430543dca 100644
1942 +--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
1943 ++++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
1944 +@@ -1229,7 +1229,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
1945 + struct device *dev = pctl->dev;
1946 + struct resource res;
1947 + int npins = STM32_GPIO_PINS_PER_BANK;
1948 +- int bank_nr, err;
1949 ++ int bank_nr, err, i = 0;
1950 +
1951 + if (!IS_ERR(bank->rstc))
1952 + reset_control_deassert(bank->rstc);
1953 +@@ -1251,9 +1251,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
1954 +
1955 + of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label);
1956 +
1957 +- if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) {
1958 ++ if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) {
1959 + bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK;
1960 + bank->gpio_chip.base = args.args[1];
1961 ++
1962 ++ npins = args.args[2];
1963 ++ while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
1964 ++ ++i, &args))
1965 ++ npins += args.args[2];
1966 + } else {
1967 + bank_nr = pctl->nbanks;
1968 + bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
1969 +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
1970 +index ed0b1bb99f083..a0356f3707b86 100644
1971 +--- a/drivers/scsi/sd.c
1972 ++++ b/drivers/scsi/sd.c
1973 +@@ -1387,6 +1387,22 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
1974 + }
1975 + }
1976 +
1977 ++static bool sd_need_revalidate(struct block_device *bdev,
1978 ++ struct scsi_disk *sdkp)
1979 ++{
1980 ++ if (sdkp->device->removable || sdkp->write_prot) {
1981 ++ if (bdev_check_media_change(bdev))
1982 ++ return true;
1983 ++ }
1984 ++
1985 ++ /*
1986 ++ * Force a full rescan after ioctl(BLKRRPART). While the disk state has
1987 ++ * nothing to do with partitions, BLKRRPART is used to force a full
1988 ++ * revalidate after things like a format for historical reasons.
1989 ++ */
1990 ++ return test_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
1991 ++}
1992 ++
1993 + /**
1994 + * sd_open - open a scsi disk device
1995 + * @bdev: Block device of the scsi disk to open
1996 +@@ -1423,10 +1439,8 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
1997 + if (!scsi_block_when_processing_errors(sdev))
1998 + goto error_out;
1999 +
2000 +- if (sdev->removable || sdkp->write_prot) {
2001 +- if (bdev_check_media_change(bdev))
2002 +- sd_revalidate_disk(bdev->bd_disk);
2003 +- }
2004 ++ if (sd_need_revalidate(bdev, sdkp))
2005 ++ sd_revalidate_disk(bdev->bd_disk);
2006 +
2007 + /*
2008 + * If the drive is empty, just let the open fail.
2009 +diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
2010 +index ab9035662717a..bcc0b5a3a459c 100644
2011 +--- a/drivers/spi/spi-nxp-fspi.c
2012 ++++ b/drivers/spi/spi-nxp-fspi.c
2013 +@@ -1033,12 +1033,6 @@ static int nxp_fspi_probe(struct platform_device *pdev)
2014 + goto err_put_ctrl;
2015 + }
2016 +
2017 +- /* Clear potential interrupts */
2018 +- reg = fspi_readl(f, f->iobase + FSPI_INTR);
2019 +- if (reg)
2020 +- fspi_writel(f, reg, f->iobase + FSPI_INTR);
2021 +-
2022 +-
2023 + /* find the resources - controller memory mapped space */
2024 + if (is_acpi_node(f->dev->fwnode))
2025 + res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
2026 +@@ -1076,6 +1070,11 @@ static int nxp_fspi_probe(struct platform_device *pdev)
2027 + }
2028 + }
2029 +
2030 ++ /* Clear potential interrupts */
2031 ++ reg = fspi_readl(f, f->iobase + FSPI_INTR);
2032 ++ if (reg)
2033 ++ fspi_writel(f, reg, f->iobase + FSPI_INTR);
2034 ++
2035 + /* find the irq */
2036 + ret = platform_get_irq(pdev, 0);
2037 + if (ret < 0)
2038 +diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
2039 +index 7bbfd58958bcc..d7e361fb05482 100644
2040 +--- a/drivers/xen/events/events_base.c
2041 ++++ b/drivers/xen/events/events_base.c
2042 +@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
2043 + }
2044 +
2045 + info->eoi_time = 0;
2046 ++
2047 ++ /* is_active hasn't been reset yet, do it now. */
2048 ++ smp_store_release(&info->is_active, 0);
2049 + do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
2050 + }
2051 +
2052 +@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port_t port)
2053 + BUG();
2054 + }
2055 +
2056 ++/* Not called for lateeoi events. */
2057 + static void event_handler_exit(struct irq_info *info)
2058 + {
2059 + smp_store_release(&info->is_active, 0);
2060 +@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
2061 +
2062 + if (VALID_EVTCHN(evtchn)) {
2063 + do_mask(info, EVT_MASK_REASON_EOI_PENDING);
2064 +- event_handler_exit(info);
2065 ++ /*
2066 ++ * Don't call event_handler_exit().
2067 ++ * Need to keep is_active non-zero in order to ignore re-raised
2068 ++ * events after cpu affinity changes while a lateeoi is pending.
2069 ++ */
2070 ++ clear_evtchn(evtchn);
2071 + }
2072 + }
2073 +
2074 +diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
2075 +index 26e66436f0058..c000fe338f7e0 100644
2076 +--- a/fs/ceph/addr.c
2077 ++++ b/fs/ceph/addr.c
2078 +@@ -1302,6 +1302,45 @@ ceph_find_incompatible(struct page *page)
2079 + return NULL;
2080 + }
2081 +
2082 ++/**
2083 ++ * skip_page_read - prep a page for writing without reading first
2084 ++ * @page: page being prepared
2085 ++ * @pos: starting position for the write
2086 ++ * @len: length of write
2087 ++ *
2088 ++ * In some cases, write_begin doesn't need to read at all:
2089 ++ * - full page write
2090 ++ * - file is currently zero-length
2091 ++ * - write that lies in a page that is completely beyond EOF
2092 ++ * - write that covers the page from start to EOF or beyond it
2093 ++ *
2094 ++ * If any of these criteria are met, then zero out the unwritten parts
2095 ++ * of the page and return true. Otherwise, return false.
2096 ++ */
2097 ++static bool skip_page_read(struct page *page, loff_t pos, size_t len)
2098 ++{
2099 ++ struct inode *inode = page->mapping->host;
2100 ++ loff_t i_size = i_size_read(inode);
2101 ++ size_t offset = offset_in_page(pos);
2102 ++
2103 ++ /* Full page write */
2104 ++ if (offset == 0 && len >= PAGE_SIZE)
2105 ++ return true;
2106 ++
2107 ++ /* pos beyond last page in the file */
2108 ++ if (pos - offset >= i_size)
2109 ++ goto zero_out;
2110 ++
2111 ++ /* write that covers the whole page from start to EOF or beyond it */
2112 ++ if (offset == 0 && (pos + len) >= i_size)
2113 ++ goto zero_out;
2114 ++
2115 ++ return false;
2116 ++zero_out:
2117 ++ zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
2118 ++ return true;
2119 ++}
2120 ++
2121 + /*
2122 + * We are only allowed to write into/dirty the page if the page is
2123 + * clean, or already dirty within the same snap context.
2124 +@@ -1315,7 +1354,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
2125 + struct ceph_snap_context *snapc;
2126 + struct page *page = NULL;
2127 + pgoff_t index = pos >> PAGE_SHIFT;
2128 +- int pos_in_page = pos & ~PAGE_MASK;
2129 + int r = 0;
2130 +
2131 + dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
2132 +@@ -1350,19 +1388,9 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
2133 + break;
2134 + }
2135 +
2136 +- /*
2137 +- * In some cases we don't need to read at all:
2138 +- * - full page write
2139 +- * - write that lies completely beyond EOF
2140 +- * - write that covers the the page from start to EOF or beyond it
2141 +- */
2142 +- if ((pos_in_page == 0 && len == PAGE_SIZE) ||
2143 +- (pos >= i_size_read(inode)) ||
2144 +- (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) {
2145 +- zero_user_segments(page, 0, pos_in_page,
2146 +- pos_in_page + len, PAGE_SIZE);
2147 ++ /* No need to read in some cases */
2148 ++ if (skip_page_read(page, pos, len))
2149 + break;
2150 +- }
2151 +
2152 + /*
2153 + * We need to read it. If we get back -EINPROGRESS, then the page was
2154 +diff --git a/fs/ceph/file.c b/fs/ceph/file.c
2155 +index 209535d5b8d38..3d2e3dd4ee01d 100644
2156 +--- a/fs/ceph/file.c
2157 ++++ b/fs/ceph/file.c
2158 +@@ -578,6 +578,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
2159 + struct ceph_inode_info *ci = ceph_inode(dir);
2160 + struct inode *inode;
2161 + struct timespec64 now;
2162 ++ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
2163 + struct ceph_vino vino = { .ino = req->r_deleg_ino,
2164 + .snap = CEPH_NOSNAP };
2165 +
2166 +@@ -615,8 +616,10 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
2167 +
2168 + ceph_file_layout_to_legacy(lo, &in.layout);
2169 +
2170 ++ down_read(&mdsc->snap_rwsem);
2171 + ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
2172 + req->r_fmode, NULL);
2173 ++ up_read(&mdsc->snap_rwsem);
2174 + if (ret) {
2175 + dout("%s failed to fill inode: %d\n", __func__, ret);
2176 + ceph_dir_clear_complete(dir);
2177 +diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
2178 +index 179d2ef69a24a..7ee6023adb363 100644
2179 +--- a/fs/ceph/inode.c
2180 ++++ b/fs/ceph/inode.c
2181 +@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
2182 + bool new_version = false;
2183 + bool fill_inline = false;
2184 +
2185 ++ lockdep_assert_held(&mdsc->snap_rwsem);
2186 ++
2187 + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__,
2188 + inode, ceph_vinop(inode), le64_to_cpu(info->version),
2189 + ci->i_version);
2190 +diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
2191 +index 303d71430bdd1..9c6c0e2e5880a 100644
2192 +--- a/fs/nilfs2/sysfs.c
2193 ++++ b/fs/nilfs2/sysfs.c
2194 +@@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
2195 + nilfs_sysfs_delete_superblock_group(nilfs);
2196 + nilfs_sysfs_delete_segctor_group(nilfs);
2197 + kobject_del(&nilfs->ns_dev_kobj);
2198 ++ kobject_put(&nilfs->ns_dev_kobj);
2199 + kfree(nilfs->ns_dev_subgroups);
2200 + }
2201 +
2202 +diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
2203 +index fb8b07daa9d15..875e002a41804 100644
2204 +--- a/include/keys/system_keyring.h
2205 ++++ b/include/keys/system_keyring.h
2206 +@@ -31,6 +31,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
2207 + #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
2208 + #endif
2209 +
2210 ++extern struct pkcs7_message *pkcs7;
2211 + #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING
2212 + extern int mark_hash_blacklisted(const char *hash);
2213 + extern int is_hash_blacklisted(const u8 *hash, size_t hash_len,
2214 +@@ -49,6 +50,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len)
2215 + }
2216 + #endif
2217 +
2218 ++#ifdef CONFIG_SYSTEM_REVOCATION_LIST
2219 ++extern int add_key_to_revocation_list(const char *data, size_t size);
2220 ++extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7);
2221 ++#else
2222 ++static inline int add_key_to_revocation_list(const char *data, size_t size)
2223 ++{
2224 ++ return 0;
2225 ++}
2226 ++static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7)
2227 ++{
2228 ++ return -ENOKEY;
2229 ++}
2230 ++#endif
2231 ++
2232 + #ifdef CONFIG_IMA_BLACKLIST_KEYRING
2233 + extern struct key *ima_blacklist_keyring;
2234 +
2235 +diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
2236 +index 2915f56ad4214..edb5c186b0b7a 100644
2237 +--- a/include/linux/debug_locks.h
2238 ++++ b/include/linux/debug_locks.h
2239 +@@ -27,8 +27,10 @@ extern int debug_locks_off(void);
2240 + int __ret = 0; \
2241 + \
2242 + if (!oops_in_progress && unlikely(c)) { \
2243 ++ instrumentation_begin(); \
2244 + if (debug_locks_off() && !debug_locks_silent) \
2245 + WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \
2246 ++ instrumentation_end(); \
2247 + __ret = 1; \
2248 + } \
2249 + __ret; \
2250 +diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
2251 +index ba973efcd3692..6686a0baa91d3 100644
2252 +--- a/include/linux/huge_mm.h
2253 ++++ b/include/linux/huge_mm.h
2254 +@@ -289,6 +289,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
2255 + vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
2256 +
2257 + extern struct page *huge_zero_page;
2258 ++extern unsigned long huge_zero_pfn;
2259 +
2260 + static inline bool is_huge_zero_page(struct page *page)
2261 + {
2262 +@@ -297,7 +298,7 @@ static inline bool is_huge_zero_page(struct page *page)
2263 +
2264 + static inline bool is_huge_zero_pmd(pmd_t pmd)
2265 + {
2266 +- return is_huge_zero_page(pmd_page(pmd));
2267 ++ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
2268 + }
2269 +
2270 + static inline bool is_huge_zero_pud(pud_t pud)
2271 +@@ -443,6 +444,11 @@ static inline bool is_huge_zero_page(struct page *page)
2272 + return false;
2273 + }
2274 +
2275 ++static inline bool is_huge_zero_pmd(pmd_t pmd)
2276 ++{
2277 ++ return false;
2278 ++}
2279 ++
2280 + static inline bool is_huge_zero_pud(pud_t pud)
2281 + {
2282 + return false;
2283 +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
2284 +index 5dae4187210d9..28fa3f9bbbfdd 100644
2285 +--- a/include/linux/hugetlb.h
2286 ++++ b/include/linux/hugetlb.h
2287 +@@ -728,17 +728,6 @@ static inline int hstate_index(struct hstate *h)
2288 + return h - hstates;
2289 + }
2290 +
2291 +-pgoff_t __basepage_index(struct page *page);
2292 +-
2293 +-/* Return page->index in PAGE_SIZE units */
2294 +-static inline pgoff_t basepage_index(struct page *page)
2295 +-{
2296 +- if (!PageCompound(page))
2297 +- return page->index;
2298 +-
2299 +- return __basepage_index(page);
2300 +-}
2301 +-
2302 + extern int dissolve_free_huge_page(struct page *page);
2303 + extern int dissolve_free_huge_pages(unsigned long start_pfn,
2304 + unsigned long end_pfn);
2305 +@@ -969,11 +958,6 @@ static inline int hstate_index(struct hstate *h)
2306 + return 0;
2307 + }
2308 +
2309 +-static inline pgoff_t basepage_index(struct page *page)
2310 +-{
2311 +- return page->index;
2312 +-}
2313 +-
2314 + static inline int dissolve_free_huge_page(struct page *page)
2315 + {
2316 + return 0;
2317 +diff --git a/include/linux/mm.h b/include/linux/mm.h
2318 +index 6c1b29bb35636..cfb0842a7fb96 100644
2319 +--- a/include/linux/mm.h
2320 ++++ b/include/linux/mm.h
2321 +@@ -1680,6 +1680,7 @@ struct zap_details {
2322 + struct address_space *check_mapping; /* Check page->mapping if set */
2323 + pgoff_t first_index; /* Lowest page->index to unmap */
2324 + pgoff_t last_index; /* Highest page->index to unmap */
2325 ++ struct page *single_page; /* Locked page to be unmapped */
2326 + };
2327 +
2328 + struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
2329 +@@ -1727,6 +1728,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
2330 + extern int fixup_user_fault(struct mm_struct *mm,
2331 + unsigned long address, unsigned int fault_flags,
2332 + bool *unlocked);
2333 ++void unmap_mapping_page(struct page *page);
2334 + void unmap_mapping_pages(struct address_space *mapping,
2335 + pgoff_t start, pgoff_t nr, bool even_cows);
2336 + void unmap_mapping_range(struct address_space *mapping,
2337 +@@ -1747,6 +1749,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
2338 + BUG();
2339 + return -EFAULT;
2340 + }
2341 ++static inline void unmap_mapping_page(struct page *page) { }
2342 + static inline void unmap_mapping_pages(struct address_space *mapping,
2343 + pgoff_t start, pgoff_t nr, bool even_cows) { }
2344 + static inline void unmap_mapping_range(struct address_space *mapping,
2345 +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
2346 +index 8c9947fd62f30..e0023e5f9aa67 100644
2347 +--- a/include/linux/pagemap.h
2348 ++++ b/include/linux/pagemap.h
2349 +@@ -501,7 +501,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
2350 + }
2351 +
2352 + /*
2353 +- * Get index of the page with in radix-tree
2354 ++ * Get index of the page within radix-tree (but not for hugetlb pages).
2355 + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
2356 + */
2357 + static inline pgoff_t page_to_index(struct page *page)
2358 +@@ -520,15 +520,16 @@ static inline pgoff_t page_to_index(struct page *page)
2359 + return pgoff;
2360 + }
2361 +
2362 ++extern pgoff_t hugetlb_basepage_index(struct page *page);
2363 ++
2364 + /*
2365 +- * Get the offset in PAGE_SIZE.
2366 +- * (TODO: hugepage should have ->index in PAGE_SIZE)
2367 ++ * Get the offset in PAGE_SIZE (even for hugetlb pages).
2368 ++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
2369 + */
2370 + static inline pgoff_t page_to_pgoff(struct page *page)
2371 + {
2372 +- if (unlikely(PageHeadHuge(page)))
2373 +- return page->index << compound_order(page);
2374 +-
2375 ++ if (unlikely(PageHuge(page)))
2376 ++ return hugetlb_basepage_index(page);
2377 + return page_to_index(page);
2378 + }
2379 +
2380 +diff --git a/include/linux/rmap.h b/include/linux/rmap.h
2381 +index def5c62c93b3b..8d04e7deedc66 100644
2382 +--- a/include/linux/rmap.h
2383 ++++ b/include/linux/rmap.h
2384 +@@ -91,6 +91,7 @@ enum ttu_flags {
2385 +
2386 + TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
2387 + TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
2388 ++ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */
2389 + TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
2390 + TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible
2391 + * and caller guarantees they will
2392 +diff --git a/include/net/sock.h b/include/net/sock.h
2393 +index 62e3811e95a78..b9bdeca1d784f 100644
2394 +--- a/include/net/sock.h
2395 ++++ b/include/net/sock.h
2396 +@@ -1928,7 +1928,8 @@ static inline u32 net_tx_rndhash(void)
2397 +
2398 + static inline void sk_set_txhash(struct sock *sk)
2399 + {
2400 +- sk->sk_txhash = net_tx_rndhash();
2401 ++ /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
2402 ++ WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
2403 + }
2404 +
2405 + static inline bool sk_rethink_txhash(struct sock *sk)
2406 +@@ -2200,9 +2201,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
2407 +
2408 + static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
2409 + {
2410 +- if (sk->sk_txhash) {
2411 ++ /* This pairs with WRITE_ONCE() in sk_set_txhash() */
2412 ++ u32 txhash = READ_ONCE(sk->sk_txhash);
2413 ++
2414 ++ if (txhash) {
2415 + skb->l4_hash = 1;
2416 +- skb->hash = sk->sk_txhash;
2417 ++ skb->hash = txhash;
2418 + }
2419 + }
2420 +
2421 +@@ -2260,8 +2264,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
2422 + static inline int sock_error(struct sock *sk)
2423 + {
2424 + int err;
2425 +- if (likely(!sk->sk_err))
2426 ++
2427 ++ /* Avoid an atomic operation for the common case.
2428 ++ * This is racy since another cpu/thread can change sk_err under us.
2429 ++ */
2430 ++ if (likely(data_race(!sk->sk_err)))
2431 + return 0;
2432 ++
2433 + err = xchg(&sk->sk_err, 0);
2434 + return -err;
2435 + }
2436 +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
2437 +index fe4c01c14ab2c..e96f3808e4316 100644
2438 +--- a/kernel/dma/swiotlb.c
2439 ++++ b/kernel/dma/swiotlb.c
2440 +@@ -724,11 +724,17 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
2441 + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
2442 + size_t orig_size = io_tlb_orig_size[index];
2443 + phys_addr_t orig_addr = io_tlb_orig_addr[index];
2444 ++ unsigned int tlb_offset;
2445 +
2446 + if (orig_addr == INVALID_PHYS_ADDR)
2447 + return;
2448 +
2449 +- validate_sync_size_and_truncate(hwdev, orig_size, &size);
2450 ++ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
2451 ++ swiotlb_align_offset(hwdev, orig_addr);
2452 ++
2453 ++ orig_addr += tlb_offset;
2454 ++
2455 ++ validate_sync_size_and_truncate(hwdev, orig_size - tlb_offset, &size);
2456 +
2457 + switch (target) {
2458 + case SYNC_FOR_CPU:
2459 +diff --git a/kernel/futex.c b/kernel/futex.c
2460 +index a8629b695d38e..5aa6d0a6c7677 100644
2461 +--- a/kernel/futex.c
2462 ++++ b/kernel/futex.c
2463 +@@ -35,7 +35,6 @@
2464 + #include <linux/jhash.h>
2465 + #include <linux/pagemap.h>
2466 + #include <linux/syscalls.h>
2467 +-#include <linux/hugetlb.h>
2468 + #include <linux/freezer.h>
2469 + #include <linux/memblock.h>
2470 + #include <linux/fault-inject.h>
2471 +@@ -650,7 +649,7 @@ again:
2472 +
2473 + key->both.offset |= FUT_OFF_INODE; /* inode-based key */
2474 + key->shared.i_seq = get_inode_sequence_number(inode);
2475 +- key->shared.pgoff = basepage_index(tail);
2476 ++ key->shared.pgoff = page_to_pgoff(tail);
2477 + rcu_read_unlock();
2478 + }
2479 +
2480 +diff --git a/kernel/kthread.c b/kernel/kthread.c
2481 +index 6d3c488a0f824..4fdf2bd9b5589 100644
2482 +--- a/kernel/kthread.c
2483 ++++ b/kernel/kthread.c
2484 +@@ -1092,8 +1092,38 @@ void kthread_flush_work(struct kthread_work *work)
2485 + EXPORT_SYMBOL_GPL(kthread_flush_work);
2486 +
2487 + /*
2488 +- * This function removes the work from the worker queue. Also it makes sure
2489 +- * that it won't get queued later via the delayed work's timer.
2490 ++ * Make sure that the timer is neither set nor running and could
2491 ++ * not manipulate the work list_head any longer.
2492 ++ *
2493 ++ * The function is called under worker->lock. The lock is temporarily
2494 ++ * released but the timer can't be set again in the meantime.
2495 ++ */
2496 ++static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
2497 ++ unsigned long *flags)
2498 ++{
2499 ++ struct kthread_delayed_work *dwork =
2500 ++ container_of(work, struct kthread_delayed_work, work);
2501 ++ struct kthread_worker *worker = work->worker;
2502 ++
2503 ++ /*
2504 ++ * del_timer_sync() must be called to make sure that the timer
2505 ++ * callback is not running. The lock must be temporarily released
2506 ++ * to avoid a deadlock with the callback. In the meantime,
2507 ++ * any queuing is blocked by setting the canceling counter.
2508 ++ */
2509 ++ work->canceling++;
2510 ++ raw_spin_unlock_irqrestore(&worker->lock, *flags);
2511 ++ del_timer_sync(&dwork->timer);
2512 ++ raw_spin_lock_irqsave(&worker->lock, *flags);
2513 ++ work->canceling--;
2514 ++}
2515 ++
2516 ++/*
2517 ++ * This function removes the work from the worker queue.
2518 ++ *
2519 ++ * It is called under worker->lock. The caller must make sure that
2520 ++ * the timer used by delayed work is not running, e.g. by calling
2521 ++ * kthread_cancel_delayed_work_timer().
2522 + *
2523 + * The work might still be in use when this function finishes. See the
2524 + * current_work proceed by the worker.
2525 +@@ -1101,28 +1131,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work);
2526 + * Return: %true if @work was pending and successfully canceled,
2527 + * %false if @work was not pending
2528 + */
2529 +-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
2530 +- unsigned long *flags)
2531 ++static bool __kthread_cancel_work(struct kthread_work *work)
2532 + {
2533 +- /* Try to cancel the timer if exists. */
2534 +- if (is_dwork) {
2535 +- struct kthread_delayed_work *dwork =
2536 +- container_of(work, struct kthread_delayed_work, work);
2537 +- struct kthread_worker *worker = work->worker;
2538 +-
2539 +- /*
2540 +- * del_timer_sync() must be called to make sure that the timer
2541 +- * callback is not running. The lock must be temporary released
2542 +- * to avoid a deadlock with the callback. In the meantime,
2543 +- * any queuing is blocked by setting the canceling counter.
2544 +- */
2545 +- work->canceling++;
2546 +- raw_spin_unlock_irqrestore(&worker->lock, *flags);
2547 +- del_timer_sync(&dwork->timer);
2548 +- raw_spin_lock_irqsave(&worker->lock, *flags);
2549 +- work->canceling--;
2550 +- }
2551 +-
2552 + /*
2553 + * Try to remove the work from a worker list. It might either
2554 + * be from worker->work_list or from worker->delayed_work_list.
2555 +@@ -1175,11 +1185,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
2556 + /* Work must not be used with >1 worker, see kthread_queue_work() */
2557 + WARN_ON_ONCE(work->worker != worker);
2558 +
2559 +- /* Do not fight with another command that is canceling this work. */
2560 ++ /*
2561 ++ * Temporarily cancel the work but do not fight with another command
2562 ++ * that is canceling the work as well.
2563 ++ *
2564 ++ * It is a bit tricky because of possible races with another
2565 ++ * mod_delayed_work() and cancel_delayed_work() callers.
2566 ++ *
2567 ++ * The timer must be canceled first because worker->lock is released
2568 ++ * when doing so. But the work can be removed from the queue (list)
2569 ++ * only when it can be queued again so that the return value can
2570 ++ * be used for reference counting.
2571 ++ */
2572 ++ kthread_cancel_delayed_work_timer(work, &flags);
2573 + if (work->canceling)
2574 + goto out;
2575 ++ ret = __kthread_cancel_work(work);
2576 +
2577 +- ret = __kthread_cancel_work(work, true, &flags);
2578 + fast_queue:
2579 + __kthread_queue_delayed_work(worker, dwork, delay);
2580 + out:
2581 +@@ -1201,7 +1223,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
2582 + /* Work must not be used with >1 worker, see kthread_queue_work(). */
2583 + WARN_ON_ONCE(work->worker != worker);
2584 +
2585 +- ret = __kthread_cancel_work(work, is_dwork, &flags);
2586 ++ if (is_dwork)
2587 ++ kthread_cancel_delayed_work_timer(work, &flags);
2588 ++
2589 ++ ret = __kthread_cancel_work(work);
2590 +
2591 + if (worker->current_work != work)
2592 + goto out_fast;
2593 +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
2594 +index f39c383c71804..5bf6b1659215d 100644
2595 +--- a/kernel/locking/lockdep.c
2596 ++++ b/kernel/locking/lockdep.c
2597 +@@ -842,7 +842,7 @@ static int count_matching_names(struct lock_class *new_class)
2598 + }
2599 +
2600 + /* used from NMI context -- must be lockless */
2601 +-static __always_inline struct lock_class *
2602 ++static noinstr struct lock_class *
2603 + look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
2604 + {
2605 + struct lockdep_subclass_key *key;
2606 +@@ -850,12 +850,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
2607 + struct lock_class *class;
2608 +
2609 + if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
2610 ++ instrumentation_begin();
2611 + debug_locks_off();
2612 + printk(KERN_ERR
2613 + "BUG: looking up invalid subclass: %u\n", subclass);
2614 + printk(KERN_ERR
2615 + "turning off the locking correctness validator.\n");
2616 + dump_stack();
2617 ++ instrumentation_end();
2618 + return NULL;
2619 + }
2620 +
2621 +diff --git a/kernel/module.c b/kernel/module.c
2622 +index 30479355ab850..260d6f3f6d68f 100644
2623 +--- a/kernel/module.c
2624 ++++ b/kernel/module.c
2625 +@@ -266,9 +266,18 @@ static void module_assert_mutex_or_preempt(void)
2626 + #endif
2627 + }
2628 +
2629 ++#ifdef CONFIG_MODULE_SIG
2630 + static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
2631 + module_param(sig_enforce, bool_enable_only, 0644);
2632 +
2633 ++void set_module_sig_enforced(void)
2634 ++{
2635 ++ sig_enforce = true;
2636 ++}
2637 ++#else
2638 ++#define sig_enforce false
2639 ++#endif
2640 ++
2641 + /*
2642 + * Export sig_enforce kernel cmdline parameter to allow other subsystems rely
2643 + * on that instead of directly to CONFIG_MODULE_SIG_FORCE config.
2644 +@@ -279,11 +288,6 @@ bool is_module_sig_enforced(void)
2645 + }
2646 + EXPORT_SYMBOL(is_module_sig_enforced);
2647 +
2648 +-void set_module_sig_enforced(void)
2649 +-{
2650 +- sig_enforce = true;
2651 +-}
2652 +-
2653 + /* Block module loading/unloading? */
2654 + int modules_disabled = 0;
2655 + core_param(nomodule, modules_disabled, bint, 0);
2656 +diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
2657 +index 651218ded9817..ef37acd28e4ac 100644
2658 +--- a/kernel/sched/psi.c
2659 ++++ b/kernel/sched/psi.c
2660 +@@ -965,7 +965,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
2661 + */
2662 + void cgroup_move_task(struct task_struct *task, struct css_set *to)
2663 + {
2664 +- unsigned int task_flags = 0;
2665 ++ unsigned int task_flags;
2666 + struct rq_flags rf;
2667 + struct rq *rq;
2668 +
2669 +@@ -980,15 +980,31 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
2670 +
2671 + rq = task_rq_lock(task, &rf);
2672 +
2673 +- if (task_on_rq_queued(task)) {
2674 +- task_flags = TSK_RUNNING;
2675 +- if (task_current(rq, task))
2676 +- task_flags |= TSK_ONCPU;
2677 +- } else if (task->in_iowait)
2678 +- task_flags = TSK_IOWAIT;
2679 +-
2680 +- if (task->in_memstall)
2681 +- task_flags |= TSK_MEMSTALL;
2682 ++ /*
2683 ++ * We may race with schedule() dropping the rq lock between
2684 ++ * deactivating prev and switching to next. Because the psi
2685 ++ * updates from the deactivation are deferred to the switch
2686 ++ * callback to save cgroup tree updates, the task's scheduling
2687 ++ * state here is not coherent with its psi state:
2688 ++ *
2689 ++ * schedule() cgroup_move_task()
2690 ++ * rq_lock()
2691 ++ * deactivate_task()
2692 ++ * p->on_rq = 0
2693 ++ * psi_dequeue() // defers TSK_RUNNING & TSK_IOWAIT updates
2694 ++ * pick_next_task()
2695 ++ * rq_unlock()
2696 ++ * rq_lock()
2697 ++ * psi_task_change() // old cgroup
2698 ++ * task->cgroups = to
2699 ++ * psi_task_change() // new cgroup
2700 ++ * rq_unlock()
2701 ++ * rq_lock()
2702 ++ * psi_sched_switch() // does deferred updates in new cgroup
2703 ++ *
2704 ++ * Don't rely on the scheduling state. Use psi_flags instead.
2705 ++ */
2706 ++ task_flags = task->psi_flags;
2707 +
2708 + if (task_flags)
2709 + psi_task_change(task, task_flags, 0);
2710 +diff --git a/lib/debug_locks.c b/lib/debug_locks.c
2711 +index 06d3135bd184c..a75ee30b77cb8 100644
2712 +--- a/lib/debug_locks.c
2713 ++++ b/lib/debug_locks.c
2714 +@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
2715 + /*
2716 + * Generic 'turn off all lock debugging' function:
2717 + */
2718 +-noinstr int debug_locks_off(void)
2719 ++int debug_locks_off(void)
2720 + {
2721 + if (debug_locks && __debug_locks_off()) {
2722 + if (!debug_locks_silent) {
2723 +diff --git a/mm/huge_memory.c b/mm/huge_memory.c
2724 +index ae907a9c20506..44c455dbbd637 100644
2725 +--- a/mm/huge_memory.c
2726 ++++ b/mm/huge_memory.c
2727 +@@ -61,6 +61,7 @@ static struct shrinker deferred_split_shrinker;
2728 +
2729 + static atomic_t huge_zero_refcount;
2730 + struct page *huge_zero_page __read_mostly;
2731 ++unsigned long huge_zero_pfn __read_mostly = ~0UL;
2732 +
2733 + bool transparent_hugepage_enabled(struct vm_area_struct *vma)
2734 + {
2735 +@@ -97,6 +98,7 @@ retry:
2736 + __free_pages(zero_page, compound_order(zero_page));
2737 + goto retry;
2738 + }
2739 ++ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
2740 +
2741 + /* We take additional reference here. It will be put back by shrinker */
2742 + atomic_set(&huge_zero_refcount, 2);
2743 +@@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
2744 + if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
2745 + struct page *zero_page = xchg(&huge_zero_page, NULL);
2746 + BUG_ON(zero_page == NULL);
2747 ++ WRITE_ONCE(huge_zero_pfn, ~0UL);
2748 + __free_pages(zero_page, compound_order(zero_page));
2749 + return HPAGE_PMD_NR;
2750 + }
2751 +@@ -2046,7 +2049,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
2752 + count_vm_event(THP_SPLIT_PMD);
2753 +
2754 + if (!vma_is_anonymous(vma)) {
2755 +- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
2756 ++ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
2757 + /*
2758 + * We are going to unmap this huge page. So
2759 + * just go ahead and zap it
2760 +@@ -2055,16 +2058,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
2761 + zap_deposited_table(mm, pmd);
2762 + if (vma_is_special_huge(vma))
2763 + return;
2764 +- page = pmd_page(_pmd);
2765 +- if (!PageDirty(page) && pmd_dirty(_pmd))
2766 +- set_page_dirty(page);
2767 +- if (!PageReferenced(page) && pmd_young(_pmd))
2768 +- SetPageReferenced(page);
2769 +- page_remove_rmap(page, true);
2770 +- put_page(page);
2771 ++ if (unlikely(is_pmd_migration_entry(old_pmd))) {
2772 ++ swp_entry_t entry;
2773 ++
2774 ++ entry = pmd_to_swp_entry(old_pmd);
2775 ++ page = migration_entry_to_page(entry);
2776 ++ } else {
2777 ++ page = pmd_page(old_pmd);
2778 ++ if (!PageDirty(page) && pmd_dirty(old_pmd))
2779 ++ set_page_dirty(page);
2780 ++ if (!PageReferenced(page) && pmd_young(old_pmd))
2781 ++ SetPageReferenced(page);
2782 ++ page_remove_rmap(page, true);
2783 ++ put_page(page);
2784 ++ }
2785 + add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
2786 + return;
2787 +- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
2788 ++ }
2789 ++
2790 ++ if (is_huge_zero_pmd(*pmd)) {
2791 + /*
2792 + * FIXME: Do we want to invalidate secondary mmu by calling
2793 + * mmu_notifier_invalidate_range() see comments below inside
2794 +@@ -2346,17 +2358,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
2795 +
2796 + static void unmap_page(struct page *page)
2797 + {
2798 +- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
2799 ++ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
2800 + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
2801 +- bool unmap_success;
2802 +
2803 + VM_BUG_ON_PAGE(!PageHead(page), page);
2804 +
2805 + if (PageAnon(page))
2806 + ttu_flags |= TTU_SPLIT_FREEZE;
2807 +
2808 +- unmap_success = try_to_unmap(page, ttu_flags);
2809 +- VM_BUG_ON_PAGE(!unmap_success, page);
2810 ++ try_to_unmap(page, ttu_flags);
2811 ++
2812 ++ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
2813 + }
2814 +
2815 + static void remap_page(struct page *page, unsigned int nr)
2816 +@@ -2667,7 +2679,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2817 + struct deferred_split *ds_queue = get_deferred_split_queue(head);
2818 + struct anon_vma *anon_vma = NULL;
2819 + struct address_space *mapping = NULL;
2820 +- int count, mapcount, extra_pins, ret;
2821 ++ int extra_pins, ret;
2822 + pgoff_t end;
2823 +
2824 + VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
2825 +@@ -2726,7 +2738,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2826 + }
2827 +
2828 + unmap_page(head);
2829 +- VM_BUG_ON_PAGE(compound_mapcount(head), head);
2830 +
2831 + /* block interrupt reentry in xa_lock and spinlock */
2832 + local_irq_disable();
2833 +@@ -2744,9 +2755,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2834 +
2835 + /* Prevent deferred_split_scan() touching ->_refcount */
2836 + spin_lock(&ds_queue->split_queue_lock);
2837 +- count = page_count(head);
2838 +- mapcount = total_mapcount(head);
2839 +- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
2840 ++ if (page_ref_freeze(head, 1 + extra_pins)) {
2841 + if (!list_empty(page_deferred_list(head))) {
2842 + ds_queue->split_queue_len--;
2843 + list_del(page_deferred_list(head));
2844 +@@ -2766,16 +2775,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
2845 + __split_huge_page(page, list, end);
2846 + ret = 0;
2847 + } else {
2848 +- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
2849 +- pr_alert("total_mapcount: %u, page_count(): %u\n",
2850 +- mapcount, count);
2851 +- if (PageTail(page))
2852 +- dump_page(head, NULL);
2853 +- dump_page(page, "total_mapcount(head) > 0");
2854 +- BUG();
2855 +- }
2856 + spin_unlock(&ds_queue->split_queue_lock);
2857 +-fail: if (mapping)
2858 ++fail:
2859 ++ if (mapping)
2860 + xa_unlock(&mapping->i_pages);
2861 + local_irq_enable();
2862 + remap_page(head, thp_nr_pages(head));
2863 +diff --git a/mm/hugetlb.c b/mm/hugetlb.c
2864 +index 3da4817190f3d..7ba7d9b20494a 100644
2865 +--- a/mm/hugetlb.c
2866 ++++ b/mm/hugetlb.c
2867 +@@ -1584,15 +1584,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
2868 + return NULL;
2869 + }
2870 +
2871 +-pgoff_t __basepage_index(struct page *page)
2872 ++pgoff_t hugetlb_basepage_index(struct page *page)
2873 + {
2874 + struct page *page_head = compound_head(page);
2875 + pgoff_t index = page_index(page_head);
2876 + unsigned long compound_idx;
2877 +
2878 +- if (!PageHuge(page_head))
2879 +- return page_index(page);
2880 +-
2881 + if (compound_order(page_head) >= MAX_ORDER)
2882 + compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
2883 + else
2884 +diff --git a/mm/internal.h b/mm/internal.h
2885 +index 1432feec62df0..08323e622bbd1 100644
2886 +--- a/mm/internal.h
2887 ++++ b/mm/internal.h
2888 +@@ -379,27 +379,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
2889 + extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
2890 +
2891 + /*
2892 +- * At what user virtual address is page expected in @vma?
2893 ++ * At what user virtual address is page expected in vma?
2894 ++ * Returns -EFAULT if all of the page is outside the range of vma.
2895 ++ * If page is a compound head, the entire compound page is considered.
2896 + */
2897 + static inline unsigned long
2898 +-__vma_address(struct page *page, struct vm_area_struct *vma)
2899 ++vma_address(struct page *page, struct vm_area_struct *vma)
2900 + {
2901 +- pgoff_t pgoff = page_to_pgoff(page);
2902 +- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
2903 ++ pgoff_t pgoff;
2904 ++ unsigned long address;
2905 ++
2906 ++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
2907 ++ pgoff = page_to_pgoff(page);
2908 ++ if (pgoff >= vma->vm_pgoff) {
2909 ++ address = vma->vm_start +
2910 ++ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
2911 ++ /* Check for address beyond vma (or wrapped through 0?) */
2912 ++ if (address < vma->vm_start || address >= vma->vm_end)
2913 ++ address = -EFAULT;
2914 ++ } else if (PageHead(page) &&
2915 ++ pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
2916 ++ /* Test above avoids possibility of wrap to 0 on 32-bit */
2917 ++ address = vma->vm_start;
2918 ++ } else {
2919 ++ address = -EFAULT;
2920 ++ }
2921 ++ return address;
2922 + }
2923 +
2924 ++/*
2925 ++ * Then at what user virtual address will none of the page be found in vma?
2926 ++ * Assumes that vma_address() already returned a good starting address.
2927 ++ * If page is a compound head, the entire compound page is considered.
2928 ++ */
2929 + static inline unsigned long
2930 +-vma_address(struct page *page, struct vm_area_struct *vma)
2931 ++vma_address_end(struct page *page, struct vm_area_struct *vma)
2932 + {
2933 +- unsigned long start, end;
2934 +-
2935 +- start = __vma_address(page, vma);
2936 +- end = start + thp_size(page) - PAGE_SIZE;
2937 +-
2938 +- /* page should be within @vma mapping range */
2939 +- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
2940 +-
2941 +- return max(start, vma->vm_start);
2942 ++ pgoff_t pgoff;
2943 ++ unsigned long address;
2944 ++
2945 ++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
2946 ++ pgoff = page_to_pgoff(page) + compound_nr(page);
2947 ++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
2948 ++ /* Check for address beyond vma (or wrapped through 0?) */
2949 ++ if (address < vma->vm_start || address > vma->vm_end)
2950 ++ address = vma->vm_end;
2951 ++ return address;
2952 + }
2953 +
2954 + static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
2955 +diff --git a/mm/memory-failure.c b/mm/memory-failure.c
2956 +index 704d05057d8c3..4db6f95e55be0 100644
2957 +--- a/mm/memory-failure.c
2958 ++++ b/mm/memory-failure.c
2959 +@@ -658,6 +658,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
2960 + */
2961 + static int me_kernel(struct page *p, unsigned long pfn)
2962 + {
2963 ++ unlock_page(p);
2964 + return MF_IGNORED;
2965 + }
2966 +
2967 +@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
2968 + static int me_unknown(struct page *p, unsigned long pfn)
2969 + {
2970 + pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
2971 ++ unlock_page(p);
2972 + return MF_FAILED;
2973 + }
2974 +
2975 +@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
2976 + */
2977 + static int me_pagecache_clean(struct page *p, unsigned long pfn)
2978 + {
2979 ++ int ret;
2980 + struct address_space *mapping;
2981 +
2982 + delete_from_lru_cache(p);
2983 +@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
2984 + * For anonymous pages we're done the only reference left
2985 + * should be the one m_f() holds.
2986 + */
2987 +- if (PageAnon(p))
2988 +- return MF_RECOVERED;
2989 ++ if (PageAnon(p)) {
2990 ++ ret = MF_RECOVERED;
2991 ++ goto out;
2992 ++ }
2993 +
2994 + /*
2995 + * Now truncate the page in the page cache. This is really
2996 +@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
2997 + /*
2998 + * Page has been teared down in the meanwhile
2999 + */
3000 +- return MF_FAILED;
3001 ++ ret = MF_FAILED;
3002 ++ goto out;
3003 + }
3004 +
3005 + /*
3006 +@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
3007 + *
3008 + * Open: to take i_mutex or not for this? Right now we don't.
3009 + */
3010 +- return truncate_error_page(p, pfn, mapping);
3011 ++ ret = truncate_error_page(p, pfn, mapping);
3012 ++out:
3013 ++ unlock_page(p);
3014 ++ return ret;
3015 + }
3016 +
3017 + /*
3018 +@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
3019 + */
3020 + static int me_swapcache_dirty(struct page *p, unsigned long pfn)
3021 + {
3022 ++ int ret;
3023 ++
3024 + ClearPageDirty(p);
3025 + /* Trigger EIO in shmem: */
3026 + ClearPageUptodate(p);
3027 +
3028 +- if (!delete_from_lru_cache(p))
3029 +- return MF_DELAYED;
3030 +- else
3031 +- return MF_FAILED;
3032 ++ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
3033 ++ unlock_page(p);
3034 ++ return ret;
3035 + }
3036 +
3037 + static int me_swapcache_clean(struct page *p, unsigned long pfn)
3038 + {
3039 ++ int ret;
3040 ++
3041 + delete_from_swap_cache(p);
3042 +
3043 +- if (!delete_from_lru_cache(p))
3044 +- return MF_RECOVERED;
3045 +- else
3046 +- return MF_FAILED;
3047 ++ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
3048 ++ unlock_page(p);
3049 ++ return ret;
3050 + }
3051 +
3052 + /*
3053 +@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
3054 + mapping = page_mapping(hpage);
3055 + if (mapping) {
3056 + res = truncate_error_page(hpage, pfn, mapping);
3057 ++ unlock_page(hpage);
3058 + } else {
3059 + res = MF_FAILED;
3060 + unlock_page(hpage);
3061 +@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
3062 + page_ref_inc(p);
3063 + res = MF_RECOVERED;
3064 + }
3065 +- lock_page(hpage);
3066 + }
3067 +
3068 + return res;
3069 +@@ -866,6 +877,8 @@ static struct page_state {
3070 + unsigned long mask;
3071 + unsigned long res;
3072 + enum mf_action_page_type type;
3073 ++
3074 ++ /* Callback ->action() has to unlock the relevant page inside it. */
3075 + int (*action)(struct page *p, unsigned long pfn);
3076 + } error_states[] = {
3077 + { reserved, reserved, MF_MSG_KERNEL, me_kernel },
3078 +@@ -929,6 +942,7 @@ static int page_action(struct page_state *ps, struct page *p,
3079 + int result;
3080 + int count;
3081 +
3082 ++ /* page p should be unlocked after returning from ps->action(). */
3083 + result = ps->action(p, pfn);
3084 +
3085 + count = page_count(p) - 1;
3086 +@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
3087 + goto out;
3088 + }
3089 +
3090 +- res = identify_page_state(pfn, p, page_flags);
3091 ++ return identify_page_state(pfn, p, page_flags);
3092 + out:
3093 + unlock_page(head);
3094 + return res;
3095 +@@ -1429,9 +1443,10 @@ int memory_failure(unsigned long pfn, int flags)
3096 + struct page *hpage;
3097 + struct page *orig_head;
3098 + struct dev_pagemap *pgmap;
3099 +- int res;
3100 ++ int res = 0;
3101 + unsigned long page_flags;
3102 + bool retry = true;
3103 ++ static DEFINE_MUTEX(mf_mutex);
3104 +
3105 + if (!sysctl_memory_failure_recovery)
3106 + panic("Memory failure on page %lx", pfn);
3107 +@@ -1449,13 +1464,18 @@ int memory_failure(unsigned long pfn, int flags)
3108 + return -ENXIO;
3109 + }
3110 +
3111 ++ mutex_lock(&mf_mutex);
3112 ++
3113 + try_again:
3114 +- if (PageHuge(p))
3115 +- return memory_failure_hugetlb(pfn, flags);
3116 ++ if (PageHuge(p)) {
3117 ++ res = memory_failure_hugetlb(pfn, flags);
3118 ++ goto unlock_mutex;
3119 ++ }
3120 ++
3121 + if (TestSetPageHWPoison(p)) {
3122 + pr_err("Memory failure: %#lx: already hardware poisoned\n",
3123 + pfn);
3124 +- return 0;
3125 ++ goto unlock_mutex;
3126 + }
3127 +
3128 + orig_head = hpage = compound_head(p);
3129 +@@ -1488,17 +1508,19 @@ try_again:
3130 + res = MF_FAILED;
3131 + }
3132 + action_result(pfn, MF_MSG_BUDDY, res);
3133 +- return res == MF_RECOVERED ? 0 : -EBUSY;
3134 ++ res = res == MF_RECOVERED ? 0 : -EBUSY;
3135 + } else {
3136 + action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
3137 +- return -EBUSY;
3138 ++ res = -EBUSY;
3139 + }
3140 ++ goto unlock_mutex;
3141 + }
3142 +
3143 + if (PageTransHuge(hpage)) {
3144 + if (try_to_split_thp_page(p, "Memory Failure") < 0) {
3145 + action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
3146 +- return -EBUSY;
3147 ++ res = -EBUSY;
3148 ++ goto unlock_mutex;
3149 + }
3150 + VM_BUG_ON_PAGE(!page_count(p), p);
3151 + }
3152 +@@ -1522,7 +1544,7 @@ try_again:
3153 + if (PageCompound(p) && compound_head(p) != orig_head) {
3154 + action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
3155 + res = -EBUSY;
3156 +- goto out;
3157 ++ goto unlock_page;
3158 + }
3159 +
3160 + /*
3161 +@@ -1542,14 +1564,14 @@ try_again:
3162 + num_poisoned_pages_dec();
3163 + unlock_page(p);
3164 + put_page(p);
3165 +- return 0;
3166 ++ goto unlock_mutex;
3167 + }
3168 + if (hwpoison_filter(p)) {
3169 + if (TestClearPageHWPoison(p))
3170 + num_poisoned_pages_dec();
3171 + unlock_page(p);
3172 + put_page(p);
3173 +- return 0;
3174 ++ goto unlock_mutex;
3175 + }
3176 +
3177 + /*
3178 +@@ -1573,7 +1595,7 @@ try_again:
3179 + if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
3180 + action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
3181 + res = -EBUSY;
3182 +- goto out;
3183 ++ goto unlock_page;
3184 + }
3185 +
3186 + /*
3187 +@@ -1582,13 +1604,17 @@ try_again:
3188 + if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
3189 + action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
3190 + res = -EBUSY;
3191 +- goto out;
3192 ++ goto unlock_page;
3193 + }
3194 +
3195 + identify_page_state:
3196 + res = identify_page_state(pfn, p, page_flags);
3197 +-out:
3198 ++ mutex_unlock(&mf_mutex);
3199 ++ return res;
3200 ++unlock_page:
3201 + unlock_page(p);
3202 ++unlock_mutex:
3203 ++ mutex_unlock(&mf_mutex);
3204 + return res;
3205 + }
3206 + EXPORT_SYMBOL_GPL(memory_failure);
3207 +diff --git a/mm/memory.c b/mm/memory.c
3208 +index 14a6c66b37483..36624986130be 100644
3209 +--- a/mm/memory.c
3210 ++++ b/mm/memory.c
3211 +@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
3212 + else if (zap_huge_pmd(tlb, vma, pmd, addr))
3213 + goto next;
3214 + /* fall through */
3215 ++ } else if (details && details->single_page &&
3216 ++ PageTransCompound(details->single_page) &&
3217 ++ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
3218 ++ spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
3219 ++ /*
3220 ++ * Take and drop THP pmd lock so that we cannot return
3221 ++ * prematurely, while zap_huge_pmd() has cleared *pmd,
3222 ++ * but not yet decremented compound_mapcount().
3223 ++ */
3224 ++ spin_unlock(ptl);
3225 + }
3226 ++
3227 + /*
3228 + * Here there can be other concurrent MADV_DONTNEED or
3229 + * trans huge page faults running, and if the pmd is
3230 +@@ -3193,6 +3204,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
3231 + }
3232 + }
3233 +
3234 ++/**
3235 ++ * unmap_mapping_page() - Unmap single page from processes.
3236 ++ * @page: The locked page to be unmapped.
3237 ++ *
3238 ++ * Unmap this page from any userspace process which still has it mmaped.
3239 ++ * Typically, for efficiency, the range of nearby pages has already been
3240 ++ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once
3241 ++ * truncation or invalidation holds the lock on a page, it may find that
3242 ++ * the page has been remapped again: and then uses unmap_mapping_page()
3243 ++ * to unmap it finally.
3244 ++ */
3245 ++void unmap_mapping_page(struct page *page)
3246 ++{
3247 ++ struct address_space *mapping = page->mapping;
3248 ++ struct zap_details details = { };
3249 ++
3250 ++ VM_BUG_ON(!PageLocked(page));
3251 ++ VM_BUG_ON(PageTail(page));
3252 ++
3253 ++ details.check_mapping = mapping;
3254 ++ details.first_index = page->index;
3255 ++ details.last_index = page->index + thp_nr_pages(page) - 1;
3256 ++ details.single_page = page;
3257 ++
3258 ++ i_mmap_lock_write(mapping);
3259 ++ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
3260 ++ unmap_mapping_range_tree(&mapping->i_mmap, &details);
3261 ++ i_mmap_unlock_write(mapping);
3262 ++}
3263 ++
3264 + /**
3265 + * unmap_mapping_pages() - Unmap pages from processes.
3266 + * @mapping: The address space containing pages to be unmapped.
3267 +diff --git a/mm/migrate.c b/mm/migrate.c
3268 +index 773622cffe779..40455e753c5b4 100644
3269 +--- a/mm/migrate.c
3270 ++++ b/mm/migrate.c
3271 +@@ -322,6 +322,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
3272 + goto out;
3273 +
3274 + page = migration_entry_to_page(entry);
3275 ++ page = compound_head(page);
3276 +
3277 + /*
3278 + * Once page cache replacement of page migration started, page_count
3279 +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
3280 +index 86e3a3688d592..3350faeb199a6 100644
3281 +--- a/mm/page_vma_mapped.c
3282 ++++ b/mm/page_vma_mapped.c
3283 +@@ -116,6 +116,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
3284 + return pfn_is_match(pvmw->page, pfn);
3285 + }
3286 +
3287 ++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
3288 ++{
3289 ++ pvmw->address = (pvmw->address + size) & ~(size - 1);
3290 ++ if (!pvmw->address)
3291 ++ pvmw->address = ULONG_MAX;
3292 ++}
3293 ++
3294 + /**
3295 + * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
3296 + * @pvmw->address
3297 +@@ -144,6 +151,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
3298 + {
3299 + struct mm_struct *mm = pvmw->vma->vm_mm;
3300 + struct page *page = pvmw->page;
3301 ++ unsigned long end;
3302 + pgd_t *pgd;
3303 + p4d_t *p4d;
3304 + pud_t *pud;
3305 +@@ -153,10 +161,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
3306 + if (pvmw->pmd && !pvmw->pte)
3307 + return not_found(pvmw);
3308 +
3309 +- if (pvmw->pte)
3310 +- goto next_pte;
3311 ++ if (unlikely(PageHuge(page))) {
3312 ++ /* The only possible mapping was handled on last iteration */
3313 ++ if (pvmw->pte)
3314 ++ return not_found(pvmw);
3315 +
3316 +- if (unlikely(PageHuge(pvmw->page))) {
3317 + /* when pud is not present, pte will be NULL */
3318 + pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
3319 + if (!pvmw->pte)
3320 +@@ -168,78 +177,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
3321 + return not_found(pvmw);
3322 + return true;
3323 + }
3324 +-restart:
3325 +- pgd = pgd_offset(mm, pvmw->address);
3326 +- if (!pgd_present(*pgd))
3327 +- return false;
3328 +- p4d = p4d_offset(pgd, pvmw->address);
3329 +- if (!p4d_present(*p4d))
3330 +- return false;
3331 +- pud = pud_offset(p4d, pvmw->address);
3332 +- if (!pud_present(*pud))
3333 +- return false;
3334 +- pvmw->pmd = pmd_offset(pud, pvmw->address);
3335 ++
3336 + /*
3337 +- * Make sure the pmd value isn't cached in a register by the
3338 +- * compiler and used as a stale value after we've observed a
3339 +- * subsequent update.
3340 ++ * Seek to next pte only makes sense for THP.
3341 ++ * But more important than that optimization, is to filter out
3342 ++ * any PageKsm page: whose page->index misleads vma_address()
3343 ++ * and vma_address_end() to disaster.
3344 + */
3345 +- pmde = READ_ONCE(*pvmw->pmd);
3346 +- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
3347 +- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
3348 +- if (likely(pmd_trans_huge(*pvmw->pmd))) {
3349 +- if (pvmw->flags & PVMW_MIGRATION)
3350 +- return not_found(pvmw);
3351 +- if (pmd_page(*pvmw->pmd) != page)
3352 +- return not_found(pvmw);
3353 +- return true;
3354 +- } else if (!pmd_present(*pvmw->pmd)) {
3355 +- if (thp_migration_supported()) {
3356 +- if (!(pvmw->flags & PVMW_MIGRATION))
3357 ++ end = PageTransCompound(page) ?
3358 ++ vma_address_end(page, pvmw->vma) :
3359 ++ pvmw->address + PAGE_SIZE;
3360 ++ if (pvmw->pte)
3361 ++ goto next_pte;
3362 ++restart:
3363 ++ do {
3364 ++ pgd = pgd_offset(mm, pvmw->address);
3365 ++ if (!pgd_present(*pgd)) {
3366 ++ step_forward(pvmw, PGDIR_SIZE);
3367 ++ continue;
3368 ++ }
3369 ++ p4d = p4d_offset(pgd, pvmw->address);
3370 ++ if (!p4d_present(*p4d)) {
3371 ++ step_forward(pvmw, P4D_SIZE);
3372 ++ continue;
3373 ++ }
3374 ++ pud = pud_offset(p4d, pvmw->address);
3375 ++ if (!pud_present(*pud)) {
3376 ++ step_forward(pvmw, PUD_SIZE);
3377 ++ continue;
3378 ++ }
3379 ++
3380 ++ pvmw->pmd = pmd_offset(pud, pvmw->address);
3381 ++ /*
3382 ++ * Make sure the pmd value isn't cached in a register by the
3383 ++ * compiler and used as a stale value after we've observed a
3384 ++ * subsequent update.
3385 ++ */
3386 ++ pmde = READ_ONCE(*pvmw->pmd);
3387 ++
3388 ++ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
3389 ++ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
3390 ++ pmde = *pvmw->pmd;
3391 ++ if (likely(pmd_trans_huge(pmde))) {
3392 ++ if (pvmw->flags & PVMW_MIGRATION)
3393 + return not_found(pvmw);
3394 +- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
3395 +- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
3396 ++ if (pmd_page(pmde) != page)
3397 ++ return not_found(pvmw);
3398 ++ return true;
3399 ++ }
3400 ++ if (!pmd_present(pmde)) {
3401 ++ swp_entry_t entry;
3402 +
3403 +- if (migration_entry_to_page(entry) != page)
3404 +- return not_found(pvmw);
3405 +- return true;
3406 +- }
3407 ++ if (!thp_migration_supported() ||
3408 ++ !(pvmw->flags & PVMW_MIGRATION))
3409 ++ return not_found(pvmw);
3410 ++ entry = pmd_to_swp_entry(pmde);
3411 ++ if (!is_migration_entry(entry) ||
3412 ++ migration_entry_to_page(entry) != page)
3413 ++ return not_found(pvmw);
3414 ++ return true;
3415 + }
3416 +- return not_found(pvmw);
3417 +- } else {
3418 + /* THP pmd was split under us: handle on pte level */
3419 + spin_unlock(pvmw->ptl);
3420 + pvmw->ptl = NULL;
3421 ++ } else if (!pmd_present(pmde)) {
3422 ++ /*
3423 ++ * If PVMW_SYNC, take and drop THP pmd lock so that we
3424 ++ * cannot return prematurely, while zap_huge_pmd() has
3425 ++ * cleared *pmd but not decremented compound_mapcount().
3426 ++ */
3427 ++ if ((pvmw->flags & PVMW_SYNC) &&
3428 ++ PageTransCompound(page)) {
3429 ++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
3430 ++
3431 ++ spin_unlock(ptl);
3432 ++ }
3433 ++ step_forward(pvmw, PMD_SIZE);
3434 ++ continue;
3435 + }
3436 +- } else if (!pmd_present(pmde)) {
3437 +- return false;
3438 +- }
3439 +- if (!map_pte(pvmw))
3440 +- goto next_pte;
3441 +- while (1) {
3442 ++ if (!map_pte(pvmw))
3443 ++ goto next_pte;
3444 ++this_pte:
3445 + if (check_pte(pvmw))
3446 + return true;
3447 + next_pte:
3448 +- /* Seek to next pte only makes sense for THP */
3449 +- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
3450 +- return not_found(pvmw);
3451 + do {
3452 + pvmw->address += PAGE_SIZE;
3453 +- if (pvmw->address >= pvmw->vma->vm_end ||
3454 +- pvmw->address >=
3455 +- __vma_address(pvmw->page, pvmw->vma) +
3456 +- thp_size(pvmw->page))
3457 ++ if (pvmw->address >= end)
3458 + return not_found(pvmw);
3459 + /* Did we cross page table boundary? */
3460 +- if (pvmw->address % PMD_SIZE == 0) {
3461 +- pte_unmap(pvmw->pte);
3462 ++ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
3463 + if (pvmw->ptl) {
3464 + spin_unlock(pvmw->ptl);
3465 + pvmw->ptl = NULL;
3466 + }
3467 ++ pte_unmap(pvmw->pte);
3468 ++ pvmw->pte = NULL;
3469 + goto restart;
3470 +- } else {
3471 +- pvmw->pte++;
3472 ++ }
3473 ++ pvmw->pte++;
3474 ++ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
3475 ++ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
3476 ++ spin_lock(pvmw->ptl);
3477 + }
3478 + } while (pte_none(*pvmw->pte));
3479 +
3480 +@@ -247,7 +286,10 @@ next_pte:
3481 + pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
3482 + spin_lock(pvmw->ptl);
3483 + }
3484 +- }
3485 ++ goto this_pte;
3486 ++ } while (pvmw->address < end);
3487 ++
3488 ++ return false;
3489 + }
3490 +
3491 + /**
3492 +@@ -266,14 +308,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
3493 + .vma = vma,
3494 + .flags = PVMW_SYNC,
3495 + };
3496 +- unsigned long start, end;
3497 +-
3498 +- start = __vma_address(page, vma);
3499 +- end = start + thp_size(page) - PAGE_SIZE;
3500 +
3501 +- if (unlikely(end < vma->vm_start || start >= vma->vm_end))
3502 ++ pvmw.address = vma_address(page, vma);
3503 ++ if (pvmw.address == -EFAULT)
3504 + return 0;
3505 +- pvmw.address = max(start, vma->vm_start);
3506 + if (!page_vma_mapped_walk(&pvmw))
3507 + return 0;
3508 + page_vma_mapped_walk_done(&pvmw);
3509 +diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
3510 +index c2210e1cdb515..4e640baf97948 100644
3511 +--- a/mm/pgtable-generic.c
3512 ++++ b/mm/pgtable-generic.c
3513 +@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
3514 + {
3515 + pmd_t pmd;
3516 + VM_BUG_ON(address & ~HPAGE_PMD_MASK);
3517 +- VM_BUG_ON(!pmd_present(*pmdp));
3518 +- /* Below assumes pmd_present() is true */
3519 +- VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
3520 ++ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
3521 ++ !pmd_devmap(*pmdp));
3522 + pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
3523 + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
3524 + return pmd;
3525 +diff --git a/mm/rmap.c b/mm/rmap.c
3526 +index b0fc27e77d6d7..3665d062cc9ce 100644
3527 +--- a/mm/rmap.c
3528 ++++ b/mm/rmap.c
3529 +@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
3530 + */
3531 + unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
3532 + {
3533 +- unsigned long address;
3534 + if (PageAnon(page)) {
3535 + struct anon_vma *page__anon_vma = page_anon_vma(page);
3536 + /*
3537 +@@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
3538 + if (!vma->anon_vma || !page__anon_vma ||
3539 + vma->anon_vma->root != page__anon_vma->root)
3540 + return -EFAULT;
3541 +- } else if (page->mapping) {
3542 +- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
3543 +- return -EFAULT;
3544 +- } else
3545 ++ } else if (!vma->vm_file) {
3546 + return -EFAULT;
3547 +- address = __vma_address(page, vma);
3548 +- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
3549 ++ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
3550 + return -EFAULT;
3551 +- return address;
3552 ++ }
3553 ++
3554 ++ return vma_address(page, vma);
3555 + }
3556 +
3557 + pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
3558 +@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
3559 + */
3560 + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
3561 + 0, vma, vma->vm_mm, address,
3562 +- min(vma->vm_end, address + page_size(page)));
3563 ++ vma_address_end(page, vma));
3564 + mmu_notifier_invalidate_range_start(&range);
3565 +
3566 + while (page_vma_mapped_walk(&pvmw)) {
3567 +@@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
3568 + struct mmu_notifier_range range;
3569 + enum ttu_flags flags = (enum ttu_flags)(long)arg;
3570 +
3571 ++ /*
3572 ++ * When racing against e.g. zap_pte_range() on another cpu,
3573 ++ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
3574 ++ * try_to_unmap() may return false when it is about to become true,
3575 ++ * if page table locking is skipped: use TTU_SYNC to wait for that.
3576 ++ */
3577 ++ if (flags & TTU_SYNC)
3578 ++ pvmw.flags = PVMW_SYNC;
3579 ++
3580 + /* munlock has nothing to gain from examining un-locked vmas */
3581 + if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
3582 + return true;
3583 +@@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
3584 + * Note that the page can not be free in this function as call of
3585 + * try_to_unmap() must hold a reference on the page.
3586 + */
3587 ++ range.end = PageKsm(page) ?
3588 ++ address + PAGE_SIZE : vma_address_end(page, vma);
3589 + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
3590 +- address,
3591 +- min(vma->vm_end, address + page_size(page)));
3592 ++ address, range.end);
3593 + if (PageHuge(page)) {
3594 + /*
3595 + * If sharing is possible, start and end will be adjusted
3596 +@@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
3597 + else
3598 + rmap_walk(page, &rwc);
3599 +
3600 +- return !page_mapcount(page) ? true : false;
3601 ++ /*
3602 ++ * When racing against e.g. zap_pte_range() on another cpu,
3603 ++ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
3604 ++ * try_to_unmap() may return false when it is about to become true,
3605 ++ * if page table locking is skipped: use TTU_SYNC to wait for that.
3606 ++ */
3607 ++ return !page_mapcount(page);
3608 + }
3609 +
3610 + /**
3611 +@@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
3612 + struct vm_area_struct *vma = avc->vma;
3613 + unsigned long address = vma_address(page, vma);
3614 +
3615 ++ VM_BUG_ON_VMA(address == -EFAULT, vma);
3616 + cond_resched();
3617 +
3618 + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
3619 +@@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
3620 + pgoff_start, pgoff_end) {
3621 + unsigned long address = vma_address(page, vma);
3622 +
3623 ++ VM_BUG_ON_VMA(address == -EFAULT, vma);
3624 + cond_resched();
3625 +
3626 + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
3627 +diff --git a/mm/truncate.c b/mm/truncate.c
3628 +index 455944264663e..bf092be0a6f01 100644
3629 +--- a/mm/truncate.c
3630 ++++ b/mm/truncate.c
3631 +@@ -168,13 +168,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
3632 + * its lock, b) when a concurrent invalidate_mapping_pages got there first and
3633 + * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
3634 + */
3635 +-static void
3636 +-truncate_cleanup_page(struct address_space *mapping, struct page *page)
3637 ++static void truncate_cleanup_page(struct page *page)
3638 + {
3639 +- if (page_mapped(page)) {
3640 +- unsigned int nr = thp_nr_pages(page);
3641 +- unmap_mapping_pages(mapping, page->index, nr, false);
3642 +- }
3643 ++ if (page_mapped(page))
3644 ++ unmap_mapping_page(page);
3645 +
3646 + if (page_has_private(page))
3647 + do_invalidatepage(page, 0, thp_size(page));
3648 +@@ -219,7 +216,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
3649 + if (page->mapping != mapping)
3650 + return -EIO;
3651 +
3652 +- truncate_cleanup_page(mapping, page);
3653 ++ truncate_cleanup_page(page);
3654 + delete_from_page_cache(page);
3655 + return 0;
3656 + }
3657 +@@ -326,7 +323,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
3658 + index = indices[pagevec_count(&pvec) - 1] + 1;
3659 + truncate_exceptional_pvec_entries(mapping, &pvec, indices);
3660 + for (i = 0; i < pagevec_count(&pvec); i++)
3661 +- truncate_cleanup_page(mapping, pvec.pages[i]);
3662 ++ truncate_cleanup_page(pvec.pages[i]);
3663 + delete_from_page_cache_batch(mapping, &pvec);
3664 + for (i = 0; i < pagevec_count(&pvec); i++)
3665 + unlock_page(pvec.pages[i]);
3666 +@@ -652,6 +649,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
3667 + continue;
3668 + }
3669 +
3670 ++ if (!did_range_unmap && page_mapped(page)) {
3671 ++ /*
3672 ++ * If page is mapped, before taking its lock,
3673 ++ * zap the rest of the file in one hit.
3674 ++ */
3675 ++ unmap_mapping_pages(mapping, index,
3676 ++ (1 + end - index), false);
3677 ++ did_range_unmap = 1;
3678 ++ }
3679 ++
3680 + lock_page(page);
3681 + WARN_ON(page_to_index(page) != index);
3682 + if (page->mapping != mapping) {
3683 +@@ -659,23 +666,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
3684 + continue;
3685 + }
3686 + wait_on_page_writeback(page);
3687 +- if (page_mapped(page)) {
3688 +- if (!did_range_unmap) {
3689 +- /*
3690 +- * Zap the rest of the file in one hit.
3691 +- */
3692 +- unmap_mapping_pages(mapping, index,
3693 +- (1 + end - index), false);
3694 +- did_range_unmap = 1;
3695 +- } else {
3696 +- /*
3697 +- * Just zap this page
3698 +- */
3699 +- unmap_mapping_pages(mapping, index,
3700 +- 1, false);
3701 +- }
3702 +- }
3703 ++
3704 ++ if (page_mapped(page))
3705 ++ unmap_mapping_page(page);
3706 + BUG_ON(page_mapped(page));
3707 ++
3708 + ret2 = do_launder_page(mapping, page);
3709 + if (ret2 == 0) {
3710 + if (!invalidate_complete_page2(mapping, page))
3711 +diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
3712 +index 2603966da904d..e910890a868c1 100644
3713 +--- a/net/ethtool/ioctl.c
3714 ++++ b/net/ethtool/ioctl.c
3715 +@@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
3716 + if (eeprom.offset + eeprom.len > total_len)
3717 + return -EINVAL;
3718 +
3719 +- data = kmalloc(PAGE_SIZE, GFP_USER);
3720 ++ data = kzalloc(PAGE_SIZE, GFP_USER);
3721 + if (!data)
3722 + return -ENOMEM;
3723 +
3724 +@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
3725 + if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
3726 + return -EINVAL;
3727 +
3728 +- data = kmalloc(PAGE_SIZE, GFP_USER);
3729 ++ data = kzalloc(PAGE_SIZE, GFP_USER);
3730 + if (!data)
3731 + return -ENOMEM;
3732 +
3733 +@@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
3734 + return -EFAULT;
3735 +
3736 + test.len = test_len;
3737 +- data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
3738 ++ data = kcalloc(test_len, sizeof(u64), GFP_USER);
3739 + if (!data)
3740 + return -ENOMEM;
3741 +
3742 +@@ -2281,7 +2281,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
3743 + ret = ethtool_tunable_valid(&tuna);
3744 + if (ret)
3745 + return ret;
3746 +- data = kmalloc(tuna.len, GFP_USER);
3747 ++ data = kzalloc(tuna.len, GFP_USER);
3748 + if (!data)
3749 + return -ENOMEM;
3750 + ret = ops->get_tunable(dev, &tuna, data);
3751 +@@ -2473,7 +2473,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
3752 + ret = ethtool_phy_tunable_valid(&tuna);
3753 + if (ret)
3754 + return ret;
3755 +- data = kmalloc(tuna.len, GFP_USER);
3756 ++ data = kzalloc(tuna.len, GFP_USER);
3757 + if (!data)
3758 + return -ENOMEM;
3759 + if (phy_drv_tunable) {
3760 +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
3761 +index 1355e6c0d5677..faa7856c7fb07 100644
3762 +--- a/net/ipv4/af_inet.c
3763 ++++ b/net/ipv4/af_inet.c
3764 +@@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
3765 + return err;
3766 + }
3767 +
3768 +- if (!inet_sk(sk)->inet_num && inet_autobind(sk))
3769 ++ if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
3770 + return -EAGAIN;
3771 + return sk->sk_prot->connect(sk, uaddr, addr_len);
3772 + }
3773 +@@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk)
3774 + sock_rps_record_flow(sk);
3775 +
3776 + /* We may need to bind the socket. */
3777 +- if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
3778 ++ if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
3779 + inet_autobind(sk))
3780 + return -EAGAIN;
3781 +
3782 +diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
3783 +index 2e35f68da40a7..1c6429c353a96 100644
3784 +--- a/net/ipv4/devinet.c
3785 ++++ b/net/ipv4/devinet.c
3786 +@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
3787 + return -EAFNOSUPPORT;
3788 +
3789 + if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
3790 +- BUG();
3791 ++ return -EINVAL;
3792 +
3793 + if (tb[IFLA_INET_CONF]) {
3794 + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
3795 +diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
3796 +index 8b943f85fff9d..ea22768f76b8a 100644
3797 +--- a/net/ipv4/ping.c
3798 ++++ b/net/ipv4/ping.c
3799 +@@ -952,6 +952,7 @@ bool ping_rcv(struct sk_buff *skb)
3800 + struct sock *sk;
3801 + struct net *net = dev_net(skb->dev);
3802 + struct icmphdr *icmph = icmp_hdr(skb);
3803 ++ bool rc = false;
3804 +
3805 + /* We assume the packet has already been checked by icmp_rcv */
3806 +
3807 +@@ -966,14 +967,15 @@ bool ping_rcv(struct sk_buff *skb)
3808 + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
3809 +
3810 + pr_debug("rcv on socket %p\n", sk);
3811 +- if (skb2)
3812 +- ping_queue_rcv_skb(sk, skb2);
3813 ++ if (skb2 && !ping_queue_rcv_skb(sk, skb2))
3814 ++ rc = true;
3815 + sock_put(sk);
3816 +- return true;
3817 + }
3818 +- pr_debug("no socket, dropping\n");
3819 +
3820 +- return false;
3821 ++ if (!rc)
3822 ++ pr_debug("no socket, dropping\n");
3823 ++
3824 ++ return rc;
3825 + }
3826 + EXPORT_SYMBOL_GPL(ping_rcv);
3827 +
3828 +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
3829 +index a9e53f5942fae..eab0a46983c0b 100644
3830 +--- a/net/ipv6/addrconf.c
3831 ++++ b/net/ipv6/addrconf.c
3832 +@@ -5822,7 +5822,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
3833 + return -EAFNOSUPPORT;
3834 +
3835 + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
3836 +- BUG();
3837 ++ return -EINVAL;
3838 +
3839 + if (tb[IFLA_INET6_TOKEN]) {
3840 + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
3841 +diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
3842 +index 02e818d740f60..5ec437e8e7132 100644
3843 +--- a/net/mac80211/ieee80211_i.h
3844 ++++ b/net/mac80211/ieee80211_i.h
3845 +@@ -1442,7 +1442,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
3846 + rcu_read_lock();
3847 + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
3848 +
3849 +- if (WARN_ON_ONCE(!chanctx_conf)) {
3850 ++ if (!chanctx_conf) {
3851 + rcu_read_unlock();
3852 + return NULL;
3853 + }
3854 +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
3855 +index 0fe91dc9817eb..437d88822d8f8 100644
3856 +--- a/net/mac80211/mlme.c
3857 ++++ b/net/mac80211/mlme.c
3858 +@@ -4062,10 +4062,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3859 + if (elems.mbssid_config_ie)
3860 + bss_conf->profile_periodicity =
3861 + elems.mbssid_config_ie->profile_periodicity;
3862 ++ else
3863 ++ bss_conf->profile_periodicity = 0;
3864 +
3865 + if (elems.ext_capab_len >= 11 &&
3866 + (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
3867 + bss_conf->ema_ap = true;
3868 ++ else
3869 ++ bss_conf->ema_ap = false;
3870 +
3871 + /* continue assoc process */
3872 + ifmgd->assoc_data->timeout = jiffies;
3873 +@@ -5802,12 +5806,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
3874 + beacon_ies->data, beacon_ies->len);
3875 + if (elem && elem->datalen >= 3)
3876 + sdata->vif.bss_conf.profile_periodicity = elem->data[2];
3877 ++ else
3878 ++ sdata->vif.bss_conf.profile_periodicity = 0;
3879 +
3880 + elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
3881 + beacon_ies->data, beacon_ies->len);
3882 + if (elem && elem->datalen >= 11 &&
3883 + (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
3884 + sdata->vif.bss_conf.ema_ap = true;
3885 ++ else
3886 ++ sdata->vif.bss_conf.ema_ap = false;
3887 + } else {
3888 + assoc_data->timeout = jiffies;
3889 + assoc_data->timeout_started = true;
3890 +diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
3891 +index 59de7a86599dc..cb5cbf02dbac9 100644
3892 +--- a/net/mac80211/rx.c
3893 ++++ b/net/mac80211/rx.c
3894 +@@ -2239,17 +2239,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
3895 + sc = le16_to_cpu(hdr->seq_ctrl);
3896 + frag = sc & IEEE80211_SCTL_FRAG;
3897 +
3898 +- if (is_multicast_ether_addr(hdr->addr1)) {
3899 +- I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
3900 +- goto out_no_led;
3901 +- }
3902 +-
3903 + if (rx->sta)
3904 + cache = &rx->sta->frags;
3905 +
3906 + if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
3907 + goto out;
3908 +
3909 ++ if (is_multicast_ether_addr(hdr->addr1))
3910 ++ return RX_DROP_MONITOR;
3911 ++
3912 + I802_DEBUG_INC(rx->local->rx_handlers_fragments);
3913 +
3914 + if (skb_linearize(rx->skb))
3915 +@@ -2375,7 +2373,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
3916 +
3917 + out:
3918 + ieee80211_led_rx(rx->local);
3919 +- out_no_led:
3920 + if (rx->sta)
3921 + rx->sta->rx_stats.packets++;
3922 + return RX_CONTINUE;
3923 +diff --git a/net/mac80211/util.c b/net/mac80211/util.c
3924 +index 53755a05f73b5..06342693799eb 100644
3925 +--- a/net/mac80211/util.c
3926 ++++ b/net/mac80211/util.c
3927 +@@ -955,7 +955,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
3928 +
3929 + switch (elem->data[0]) {
3930 + case WLAN_EID_EXT_HE_MU_EDCA:
3931 +- if (len == sizeof(*elems->mu_edca_param_set)) {
3932 ++ if (len >= sizeof(*elems->mu_edca_param_set)) {
3933 + elems->mu_edca_param_set = data;
3934 + if (crc)
3935 + *crc = crc32_be(*crc, (void *)elem,
3936 +@@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
3937 + }
3938 + break;
3939 + case WLAN_EID_EXT_UORA:
3940 +- if (len == 1)
3941 ++ if (len >= 1)
3942 + elems->uora_element = data;
3943 + break;
3944 + case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
3945 +@@ -984,7 +984,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
3946 + elems->max_channel_switch_time = data;
3947 + break;
3948 + case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
3949 +- if (len == sizeof(*elems->mbssid_config_ie))
3950 ++ if (len >= sizeof(*elems->mbssid_config_ie))
3951 + elems->mbssid_config_ie = data;
3952 + break;
3953 + case WLAN_EID_EXT_HE_SPR:
3954 +@@ -993,7 +993,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
3955 + elems->he_spr = data;
3956 + break;
3957 + case WLAN_EID_EXT_HE_6GHZ_CAPA:
3958 +- if (len == sizeof(*elems->he_6ghz_capa))
3959 ++ if (len >= sizeof(*elems->he_6ghz_capa))
3960 + elems->he_6ghz_capa = data;
3961 + break;
3962 + }
3963 +@@ -1082,14 +1082,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
3964 +
3965 + switch (id) {
3966 + case WLAN_EID_LINK_ID:
3967 +- if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) {
3968 ++ if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
3969 + elem_parse_failed = true;
3970 + break;
3971 + }
3972 + elems->lnk_id = (void *)(pos - 2);
3973 + break;
3974 + case WLAN_EID_CHAN_SWITCH_TIMING:
3975 +- if (elen != sizeof(struct ieee80211_ch_switch_timing)) {
3976 ++ if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
3977 + elem_parse_failed = true;
3978 + break;
3979 + }
3980 +@@ -1252,7 +1252,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
3981 + elems->sec_chan_offs = (void *)pos;
3982 + break;
3983 + case WLAN_EID_CHAN_SWITCH_PARAM:
3984 +- if (elen !=
3985 ++ if (elen <
3986 + sizeof(*elems->mesh_chansw_params_ie)) {
3987 + elem_parse_failed = true;
3988 + break;
3989 +@@ -1261,7 +1261,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
3990 + break;
3991 + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
3992 + if (!action ||
3993 +- elen != sizeof(*elems->wide_bw_chansw_ie)) {
3994 ++ elen < sizeof(*elems->wide_bw_chansw_ie)) {
3995 + elem_parse_failed = true;
3996 + break;
3997 + }
3998 +@@ -1280,7 +1280,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
3999 + ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
4000 + pos, elen);
4001 + if (ie) {
4002 +- if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
4003 ++ if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie))
4004 + elems->wide_bw_chansw_ie =
4005 + (void *)(ie + 2);
4006 + else
4007 +@@ -1324,7 +1324,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
4008 + elems->cisco_dtpc_elem = pos;
4009 + break;
4010 + case WLAN_EID_ADDBA_EXT:
4011 +- if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
4012 ++ if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
4013 + elem_parse_failed = true;
4014 + break;
4015 + }
4016 +@@ -1350,7 +1350,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
4017 + elem, elems);
4018 + break;
4019 + case WLAN_EID_S1G_CAPABILITIES:
4020 +- if (elen == sizeof(*elems->s1g_capab))
4021 ++ if (elen >= sizeof(*elems->s1g_capab))
4022 + elems->s1g_capab = (void *)pos;
4023 + else
4024 + elem_parse_failed = true;
4025 +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
4026 +index c52557ec7fb33..68a4dd2512427 100644
4027 +--- a/net/packet/af_packet.c
4028 ++++ b/net/packet/af_packet.c
4029 +@@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
4030 + }
4031 + if (likely(saddr == NULL)) {
4032 + dev = packet_cached_dev_get(po);
4033 +- proto = po->num;
4034 ++ proto = READ_ONCE(po->num);
4035 + } else {
4036 + err = -EINVAL;
4037 + if (msg->msg_namelen < sizeof(struct sockaddr_ll))
4038 +@@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
4039 +
4040 + if (likely(saddr == NULL)) {
4041 + dev = packet_cached_dev_get(po);
4042 +- proto = po->num;
4043 ++ proto = READ_ONCE(po->num);
4044 + } else {
4045 + err = -EINVAL;
4046 + if (msg->msg_namelen < sizeof(struct sockaddr_ll))
4047 +@@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
4048 + struct sock *sk = sock->sk;
4049 + struct packet_sock *po = pkt_sk(sk);
4050 +
4051 +- if (po->tx_ring.pg_vec)
4052 ++ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
4053 ++ * tpacket_snd() will redo the check safely.
4054 ++ */
4055 ++ if (data_race(po->tx_ring.pg_vec))
4056 + return tpacket_snd(po, msg);
4057 +- else
4058 +- return packet_snd(sock, msg, len);
4059 ++
4060 ++ return packet_snd(sock, msg, len);
4061 + }
4062 +
4063 + /*
4064 +@@ -3168,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
4065 + /* prevents packet_notifier() from calling
4066 + * register_prot_hook()
4067 + */
4068 +- po->num = 0;
4069 ++ WRITE_ONCE(po->num, 0);
4070 + __unregister_prot_hook(sk, true);
4071 + rcu_read_lock();
4072 + dev_curr = po->prot_hook.dev;
4073 +@@ -3178,17 +3181,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
4074 + }
4075 +
4076 + BUG_ON(po->running);
4077 +- po->num = proto;
4078 ++ WRITE_ONCE(po->num, proto);
4079 + po->prot_hook.type = proto;
4080 +
4081 + if (unlikely(unlisted)) {
4082 + dev_put(dev);
4083 + po->prot_hook.dev = NULL;
4084 +- po->ifindex = -1;
4085 ++ WRITE_ONCE(po->ifindex, -1);
4086 + packet_cached_dev_reset(po);
4087 + } else {
4088 + po->prot_hook.dev = dev;
4089 +- po->ifindex = dev ? dev->ifindex : 0;
4090 ++ WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
4091 + packet_cached_dev_assign(po, dev);
4092 + }
4093 + }
4094 +@@ -3502,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
4095 + uaddr->sa_family = AF_PACKET;
4096 + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
4097 + rcu_read_lock();
4098 +- dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
4099 ++ dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
4100 + if (dev)
4101 + strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
4102 + rcu_read_unlock();
4103 +@@ -3517,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
4104 + struct sock *sk = sock->sk;
4105 + struct packet_sock *po = pkt_sk(sk);
4106 + DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
4107 ++ int ifindex;
4108 +
4109 + if (peer)
4110 + return -EOPNOTSUPP;
4111 +
4112 ++ ifindex = READ_ONCE(po->ifindex);
4113 + sll->sll_family = AF_PACKET;
4114 +- sll->sll_ifindex = po->ifindex;
4115 +- sll->sll_protocol = po->num;
4116 ++ sll->sll_ifindex = ifindex;
4117 ++ sll->sll_protocol = READ_ONCE(po->num);
4118 + sll->sll_pkttype = 0;
4119 + rcu_read_lock();
4120 +- dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
4121 ++ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
4122 + if (dev) {
4123 + sll->sll_hatype = dev->type;
4124 + sll->sll_halen = dev->addr_len;
4125 +@@ -4105,7 +4110,7 @@ static int packet_notifier(struct notifier_block *this,
4126 + }
4127 + if (msg == NETDEV_UNREGISTER) {
4128 + packet_cached_dev_reset(po);
4129 +- po->ifindex = -1;
4130 ++ WRITE_ONCE(po->ifindex, -1);
4131 + if (po->prot_hook.dev)
4132 + dev_put(po->prot_hook.dev);
4133 + po->prot_hook.dev = NULL;
4134 +@@ -4411,7 +4416,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4135 + was_running = po->running;
4136 + num = po->num;
4137 + if (was_running) {
4138 +- po->num = 0;
4139 ++ WRITE_ONCE(po->num, 0);
4140 + __unregister_prot_hook(sk, false);
4141 + }
4142 + spin_unlock(&po->bind_lock);
4143 +@@ -4446,7 +4451,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4144 +
4145 + spin_lock(&po->bind_lock);
4146 + if (was_running) {
4147 +- po->num = num;
4148 ++ WRITE_ONCE(po->num, num);
4149 + register_prot_hook(sk);
4150 + }
4151 + spin_unlock(&po->bind_lock);
4152 +@@ -4616,8 +4621,8 @@ static int packet_seq_show(struct seq_file *seq, void *v)
4153 + s,
4154 + refcount_read(&s->sk_refcnt),
4155 + s->sk_type,
4156 +- ntohs(po->num),
4157 +- po->ifindex,
4158 ++ ntohs(READ_ONCE(po->num)),
4159 ++ READ_ONCE(po->ifindex),
4160 + po->running,
4161 + atomic_read(&s->sk_rmem_alloc),
4162 + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4163 +diff --git a/net/wireless/util.c b/net/wireless/util.c
4164 +index f342b61476754..726e7d2342bd5 100644
4165 +--- a/net/wireless/util.c
4166 ++++ b/net/wireless/util.c
4167 +@@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
4168 + case NL80211_IFTYPE_MESH_POINT:
4169 + /* mesh should be handled? */
4170 + break;
4171 ++ case NL80211_IFTYPE_OCB:
4172 ++ cfg80211_leave_ocb(rdev, dev);
4173 ++ break;
4174 + default:
4175 + break;
4176 + }
4177 +diff --git a/scripts/Makefile b/scripts/Makefile
4178 +index c36106bce80ee..9adb6d247818f 100644
4179 +--- a/scripts/Makefile
4180 ++++ b/scripts/Makefile
4181 +@@ -14,6 +14,7 @@ hostprogs-always-$(CONFIG_ASN1) += asn1_compiler
4182 + hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file
4183 + hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert
4184 + hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
4185 ++hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert
4186 +
4187 + HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include
4188 + HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include
4189 +diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
4190 +index f9b19524da112..1e9baa5c4fc6e 100644
4191 +--- a/scripts/recordmcount.h
4192 ++++ b/scripts/recordmcount.h
4193 +@@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab,
4194 + Elf32_Word const *symtab_shndx)
4195 + {
4196 + unsigned long offset;
4197 ++ unsigned short shndx = w2(sym->st_shndx);
4198 + int index;
4199 +
4200 +- if (sym->st_shndx != SHN_XINDEX)
4201 +- return w2(sym->st_shndx);
4202 ++ if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
4203 ++ return shndx;
4204 +
4205 +- offset = (unsigned long)sym - (unsigned long)symtab;
4206 +- index = offset / sizeof(*sym);
4207 ++ if (shndx == SHN_XINDEX) {
4208 ++ offset = (unsigned long)sym - (unsigned long)symtab;
4209 ++ index = offset / sizeof(*sym);
4210 +
4211 +- return w(symtab_shndx[index]);
4212 ++ return w(symtab_shndx[index]);
4213 ++ }
4214 ++
4215 ++ return 0;
4216 + }
4217 +
4218 + static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)
4219 +diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
4220 +index c5ba695c10e3a..5604bd57c9907 100644
4221 +--- a/security/integrity/platform_certs/keyring_handler.c
4222 ++++ b/security/integrity/platform_certs/keyring_handler.c
4223 +@@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source,
4224 + uefi_blacklist_hash(source, data, len, "bin:", 4);
4225 + }
4226 +
4227 ++/*
4228 ++ * Add an X509 cert to the revocation list.
4229 ++ */
4230 ++static __init void uefi_revocation_list_x509(const char *source,
4231 ++ const void *data, size_t len)
4232 ++{
4233 ++ add_key_to_revocation_list(data, len);
4234 ++}
4235 ++
4236 + /*
4237 + * Return the appropriate handler for particular signature list types found in
4238 + * the UEFI db and MokListRT tables.
4239 +@@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
4240 + return uefi_blacklist_x509_tbs;
4241 + if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0)
4242 + return uefi_blacklist_binary;
4243 ++ if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
4244 ++ return uefi_revocation_list_x509;
4245 + return 0;
4246 + }
4247 +diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
4248 +index ee4b4c666854f..f290f78c3f301 100644
4249 +--- a/security/integrity/platform_certs/load_uefi.c
4250 ++++ b/security/integrity/platform_certs/load_uefi.c
4251 +@@ -132,8 +132,9 @@ static int __init load_moklist_certs(void)
4252 + static int __init load_uefi_certs(void)
4253 + {
4254 + efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID;
4255 +- void *db = NULL, *dbx = NULL;
4256 +- unsigned long dbsize = 0, dbxsize = 0;
4257 ++ efi_guid_t mok_var = EFI_SHIM_LOCK_GUID;
4258 ++ void *db = NULL, *dbx = NULL, *mokx = NULL;
4259 ++ unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0;
4260 + efi_status_t status;
4261 + int rc = 0;
4262 +
4263 +@@ -175,6 +176,21 @@ static int __init load_uefi_certs(void)
4264 + kfree(dbx);
4265 + }
4266 +
4267 ++ mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status);
4268 ++ if (!mokx) {
4269 ++ if (status == EFI_NOT_FOUND)
4270 ++ pr_debug("mokx variable wasn't found\n");
4271 ++ else
4272 ++ pr_info("Couldn't get mokx list\n");
4273 ++ } else {
4274 ++ rc = parse_efi_signature_list("UEFI:MokListXRT",
4275 ++ mokx, mokxsize,
4276 ++ get_handler_for_dbx);
4277 ++ if (rc)
4278 ++ pr_err("Couldn't parse mokx signatures %d\n", rc);
4279 ++ kfree(mokx);
4280 ++ }
4281 ++
4282 + /* Load the MokListRT certs */
4283 + rc = load_moklist_certs();
4284 +
4285 +diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
4286 +index 58b5a349d3baf..ea3158b0d551d 100644
4287 +--- a/tools/testing/selftests/bpf/test_verifier.c
4288 ++++ b/tools/testing/selftests/bpf/test_verifier.c
4289 +@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
4290 + }
4291 + }
4292 +
4293 +- if (test->insn_processed) {
4294 ++ if (!unpriv && test->insn_processed) {
4295 + uint32_t insn_processed;
4296 + char *proc;
4297 +
4298 +diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
4299 +index ca8fdb1b3f015..7d7ebee5cc7a8 100644
4300 +--- a/tools/testing/selftests/bpf/verifier/and.c
4301 ++++ b/tools/testing/selftests/bpf/verifier/and.c
4302 +@@ -61,6 +61,8 @@
4303 + BPF_MOV64_IMM(BPF_REG_0, 0),
4304 + BPF_EXIT_INSN(),
4305 + },
4306 ++ .errstr_unpriv = "R1 !read_ok",
4307 ++ .result_unpriv = REJECT,
4308 + .result = ACCEPT,
4309 + .retval = 0
4310 + },
4311 +diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
4312 +index 8a1caf46ffbc3..e061e8799ce23 100644
4313 +--- a/tools/testing/selftests/bpf/verifier/bounds.c
4314 ++++ b/tools/testing/selftests/bpf/verifier/bounds.c
4315 +@@ -508,6 +508,8 @@
4316 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
4317 + BPF_EXIT_INSN(),
4318 + },
4319 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4320 ++ .result_unpriv = REJECT,
4321 + .result = ACCEPT
4322 + },
4323 + {
4324 +@@ -528,6 +530,8 @@
4325 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
4326 + BPF_EXIT_INSN(),
4327 + },
4328 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4329 ++ .result_unpriv = REJECT,
4330 + .result = ACCEPT
4331 + },
4332 + {
4333 +@@ -569,6 +573,8 @@
4334 + BPF_MOV64_IMM(BPF_REG_0, 0),
4335 + BPF_EXIT_INSN(),
4336 + },
4337 ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
4338 ++ .result_unpriv = REJECT,
4339 + .fixup_map_hash_8b = { 3 },
4340 + .result = ACCEPT,
4341 + },
4342 +@@ -589,6 +595,8 @@
4343 + BPF_MOV64_IMM(BPF_REG_0, 0),
4344 + BPF_EXIT_INSN(),
4345 + },
4346 ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
4347 ++ .result_unpriv = REJECT,
4348 + .fixup_map_hash_8b = { 3 },
4349 + .result = ACCEPT,
4350 + },
4351 +@@ -609,6 +617,8 @@
4352 + BPF_MOV64_IMM(BPF_REG_0, 0),
4353 + BPF_EXIT_INSN(),
4354 + },
4355 ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
4356 ++ .result_unpriv = REJECT,
4357 + .fixup_map_hash_8b = { 3 },
4358 + .result = ACCEPT,
4359 + },
4360 +@@ -674,6 +684,8 @@
4361 + BPF_MOV64_IMM(BPF_REG_0, 0),
4362 + BPF_EXIT_INSN(),
4363 + },
4364 ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
4365 ++ .result_unpriv = REJECT,
4366 + .fixup_map_hash_8b = { 3 },
4367 + .result = ACCEPT,
4368 + },
4369 +@@ -695,6 +707,8 @@
4370 + BPF_MOV64_IMM(BPF_REG_0, 0),
4371 + BPF_EXIT_INSN(),
4372 + },
4373 ++ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
4374 ++ .result_unpriv = REJECT,
4375 + .fixup_map_hash_8b = { 3 },
4376 + .result = ACCEPT,
4377 + },
4378 +diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
4379 +index 5cf361d8eb1cc..721ec9391be5a 100644
4380 +--- a/tools/testing/selftests/bpf/verifier/dead_code.c
4381 ++++ b/tools/testing/selftests/bpf/verifier/dead_code.c
4382 +@@ -8,6 +8,8 @@
4383 + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
4384 + BPF_EXIT_INSN(),
4385 + },
4386 ++ .errstr_unpriv = "R9 !read_ok",
4387 ++ .result_unpriv = REJECT,
4388 + .result = ACCEPT,
4389 + .retval = 7,
4390 + },
4391 +diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
4392 +index bd5cae4a7f733..1c857b2fbdf0a 100644
4393 +--- a/tools/testing/selftests/bpf/verifier/jmp32.c
4394 ++++ b/tools/testing/selftests/bpf/verifier/jmp32.c
4395 +@@ -87,6 +87,8 @@
4396 + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
4397 + BPF_EXIT_INSN(),
4398 + },
4399 ++ .errstr_unpriv = "R9 !read_ok",
4400 ++ .result_unpriv = REJECT,
4401 + .result = ACCEPT,
4402 + },
4403 + {
4404 +@@ -150,6 +152,8 @@
4405 + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
4406 + BPF_EXIT_INSN(),
4407 + },
4408 ++ .errstr_unpriv = "R9 !read_ok",
4409 ++ .result_unpriv = REJECT,
4410 + .result = ACCEPT,
4411 + },
4412 + {
4413 +@@ -213,6 +217,8 @@
4414 + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
4415 + BPF_EXIT_INSN(),
4416 + },
4417 ++ .errstr_unpriv = "R9 !read_ok",
4418 ++ .result_unpriv = REJECT,
4419 + .result = ACCEPT,
4420 + },
4421 + {
4422 +@@ -280,6 +286,8 @@
4423 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4424 + BPF_EXIT_INSN(),
4425 + },
4426 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4427 ++ .result_unpriv = REJECT,
4428 + .result = ACCEPT,
4429 + .retval = 2,
4430 + },
4431 +@@ -348,6 +356,8 @@
4432 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4433 + BPF_EXIT_INSN(),
4434 + },
4435 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4436 ++ .result_unpriv = REJECT,
4437 + .result = ACCEPT,
4438 + .retval = 2,
4439 + },
4440 +@@ -416,6 +426,8 @@
4441 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4442 + BPF_EXIT_INSN(),
4443 + },
4444 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4445 ++ .result_unpriv = REJECT,
4446 + .result = ACCEPT,
4447 + .retval = 2,
4448 + },
4449 +@@ -484,6 +496,8 @@
4450 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4451 + BPF_EXIT_INSN(),
4452 + },
4453 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4454 ++ .result_unpriv = REJECT,
4455 + .result = ACCEPT,
4456 + .retval = 2,
4457 + },
4458 +@@ -552,6 +566,8 @@
4459 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4460 + BPF_EXIT_INSN(),
4461 + },
4462 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4463 ++ .result_unpriv = REJECT,
4464 + .result = ACCEPT,
4465 + .retval = 2,
4466 + },
4467 +@@ -620,6 +636,8 @@
4468 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4469 + BPF_EXIT_INSN(),
4470 + },
4471 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4472 ++ .result_unpriv = REJECT,
4473 + .result = ACCEPT,
4474 + .retval = 2,
4475 + },
4476 +@@ -688,6 +706,8 @@
4477 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4478 + BPF_EXIT_INSN(),
4479 + },
4480 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4481 ++ .result_unpriv = REJECT,
4482 + .result = ACCEPT,
4483 + .retval = 2,
4484 + },
4485 +@@ -756,6 +776,8 @@
4486 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
4487 + BPF_EXIT_INSN(),
4488 + },
4489 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4490 ++ .result_unpriv = REJECT,
4491 + .result = ACCEPT,
4492 + .retval = 2,
4493 + },
4494 +diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
4495 +index 8dcd4e0383d57..11fc68da735ea 100644
4496 +--- a/tools/testing/selftests/bpf/verifier/jset.c
4497 ++++ b/tools/testing/selftests/bpf/verifier/jset.c
4498 +@@ -82,8 +82,8 @@
4499 + BPF_EXIT_INSN(),
4500 + },
4501 + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
4502 +- .retval_unpriv = 1,
4503 +- .result_unpriv = ACCEPT,
4504 ++ .errstr_unpriv = "R9 !read_ok",
4505 ++ .result_unpriv = REJECT,
4506 + .retval = 1,
4507 + .result = ACCEPT,
4508 + },
4509 +@@ -141,7 +141,8 @@
4510 + BPF_EXIT_INSN(),
4511 + },
4512 + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
4513 +- .result_unpriv = ACCEPT,
4514 ++ .errstr_unpriv = "R9 !read_ok",
4515 ++ .result_unpriv = REJECT,
4516 + .result = ACCEPT,
4517 + },
4518 + {
4519 +@@ -162,6 +163,7 @@
4520 + BPF_EXIT_INSN(),
4521 + },
4522 + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
4523 +- .result_unpriv = ACCEPT,
4524 ++ .errstr_unpriv = "R9 !read_ok",
4525 ++ .result_unpriv = REJECT,
4526 + .result = ACCEPT,
4527 + },
4528 +diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
4529 +index bd436df5cc326..111801aea5e35 100644
4530 +--- a/tools/testing/selftests/bpf/verifier/unpriv.c
4531 ++++ b/tools/testing/selftests/bpf/verifier/unpriv.c
4532 +@@ -420,6 +420,8 @@
4533 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
4534 + BPF_EXIT_INSN(),
4535 + },
4536 ++ .errstr_unpriv = "R7 invalid mem access 'inv'",
4537 ++ .result_unpriv = REJECT,
4538 + .result = ACCEPT,
4539 + .retval = 0,
4540 + },
4541 +diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
4542 +index 7ae2859d495c5..a3e593ddfafc9 100644
4543 +--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
4544 ++++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
4545 +@@ -120,7 +120,7 @@
4546 + .fixup_map_array_48b = { 1 },
4547 + .result = ACCEPT,
4548 + .result_unpriv = REJECT,
4549 +- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
4550 ++ .errstr_unpriv = "R2 pointer comparison prohibited",
4551 + .retval = 0,
4552 + },
4553 + {
4554 +@@ -159,7 +159,8 @@
4555 + BPF_MOV64_IMM(BPF_REG_0, 0),
4556 + BPF_EXIT_INSN(),
4557 + // fake-dead code; targeted from branch A to
4558 +- // prevent dead code sanitization
4559 ++ // prevent dead code sanitization, rejected
4560 ++ // via branch B however
4561 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
4562 + BPF_MOV64_IMM(BPF_REG_0, 0),
4563 + BPF_EXIT_INSN(),
4564 +@@ -167,7 +168,7 @@
4565 + .fixup_map_array_48b = { 1 },
4566 + .result = ACCEPT,
4567 + .result_unpriv = REJECT,
4568 +- .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
4569 ++ .errstr_unpriv = "R0 invalid mem access 'inv'",
4570 + .retval = 0,
4571 + },
4572 + {
4573 +diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
4574 +index 2f0e4365f61bd..8b90256bca96d 100644
4575 +--- a/tools/testing/selftests/kvm/lib/kvm_util.c
4576 ++++ b/tools/testing/selftests/kvm/lib/kvm_util.c
4577 +@@ -58,7 +58,7 @@ int kvm_check_cap(long cap)
4578 + exit(KSFT_SKIP);
4579 +
4580 + ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
4581 +- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
4582 ++ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
4583 + " rc: %i errno: %i", ret, errno);
4584 +
4585 + close(kvm_fd);
4586 +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
4587 +index 5cabc6c748db1..4cce5735271ef 100644
4588 +--- a/virt/kvm/kvm_main.c
4589 ++++ b/virt/kvm/kvm_main.c
4590 +@@ -1919,6 +1919,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
4591 + return true;
4592 + }
4593 +
4594 ++static int kvm_try_get_pfn(kvm_pfn_t pfn)
4595 ++{
4596 ++ if (kvm_is_reserved_pfn(pfn))
4597 ++ return 1;
4598 ++ return get_page_unless_zero(pfn_to_page(pfn));
4599 ++}
4600 ++
4601 + static int hva_to_pfn_remapped(struct vm_area_struct *vma,
4602 + unsigned long addr, bool *async,
4603 + bool write_fault, bool *writable,
4604 +@@ -1968,13 +1975,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
4605 + * Whoever called remap_pfn_range is also going to call e.g.
4606 + * unmap_mapping_range before the underlying pages are freed,
4607 + * causing a call to our MMU notifier.
4608 ++ *
4609 ++ * Certain IO or PFNMAP mappings can be backed with valid
4610 ++ * struct pages, but be allocated without refcounting e.g.,
4611 ++ * tail pages of non-compound higher order allocations, which
4612 ++ * would then underflow the refcount when the caller does the
4613 ++ * required put_page. Don't allow those pages here.
4614 + */
4615 +- kvm_get_pfn(pfn);
4616 ++ if (!kvm_try_get_pfn(pfn))
4617 ++ r = -EFAULT;
4618 +
4619 + out:
4620 + pte_unmap_unlock(ptep, ptl);
4621 + *p_pfn = pfn;
4622 +- return 0;
4623 ++
4624 ++ return r;
4625 + }
4626 +
4627 + /*