From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.8 commit in: /
Date: Sun, 01 Nov 2020 20:32:23
Message-Id: 1604262722.5c3df6341bbcb452808a06a1104d6616e72143d0.mpagano@gentoo
1 commit: 5c3df6341bbcb452808a06a1104d6616e72143d0
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Sun Nov 1 20:32:02 2020 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Sun Nov 1 20:32:02 2020 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=5c3df634
7
8 Linux patch 5.8.18
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 +
13 1017_linux-5.8.18.patch | 5442 +++++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 5446 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index 333aabc..a90cff2 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -111,6 +111,10 @@ Patch: 1016_linux-5.8.17.patch
21 From: http://www.kernel.org
22 Desc: Linux 5.8.17
23
24 +Patch: 1017_linux-5.8.18.patch
25 +From: http://www.kernel.org
26 +Desc: Linux 5.8.18
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1017_linux-5.8.18.patch b/1017_linux-5.8.18.patch
33 new file mode 100644
34 index 0000000..473975b
35 --- /dev/null
36 +++ b/1017_linux-5.8.18.patch
37 @@ -0,0 +1,5442 @@
38 +diff --git a/Makefile b/Makefile
39 +index 9bdb93053ee93..33c45a0cd8582 100644
40 +--- a/Makefile
41 ++++ b/Makefile
42 +@@ -1,7 +1,7 @@
43 + # SPDX-License-Identifier: GPL-2.0
44 + VERSION = 5
45 + PATCHLEVEL = 8
46 +-SUBLEVEL = 17
47 ++SUBLEVEL = 18
48 + EXTRAVERSION =
49 + NAME = Kleptomaniac Octopus
50 +
51 +diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
52 +index d5fe7c9e0be1d..5a34423464188 100644
53 +--- a/arch/arm64/Makefile
54 ++++ b/arch/arm64/Makefile
55 +@@ -10,14 +10,14 @@
56 + #
57 + # Copyright (C) 1995-2001 by Russell King
58 +
59 +-LDFLAGS_vmlinux :=--no-undefined -X
60 ++LDFLAGS_vmlinux :=--no-undefined -X -z norelro
61 + CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
62 +
63 + ifeq ($(CONFIG_RELOCATABLE), y)
64 + # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
65 + # for relative relocs, since this leads to better Image compression
66 + # with the relocation offsets always being zero.
67 +-LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \
68 ++LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \
69 + $(call ld-option, --no-apply-dynamic-relocs)
70 + endif
71 +
72 +diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
73 +index 6e8a7eec667e8..d8a2bacf4e0a8 100644
74 +--- a/arch/arm64/kernel/cpu_errata.c
75 ++++ b/arch/arm64/kernel/cpu_errata.c
76 +@@ -457,6 +457,12 @@ out_printmsg:
77 + return required;
78 + }
79 +
80 ++static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
81 ++{
82 ++ if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
83 ++ cap->matches(cap, SCOPE_LOCAL_CPU);
84 ++}
85 ++
86 + /* known invulnerable cores */
87 + static const struct midr_range arm64_ssb_cpus[] = {
88 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
89 +@@ -599,6 +605,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
90 + return (need_wa > 0);
91 + }
92 +
93 ++static void
94 ++cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
95 ++{
96 ++ cap->matches(cap, SCOPE_LOCAL_CPU);
97 ++}
98 ++
99 + static const __maybe_unused struct midr_range tx2_family_cpus[] = {
100 + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
101 + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
102 +@@ -890,9 +902,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
103 + },
104 + #endif
105 + {
106 ++ .desc = "Branch predictor hardening",
107 + .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
108 + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
109 + .matches = check_branch_predictor,
110 ++ .cpu_enable = cpu_enable_branch_predictor_hardening,
111 + },
112 + #ifdef CONFIG_HARDEN_EL2_VECTORS
113 + {
114 +@@ -906,6 +920,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
115 + .capability = ARM64_SSBD,
116 + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
117 + .matches = has_ssbd_mitigation,
118 ++ .cpu_enable = cpu_enable_ssbd_mitigation,
119 + .midr_range_list = arm64_ssb_cpus,
120 + },
121 + #ifdef CONFIG_ARM64_ERRATUM_1418040
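
The two cpu_enable helpers added above both re-run the entry's .matches() check with SCOPE_LOCAL_CPU (the SSBD one only when the mitigation is not force-disabled), so the workaround is applied on every CPU that comes up, not only the ones probed during boot-time feature detection. As a rough consolidated sketch (illustrative only, not part of the patch), an errata entry wired up this way looks like:

static const struct arm64_cpu_capabilities example_entry = {
        .desc           = "Branch predictor hardening",
        .capability     = ARM64_HARDEN_BRANCH_PREDICTOR,
        .type           = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
        /* decides on each CPU whether the workaround is needed */
        .matches        = check_branch_predictor,
        /* applies it on that CPU, including late/hotplugged ones */
        .cpu_enable     = cpu_enable_branch_predictor_hardening,
};
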
122 +diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
123 +index 17c24f14615fb..6839f8fcf76b2 100644
124 +--- a/arch/openrisc/include/asm/uaccess.h
125 ++++ b/arch/openrisc/include/asm/uaccess.h
126 +@@ -164,19 +164,19 @@ struct __large_struct {
127 +
128 + #define __get_user_nocheck(x, ptr, size) \
129 + ({ \
130 +- long __gu_err, __gu_val; \
131 +- __get_user_size(__gu_val, (ptr), (size), __gu_err); \
132 +- (x) = (__force __typeof__(*(ptr)))__gu_val; \
133 ++ long __gu_err; \
134 ++ __get_user_size((x), (ptr), (size), __gu_err); \
135 + __gu_err; \
136 + })
137 +
138 + #define __get_user_check(x, ptr, size) \
139 + ({ \
140 +- long __gu_err = -EFAULT, __gu_val = 0; \
141 +- const __typeof__(*(ptr)) * __gu_addr = (ptr); \
142 +- if (access_ok(__gu_addr, size)) \
143 +- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
144 +- (x) = (__force __typeof__(*(ptr)))__gu_val; \
145 ++ long __gu_err = -EFAULT; \
146 ++ const __typeof__(*(ptr)) *__gu_addr = (ptr); \
147 ++ if (access_ok(__gu_addr, size)) \
148 ++ __get_user_size((x), __gu_addr, (size), __gu_err); \
149 ++ else \
150 ++ (x) = (__typeof__(*(ptr))) 0; \
151 + __gu_err; \
152 + })
153 +
154 +@@ -190,11 +190,13 @@ do { \
155 + case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \
156 + case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \
157 + case 8: __get_user_asm2(x, ptr, retval); break; \
158 +- default: (x) = __get_user_bad(); \
159 ++ default: (x) = (__typeof__(*(ptr)))__get_user_bad(); \
160 + } \
161 + } while (0)
162 +
163 + #define __get_user_asm(x, addr, err, op) \
164 ++{ \
165 ++ unsigned long __gu_tmp; \
166 + __asm__ __volatile__( \
167 + "1: "op" %1,0(%2)\n" \
168 + "2:\n" \
169 +@@ -208,10 +210,14 @@ do { \
170 + " .align 2\n" \
171 + " .long 1b,3b\n" \
172 + ".previous" \
173 +- : "=r"(err), "=r"(x) \
174 +- : "r"(addr), "i"(-EFAULT), "0"(err))
175 ++ : "=r"(err), "=r"(__gu_tmp) \
176 ++ : "r"(addr), "i"(-EFAULT), "0"(err)); \
177 ++ (x) = (__typeof__(*(addr)))__gu_tmp; \
178 ++}
179 +
180 + #define __get_user_asm2(x, addr, err) \
181 ++{ \
182 ++ unsigned long long __gu_tmp; \
183 + __asm__ __volatile__( \
184 + "1: l.lwz %1,0(%2)\n" \
185 + "2: l.lwz %H1,4(%2)\n" \
186 +@@ -228,8 +234,11 @@ do { \
187 + " .long 1b,4b\n" \
188 + " .long 2b,4b\n" \
189 + ".previous" \
190 +- : "=r"(err), "=&r"(x) \
191 +- : "r"(addr), "i"(-EFAULT), "0"(err))
192 ++ : "=r"(err), "=&r"(__gu_tmp) \
193 ++ : "r"(addr), "i"(-EFAULT), "0"(err)); \
194 ++ (x) = (__typeof__(*(addr)))( \
195 ++ (__typeof__((x)-(x)))__gu_tmp); \
196 ++}
197 +
198 + /* more complex routines */
199 +
200 +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
201 +index 9fa23eb320ff5..cf78ad7ff0b7c 100644
202 +--- a/arch/powerpc/Kconfig
203 ++++ b/arch/powerpc/Kconfig
204 +@@ -135,7 +135,7 @@ config PPC
205 + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
206 + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
207 + select ARCH_HAS_UACCESS_FLUSHCACHE
208 +- select ARCH_HAS_UACCESS_MCSAFE if PPC64
209 ++ select ARCH_HAS_COPY_MC if PPC64
210 + select ARCH_HAS_UBSAN_SANITIZE_ALL
211 + select ARCH_HAVE_NMI_SAFE_CMPXCHG
212 + select ARCH_KEEP_MEMBLOCK
213 +diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
214 +index b72692702f35f..9bf6dffb40900 100644
215 +--- a/arch/powerpc/include/asm/string.h
216 ++++ b/arch/powerpc/include/asm/string.h
217 +@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
218 + #ifndef CONFIG_KASAN
219 + #define __HAVE_ARCH_MEMSET32
220 + #define __HAVE_ARCH_MEMSET64
221 +-#define __HAVE_ARCH_MEMCPY_MCSAFE
222 +
223 +-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
224 + extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
225 + extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
226 + extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
227 +diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
228 +index 64c04ab091123..97506441c15b1 100644
229 +--- a/arch/powerpc/include/asm/uaccess.h
230 ++++ b/arch/powerpc/include/asm/uaccess.h
231 +@@ -436,6 +436,32 @@ do { \
232 + extern unsigned long __copy_tofrom_user(void __user *to,
233 + const void __user *from, unsigned long size);
234 +
235 ++#ifdef CONFIG_ARCH_HAS_COPY_MC
236 ++unsigned long __must_check
237 ++copy_mc_generic(void *to, const void *from, unsigned long size);
238 ++
239 ++static inline unsigned long __must_check
240 ++copy_mc_to_kernel(void *to, const void *from, unsigned long size)
241 ++{
242 ++ return copy_mc_generic(to, from, size);
243 ++}
244 ++#define copy_mc_to_kernel copy_mc_to_kernel
245 ++
246 ++static inline unsigned long __must_check
247 ++copy_mc_to_user(void __user *to, const void *from, unsigned long n)
248 ++{
249 ++ if (likely(check_copy_size(from, n, true))) {
250 ++ if (access_ok(to, n)) {
251 ++ allow_write_to_user(to, n);
252 ++ n = copy_mc_generic((void *)to, from, n);
253 ++ prevent_write_to_user(to, n);
254 ++ }
255 ++ }
256 ++
257 ++ return n;
258 ++}
259 ++#endif
260 ++
261 + #ifdef __powerpc64__
262 + static inline unsigned long
263 + raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
264 +@@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
265 + return ret;
266 + }
267 +
268 +-static __always_inline unsigned long __must_check
269 +-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
270 +-{
271 +- if (likely(check_copy_size(from, n, true))) {
272 +- if (access_ok(to, n)) {
273 +- allow_write_to_user(to, n);
274 +- n = memcpy_mcsafe((void *)to, from, n);
275 +- prevent_write_to_user(to, n);
276 +- }
277 +- }
278 +-
279 +- return n;
280 +-}
281 +-
282 + unsigned long __arch_clear_user(void __user *addr, unsigned long size);
283 +
284 + static inline unsigned long clear_user(void __user *addr, unsigned long size)
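
copy_mc_to_user() above keeps the calling convention of the copy_to_user_mcsafe() removed in the hunk just before it: the return value is the number of bytes that were not copied. A hypothetical caller, with illustrative names only:

static ssize_t copy_chunk_to_user(void __user *ubuf, const void *kbuf,
                                  unsigned long len)
{
        unsigned long rem = copy_mc_to_user(ubuf, kbuf, len);

        if (rem == len)
                return -EFAULT;         /* nothing was copied */

        return len - rem;               /* short copy after a fault or MCE */
}
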
285 +diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
286 +index 5e994cda8e401..c254f5f733a86 100644
287 +--- a/arch/powerpc/lib/Makefile
288 ++++ b/arch/powerpc/lib/Makefile
289 +@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
290 + memcpy_power7.o
291 +
292 + obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
293 +- memcpy_64.o memcpy_mcsafe_64.o
294 ++ memcpy_64.o copy_mc_64.o
295 +
296 + obj64-$(CONFIG_SMP) += locks.o
297 + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
298 +diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
299 +new file mode 100644
300 +index 0000000000000..88d46c471493b
301 +--- /dev/null
302 ++++ b/arch/powerpc/lib/copy_mc_64.S
303 +@@ -0,0 +1,242 @@
304 ++/* SPDX-License-Identifier: GPL-2.0 */
305 ++/*
306 ++ * Copyright (C) IBM Corporation, 2011
307 ++ * Derived from copyuser_power7.s by Anton Blanchard <anton@××××××.com>
308 ++ * Author - Balbir Singh <bsingharora@×××××.com>
309 ++ */
310 ++#include <asm/ppc_asm.h>
311 ++#include <asm/errno.h>
312 ++#include <asm/export.h>
313 ++
314 ++ .macro err1
315 ++100:
316 ++ EX_TABLE(100b,.Ldo_err1)
317 ++ .endm
318 ++
319 ++ .macro err2
320 ++200:
321 ++ EX_TABLE(200b,.Ldo_err2)
322 ++ .endm
323 ++
324 ++ .macro err3
325 ++300: EX_TABLE(300b,.Ldone)
326 ++ .endm
327 ++
328 ++.Ldo_err2:
329 ++ ld r22,STK_REG(R22)(r1)
330 ++ ld r21,STK_REG(R21)(r1)
331 ++ ld r20,STK_REG(R20)(r1)
332 ++ ld r19,STK_REG(R19)(r1)
333 ++ ld r18,STK_REG(R18)(r1)
334 ++ ld r17,STK_REG(R17)(r1)
335 ++ ld r16,STK_REG(R16)(r1)
336 ++ ld r15,STK_REG(R15)(r1)
337 ++ ld r14,STK_REG(R14)(r1)
338 ++ addi r1,r1,STACKFRAMESIZE
339 ++.Ldo_err1:
340 ++ /* Do a byte by byte copy to get the exact remaining size */
341 ++ mtctr r7
342 ++46:
343 ++err3; lbz r0,0(r4)
344 ++ addi r4,r4,1
345 ++err3; stb r0,0(r3)
346 ++ addi r3,r3,1
347 ++ bdnz 46b
348 ++ li r3,0
349 ++ blr
350 ++
351 ++.Ldone:
352 ++ mfctr r3
353 ++ blr
354 ++
355 ++
356 ++_GLOBAL(copy_mc_generic)
357 ++ mr r7,r5
358 ++ cmpldi r5,16
359 ++ blt .Lshort_copy
360 ++
361 ++.Lcopy:
362 ++ /* Get the source 8B aligned */
363 ++ neg r6,r4
364 ++ mtocrf 0x01,r6
365 ++ clrldi r6,r6,(64-3)
366 ++
367 ++ bf cr7*4+3,1f
368 ++err1; lbz r0,0(r4)
369 ++ addi r4,r4,1
370 ++err1; stb r0,0(r3)
371 ++ addi r3,r3,1
372 ++ subi r7,r7,1
373 ++
374 ++1: bf cr7*4+2,2f
375 ++err1; lhz r0,0(r4)
376 ++ addi r4,r4,2
377 ++err1; sth r0,0(r3)
378 ++ addi r3,r3,2
379 ++ subi r7,r7,2
380 ++
381 ++2: bf cr7*4+1,3f
382 ++err1; lwz r0,0(r4)
383 ++ addi r4,r4,4
384 ++err1; stw r0,0(r3)
385 ++ addi r3,r3,4
386 ++ subi r7,r7,4
387 ++
388 ++3: sub r5,r5,r6
389 ++ cmpldi r5,128
390 ++
391 ++ mflr r0
392 ++ stdu r1,-STACKFRAMESIZE(r1)
393 ++ std r14,STK_REG(R14)(r1)
394 ++ std r15,STK_REG(R15)(r1)
395 ++ std r16,STK_REG(R16)(r1)
396 ++ std r17,STK_REG(R17)(r1)
397 ++ std r18,STK_REG(R18)(r1)
398 ++ std r19,STK_REG(R19)(r1)
399 ++ std r20,STK_REG(R20)(r1)
400 ++ std r21,STK_REG(R21)(r1)
401 ++ std r22,STK_REG(R22)(r1)
402 ++ std r0,STACKFRAMESIZE+16(r1)
403 ++
404 ++ blt 5f
405 ++ srdi r6,r5,7
406 ++ mtctr r6
407 ++
408 ++ /* Now do cacheline (128B) sized loads and stores. */
409 ++ .align 5
410 ++4:
411 ++err2; ld r0,0(r4)
412 ++err2; ld r6,8(r4)
413 ++err2; ld r8,16(r4)
414 ++err2; ld r9,24(r4)
415 ++err2; ld r10,32(r4)
416 ++err2; ld r11,40(r4)
417 ++err2; ld r12,48(r4)
418 ++err2; ld r14,56(r4)
419 ++err2; ld r15,64(r4)
420 ++err2; ld r16,72(r4)
421 ++err2; ld r17,80(r4)
422 ++err2; ld r18,88(r4)
423 ++err2; ld r19,96(r4)
424 ++err2; ld r20,104(r4)
425 ++err2; ld r21,112(r4)
426 ++err2; ld r22,120(r4)
427 ++ addi r4,r4,128
428 ++err2; std r0,0(r3)
429 ++err2; std r6,8(r3)
430 ++err2; std r8,16(r3)
431 ++err2; std r9,24(r3)
432 ++err2; std r10,32(r3)
433 ++err2; std r11,40(r3)
434 ++err2; std r12,48(r3)
435 ++err2; std r14,56(r3)
436 ++err2; std r15,64(r3)
437 ++err2; std r16,72(r3)
438 ++err2; std r17,80(r3)
439 ++err2; std r18,88(r3)
440 ++err2; std r19,96(r3)
441 ++err2; std r20,104(r3)
442 ++err2; std r21,112(r3)
443 ++err2; std r22,120(r3)
444 ++ addi r3,r3,128
445 ++ subi r7,r7,128
446 ++ bdnz 4b
447 ++
448 ++ clrldi r5,r5,(64-7)
449 ++
450 ++ /* Up to 127B to go */
451 ++5: srdi r6,r5,4
452 ++ mtocrf 0x01,r6
453 ++
454 ++6: bf cr7*4+1,7f
455 ++err2; ld r0,0(r4)
456 ++err2; ld r6,8(r4)
457 ++err2; ld r8,16(r4)
458 ++err2; ld r9,24(r4)
459 ++err2; ld r10,32(r4)
460 ++err2; ld r11,40(r4)
461 ++err2; ld r12,48(r4)
462 ++err2; ld r14,56(r4)
463 ++ addi r4,r4,64
464 ++err2; std r0,0(r3)
465 ++err2; std r6,8(r3)
466 ++err2; std r8,16(r3)
467 ++err2; std r9,24(r3)
468 ++err2; std r10,32(r3)
469 ++err2; std r11,40(r3)
470 ++err2; std r12,48(r3)
471 ++err2; std r14,56(r3)
472 ++ addi r3,r3,64
473 ++ subi r7,r7,64
474 ++
475 ++7: ld r14,STK_REG(R14)(r1)
476 ++ ld r15,STK_REG(R15)(r1)
477 ++ ld r16,STK_REG(R16)(r1)
478 ++ ld r17,STK_REG(R17)(r1)
479 ++ ld r18,STK_REG(R18)(r1)
480 ++ ld r19,STK_REG(R19)(r1)
481 ++ ld r20,STK_REG(R20)(r1)
482 ++ ld r21,STK_REG(R21)(r1)
483 ++ ld r22,STK_REG(R22)(r1)
484 ++ addi r1,r1,STACKFRAMESIZE
485 ++
486 ++ /* Up to 63B to go */
487 ++ bf cr7*4+2,8f
488 ++err1; ld r0,0(r4)
489 ++err1; ld r6,8(r4)
490 ++err1; ld r8,16(r4)
491 ++err1; ld r9,24(r4)
492 ++ addi r4,r4,32
493 ++err1; std r0,0(r3)
494 ++err1; std r6,8(r3)
495 ++err1; std r8,16(r3)
496 ++err1; std r9,24(r3)
497 ++ addi r3,r3,32
498 ++ subi r7,r7,32
499 ++
500 ++ /* Up to 31B to go */
501 ++8: bf cr7*4+3,9f
502 ++err1; ld r0,0(r4)
503 ++err1; ld r6,8(r4)
504 ++ addi r4,r4,16
505 ++err1; std r0,0(r3)
506 ++err1; std r6,8(r3)
507 ++ addi r3,r3,16
508 ++ subi r7,r7,16
509 ++
510 ++9: clrldi r5,r5,(64-4)
511 ++
512 ++ /* Up to 15B to go */
513 ++.Lshort_copy:
514 ++ mtocrf 0x01,r5
515 ++ bf cr7*4+0,12f
516 ++err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
517 ++err1; lwz r6,4(r4)
518 ++ addi r4,r4,8
519 ++err1; stw r0,0(r3)
520 ++err1; stw r6,4(r3)
521 ++ addi r3,r3,8
522 ++ subi r7,r7,8
523 ++
524 ++12: bf cr7*4+1,13f
525 ++err1; lwz r0,0(r4)
526 ++ addi r4,r4,4
527 ++err1; stw r0,0(r3)
528 ++ addi r3,r3,4
529 ++ subi r7,r7,4
530 ++
531 ++13: bf cr7*4+2,14f
532 ++err1; lhz r0,0(r4)
533 ++ addi r4,r4,2
534 ++err1; sth r0,0(r3)
535 ++ addi r3,r3,2
536 ++ subi r7,r7,2
537 ++
538 ++14: bf cr7*4+3,15f
539 ++err1; lbz r0,0(r4)
540 ++err1; stb r0,0(r3)
541 ++
542 ++15: li r3,0
543 ++ blr
544 ++
545 ++EXPORT_SYMBOL_GPL(copy_mc_generic);
546 +diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
547 +deleted file mode 100644
548 +index cb882d9a6d8a3..0000000000000
549 +--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
550 ++++ /dev/null
551 +@@ -1,242 +0,0 @@
552 +-/* SPDX-License-Identifier: GPL-2.0 */
553 +-/*
554 +- * Copyright (C) IBM Corporation, 2011
555 +- * Derived from copyuser_power7.s by Anton Blanchard <anton@××××××.com>
556 +- * Author - Balbir Singh <bsingharora@×××××.com>
557 +- */
558 +-#include <asm/ppc_asm.h>
559 +-#include <asm/errno.h>
560 +-#include <asm/export.h>
561 +-
562 +- .macro err1
563 +-100:
564 +- EX_TABLE(100b,.Ldo_err1)
565 +- .endm
566 +-
567 +- .macro err2
568 +-200:
569 +- EX_TABLE(200b,.Ldo_err2)
570 +- .endm
571 +-
572 +- .macro err3
573 +-300: EX_TABLE(300b,.Ldone)
574 +- .endm
575 +-
576 +-.Ldo_err2:
577 +- ld r22,STK_REG(R22)(r1)
578 +- ld r21,STK_REG(R21)(r1)
579 +- ld r20,STK_REG(R20)(r1)
580 +- ld r19,STK_REG(R19)(r1)
581 +- ld r18,STK_REG(R18)(r1)
582 +- ld r17,STK_REG(R17)(r1)
583 +- ld r16,STK_REG(R16)(r1)
584 +- ld r15,STK_REG(R15)(r1)
585 +- ld r14,STK_REG(R14)(r1)
586 +- addi r1,r1,STACKFRAMESIZE
587 +-.Ldo_err1:
588 +- /* Do a byte by byte copy to get the exact remaining size */
589 +- mtctr r7
590 +-46:
591 +-err3; lbz r0,0(r4)
592 +- addi r4,r4,1
593 +-err3; stb r0,0(r3)
594 +- addi r3,r3,1
595 +- bdnz 46b
596 +- li r3,0
597 +- blr
598 +-
599 +-.Ldone:
600 +- mfctr r3
601 +- blr
602 +-
603 +-
604 +-_GLOBAL(memcpy_mcsafe)
605 +- mr r7,r5
606 +- cmpldi r5,16
607 +- blt .Lshort_copy
608 +-
609 +-.Lcopy:
610 +- /* Get the source 8B aligned */
611 +- neg r6,r4
612 +- mtocrf 0x01,r6
613 +- clrldi r6,r6,(64-3)
614 +-
615 +- bf cr7*4+3,1f
616 +-err1; lbz r0,0(r4)
617 +- addi r4,r4,1
618 +-err1; stb r0,0(r3)
619 +- addi r3,r3,1
620 +- subi r7,r7,1
621 +-
622 +-1: bf cr7*4+2,2f
623 +-err1; lhz r0,0(r4)
624 +- addi r4,r4,2
625 +-err1; sth r0,0(r3)
626 +- addi r3,r3,2
627 +- subi r7,r7,2
628 +-
629 +-2: bf cr7*4+1,3f
630 +-err1; lwz r0,0(r4)
631 +- addi r4,r4,4
632 +-err1; stw r0,0(r3)
633 +- addi r3,r3,4
634 +- subi r7,r7,4
635 +-
636 +-3: sub r5,r5,r6
637 +- cmpldi r5,128
638 +-
639 +- mflr r0
640 +- stdu r1,-STACKFRAMESIZE(r1)
641 +- std r14,STK_REG(R14)(r1)
642 +- std r15,STK_REG(R15)(r1)
643 +- std r16,STK_REG(R16)(r1)
644 +- std r17,STK_REG(R17)(r1)
645 +- std r18,STK_REG(R18)(r1)
646 +- std r19,STK_REG(R19)(r1)
647 +- std r20,STK_REG(R20)(r1)
648 +- std r21,STK_REG(R21)(r1)
649 +- std r22,STK_REG(R22)(r1)
650 +- std r0,STACKFRAMESIZE+16(r1)
651 +-
652 +- blt 5f
653 +- srdi r6,r5,7
654 +- mtctr r6
655 +-
656 +- /* Now do cacheline (128B) sized loads and stores. */
657 +- .align 5
658 +-4:
659 +-err2; ld r0,0(r4)
660 +-err2; ld r6,8(r4)
661 +-err2; ld r8,16(r4)
662 +-err2; ld r9,24(r4)
663 +-err2; ld r10,32(r4)
664 +-err2; ld r11,40(r4)
665 +-err2; ld r12,48(r4)
666 +-err2; ld r14,56(r4)
667 +-err2; ld r15,64(r4)
668 +-err2; ld r16,72(r4)
669 +-err2; ld r17,80(r4)
670 +-err2; ld r18,88(r4)
671 +-err2; ld r19,96(r4)
672 +-err2; ld r20,104(r4)
673 +-err2; ld r21,112(r4)
674 +-err2; ld r22,120(r4)
675 +- addi r4,r4,128
676 +-err2; std r0,0(r3)
677 +-err2; std r6,8(r3)
678 +-err2; std r8,16(r3)
679 +-err2; std r9,24(r3)
680 +-err2; std r10,32(r3)
681 +-err2; std r11,40(r3)
682 +-err2; std r12,48(r3)
683 +-err2; std r14,56(r3)
684 +-err2; std r15,64(r3)
685 +-err2; std r16,72(r3)
686 +-err2; std r17,80(r3)
687 +-err2; std r18,88(r3)
688 +-err2; std r19,96(r3)
689 +-err2; std r20,104(r3)
690 +-err2; std r21,112(r3)
691 +-err2; std r22,120(r3)
692 +- addi r3,r3,128
693 +- subi r7,r7,128
694 +- bdnz 4b
695 +-
696 +- clrldi r5,r5,(64-7)
697 +-
698 +- /* Up to 127B to go */
699 +-5: srdi r6,r5,4
700 +- mtocrf 0x01,r6
701 +-
702 +-6: bf cr7*4+1,7f
703 +-err2; ld r0,0(r4)
704 +-err2; ld r6,8(r4)
705 +-err2; ld r8,16(r4)
706 +-err2; ld r9,24(r4)
707 +-err2; ld r10,32(r4)
708 +-err2; ld r11,40(r4)
709 +-err2; ld r12,48(r4)
710 +-err2; ld r14,56(r4)
711 +- addi r4,r4,64
712 +-err2; std r0,0(r3)
713 +-err2; std r6,8(r3)
714 +-err2; std r8,16(r3)
715 +-err2; std r9,24(r3)
716 +-err2; std r10,32(r3)
717 +-err2; std r11,40(r3)
718 +-err2; std r12,48(r3)
719 +-err2; std r14,56(r3)
720 +- addi r3,r3,64
721 +- subi r7,r7,64
722 +-
723 +-7: ld r14,STK_REG(R14)(r1)
724 +- ld r15,STK_REG(R15)(r1)
725 +- ld r16,STK_REG(R16)(r1)
726 +- ld r17,STK_REG(R17)(r1)
727 +- ld r18,STK_REG(R18)(r1)
728 +- ld r19,STK_REG(R19)(r1)
729 +- ld r20,STK_REG(R20)(r1)
730 +- ld r21,STK_REG(R21)(r1)
731 +- ld r22,STK_REG(R22)(r1)
732 +- addi r1,r1,STACKFRAMESIZE
733 +-
734 +- /* Up to 63B to go */
735 +- bf cr7*4+2,8f
736 +-err1; ld r0,0(r4)
737 +-err1; ld r6,8(r4)
738 +-err1; ld r8,16(r4)
739 +-err1; ld r9,24(r4)
740 +- addi r4,r4,32
741 +-err1; std r0,0(r3)
742 +-err1; std r6,8(r3)
743 +-err1; std r8,16(r3)
744 +-err1; std r9,24(r3)
745 +- addi r3,r3,32
746 +- subi r7,r7,32
747 +-
748 +- /* Up to 31B to go */
749 +-8: bf cr7*4+3,9f
750 +-err1; ld r0,0(r4)
751 +-err1; ld r6,8(r4)
752 +- addi r4,r4,16
753 +-err1; std r0,0(r3)
754 +-err1; std r6,8(r3)
755 +- addi r3,r3,16
756 +- subi r7,r7,16
757 +-
758 +-9: clrldi r5,r5,(64-4)
759 +-
760 +- /* Up to 15B to go */
761 +-.Lshort_copy:
762 +- mtocrf 0x01,r5
763 +- bf cr7*4+0,12f
764 +-err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
765 +-err1; lwz r6,4(r4)
766 +- addi r4,r4,8
767 +-err1; stw r0,0(r3)
768 +-err1; stw r6,4(r3)
769 +- addi r3,r3,8
770 +- subi r7,r7,8
771 +-
772 +-12: bf cr7*4+1,13f
773 +-err1; lwz r0,0(r4)
774 +- addi r4,r4,4
775 +-err1; stw r0,0(r3)
776 +- addi r3,r3,4
777 +- subi r7,r7,4
778 +-
779 +-13: bf cr7*4+2,14f
780 +-err1; lhz r0,0(r4)
781 +- addi r4,r4,2
782 +-err1; sth r0,0(r3)
783 +- addi r3,r3,2
784 +- subi r7,r7,2
785 +-
786 +-14: bf cr7*4+3,15f
787 +-err1; lbz r0,0(r4)
788 +-err1; stb r0,0(r3)
789 +-
790 +-15: li r3,0
791 +- blr
792 +-
793 +-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
794 +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
795 +index 883da0abf7790..1f4104f8852b8 100644
796 +--- a/arch/x86/Kconfig
797 ++++ b/arch/x86/Kconfig
798 +@@ -75,7 +75,7 @@ config X86
799 + select ARCH_HAS_PTE_DEVMAP if X86_64
800 + select ARCH_HAS_PTE_SPECIAL
801 + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
802 +- select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
803 ++ select ARCH_HAS_COPY_MC if X86_64
804 + select ARCH_HAS_SET_MEMORY
805 + select ARCH_HAS_SET_DIRECT_MAP
806 + select ARCH_HAS_STRICT_KERNEL_RWX
807 +diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
808 +index 0dd319e6e5b49..ec98b400e38f9 100644
809 +--- a/arch/x86/Kconfig.debug
810 ++++ b/arch/x86/Kconfig.debug
811 +@@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC
812 + You should normally say N here, unless you want to debug early
813 + crashes or need a very simple printk logging facility.
814 +
815 +-config MCSAFE_TEST
816 ++config COPY_MC_TEST
817 + def_bool n
818 +
819 + config EFI_PGT_DUMP
820 +diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
821 +index 26c36357c4c9c..a023cbe21230a 100644
822 +--- a/arch/x86/events/amd/ibs.c
823 ++++ b/arch/x86/events/amd/ibs.c
824 +@@ -89,6 +89,7 @@ struct perf_ibs {
825 + u64 max_period;
826 + unsigned long offset_mask[1];
827 + int offset_max;
828 ++ unsigned int fetch_count_reset_broken : 1;
829 + struct cpu_perf_ibs __percpu *pcpu;
830 +
831 + struct attribute **format_attrs;
832 +@@ -363,7 +364,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
833 + static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
834 + struct hw_perf_event *hwc, u64 config)
835 + {
836 +- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
837 ++ u64 tmp = hwc->config | config;
838 ++
839 ++ if (perf_ibs->fetch_count_reset_broken)
840 ++ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
841 ++
842 ++ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
843 + }
844 +
845 + /*
846 +@@ -733,6 +739,13 @@ static __init void perf_event_ibs_init(void)
847 + {
848 + struct attribute **attr = ibs_op_format_attrs;
849 +
850 ++ /*
851 ++ * Some chips fail to reset the fetch count when it is written; instead
852 ++ * they need a 0-1 transition of IbsFetchEn.
853 ++ */
854 ++ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
855 ++ perf_ibs_fetch.fetch_count_reset_broken = 1;
856 ++
857 + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
858 +
859 + if (ibs_caps & IBS_CAPS_OPCNT) {
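
The comment in perf_event_ibs_init() above describes the quirk: on family 0x16 through 0x18 parts the IBS fetch counter is only rearmed by a 0->1 transition of IbsFetchEn, so perf_ibs_enable_event() first writes the register with the enable bit cleared and then with it set. Reduced to its essentials (illustrative sketch, not from the patch):

static void ibs_fetch_reenable(unsigned int msr, u64 config, u64 enable_mask,
                               bool reset_broken)
{
        if (reset_broken)
                wrmsrl(msr, config & ~enable_mask);     /* force IbsFetchEn to 0 ... */

        wrmsrl(msr, config | enable_mask);              /* ... then back to 1 */
}
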
860 +diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
861 +new file mode 100644
862 +index 0000000000000..e4991ba967266
863 +--- /dev/null
864 ++++ b/arch/x86/include/asm/copy_mc_test.h
865 +@@ -0,0 +1,75 @@
866 ++/* SPDX-License-Identifier: GPL-2.0 */
867 ++#ifndef _COPY_MC_TEST_H_
868 ++#define _COPY_MC_TEST_H_
869 ++
870 ++#ifndef __ASSEMBLY__
871 ++#ifdef CONFIG_COPY_MC_TEST
872 ++extern unsigned long copy_mc_test_src;
873 ++extern unsigned long copy_mc_test_dst;
874 ++
875 ++static inline void copy_mc_inject_src(void *addr)
876 ++{
877 ++ if (addr)
878 ++ copy_mc_test_src = (unsigned long) addr;
879 ++ else
880 ++ copy_mc_test_src = ~0UL;
881 ++}
882 ++
883 ++static inline void copy_mc_inject_dst(void *addr)
884 ++{
885 ++ if (addr)
886 ++ copy_mc_test_dst = (unsigned long) addr;
887 ++ else
888 ++ copy_mc_test_dst = ~0UL;
889 ++}
890 ++#else /* CONFIG_COPY_MC_TEST */
891 ++static inline void copy_mc_inject_src(void *addr)
892 ++{
893 ++}
894 ++
895 ++static inline void copy_mc_inject_dst(void *addr)
896 ++{
897 ++}
898 ++#endif /* CONFIG_COPY_MC_TEST */
899 ++
900 ++#else /* __ASSEMBLY__ */
901 ++#include <asm/export.h>
902 ++
903 ++#ifdef CONFIG_COPY_MC_TEST
904 ++.macro COPY_MC_TEST_CTL
905 ++ .pushsection .data
906 ++ .align 8
907 ++ .globl copy_mc_test_src
908 ++ copy_mc_test_src:
909 ++ .quad 0
910 ++ EXPORT_SYMBOL_GPL(copy_mc_test_src)
911 ++ .globl copy_mc_test_dst
912 ++ copy_mc_test_dst:
913 ++ .quad 0
914 ++ EXPORT_SYMBOL_GPL(copy_mc_test_dst)
915 ++ .popsection
916 ++.endm
917 ++
918 ++.macro COPY_MC_TEST_SRC reg count target
919 ++ leaq \count(\reg), %r9
920 ++ cmp copy_mc_test_src, %r9
921 ++ ja \target
922 ++.endm
923 ++
924 ++.macro COPY_MC_TEST_DST reg count target
925 ++ leaq \count(\reg), %r9
926 ++ cmp copy_mc_test_dst, %r9
927 ++ ja \target
928 ++.endm
929 ++#else
930 ++.macro COPY_MC_TEST_CTL
931 ++.endm
932 ++
933 ++.macro COPY_MC_TEST_SRC reg count target
934 ++.endm
935 ++
936 ++.macro COPY_MC_TEST_DST reg count target
937 ++.endm
938 ++#endif /* CONFIG_COPY_MC_TEST */
939 ++#endif /* __ASSEMBLY__ */
940 ++#endif /* _COPY_MC_TEST_H_ */
941 +diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
942 +index cf503824529ce..9b9112e4379ab 100644
943 +--- a/arch/x86/include/asm/mce.h
944 ++++ b/arch/x86/include/asm/mce.h
945 +@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
946 +
947 + extern int mce_p5_enabled;
948 +
949 ++#ifdef CONFIG_ARCH_HAS_COPY_MC
950 ++extern void enable_copy_mc_fragile(void);
951 ++unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
952 ++#else
953 ++static inline void enable_copy_mc_fragile(void)
954 ++{
955 ++}
956 ++#endif
957 ++
958 + #ifdef CONFIG_X86_MCE
959 + int mcheck_init(void);
960 + void mcheck_cpu_init(struct cpuinfo_x86 *c);
961 +diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
962 +deleted file mode 100644
963 +index eb59804b6201c..0000000000000
964 +--- a/arch/x86/include/asm/mcsafe_test.h
965 ++++ /dev/null
966 +@@ -1,75 +0,0 @@
967 +-/* SPDX-License-Identifier: GPL-2.0 */
968 +-#ifndef _MCSAFE_TEST_H_
969 +-#define _MCSAFE_TEST_H_
970 +-
971 +-#ifndef __ASSEMBLY__
972 +-#ifdef CONFIG_MCSAFE_TEST
973 +-extern unsigned long mcsafe_test_src;
974 +-extern unsigned long mcsafe_test_dst;
975 +-
976 +-static inline void mcsafe_inject_src(void *addr)
977 +-{
978 +- if (addr)
979 +- mcsafe_test_src = (unsigned long) addr;
980 +- else
981 +- mcsafe_test_src = ~0UL;
982 +-}
983 +-
984 +-static inline void mcsafe_inject_dst(void *addr)
985 +-{
986 +- if (addr)
987 +- mcsafe_test_dst = (unsigned long) addr;
988 +- else
989 +- mcsafe_test_dst = ~0UL;
990 +-}
991 +-#else /* CONFIG_MCSAFE_TEST */
992 +-static inline void mcsafe_inject_src(void *addr)
993 +-{
994 +-}
995 +-
996 +-static inline void mcsafe_inject_dst(void *addr)
997 +-{
998 +-}
999 +-#endif /* CONFIG_MCSAFE_TEST */
1000 +-
1001 +-#else /* __ASSEMBLY__ */
1002 +-#include <asm/export.h>
1003 +-
1004 +-#ifdef CONFIG_MCSAFE_TEST
1005 +-.macro MCSAFE_TEST_CTL
1006 +- .pushsection .data
1007 +- .align 8
1008 +- .globl mcsafe_test_src
1009 +- mcsafe_test_src:
1010 +- .quad 0
1011 +- EXPORT_SYMBOL_GPL(mcsafe_test_src)
1012 +- .globl mcsafe_test_dst
1013 +- mcsafe_test_dst:
1014 +- .quad 0
1015 +- EXPORT_SYMBOL_GPL(mcsafe_test_dst)
1016 +- .popsection
1017 +-.endm
1018 +-
1019 +-.macro MCSAFE_TEST_SRC reg count target
1020 +- leaq \count(\reg), %r9
1021 +- cmp mcsafe_test_src, %r9
1022 +- ja \target
1023 +-.endm
1024 +-
1025 +-.macro MCSAFE_TEST_DST reg count target
1026 +- leaq \count(\reg), %r9
1027 +- cmp mcsafe_test_dst, %r9
1028 +- ja \target
1029 +-.endm
1030 +-#else
1031 +-.macro MCSAFE_TEST_CTL
1032 +-.endm
1033 +-
1034 +-.macro MCSAFE_TEST_SRC reg count target
1035 +-.endm
1036 +-
1037 +-.macro MCSAFE_TEST_DST reg count target
1038 +-.endm
1039 +-#endif /* CONFIG_MCSAFE_TEST */
1040 +-#endif /* __ASSEMBLY__ */
1041 +-#endif /* _MCSAFE_TEST_H_ */
1042 +diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
1043 +index 75314c3dbe471..6e450827f677a 100644
1044 +--- a/arch/x86/include/asm/string_64.h
1045 ++++ b/arch/x86/include/asm/string_64.h
1046 +@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
1047 +
1048 + #endif
1049 +
1050 +-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
1051 +-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
1052 +- size_t cnt);
1053 +-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
1054 +-
1055 +-/**
1056 +- * memcpy_mcsafe - copy memory with indication if a machine check happened
1057 +- *
1058 +- * @dst: destination address
1059 +- * @src: source address
1060 +- * @cnt: number of bytes to copy
1061 +- *
1062 +- * Low level memory copy function that catches machine checks
1063 +- * We only call into the "safe" function on systems that can
1064 +- * actually do machine check recovery. Everyone else can just
1065 +- * use memcpy().
1066 +- *
1067 +- * Return 0 for success, or number of bytes not copied if there was an
1068 +- * exception.
1069 +- */
1070 +-static __always_inline __must_check unsigned long
1071 +-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
1072 +-{
1073 +-#ifdef CONFIG_X86_MCE
1074 +- if (static_branch_unlikely(&mcsafe_key))
1075 +- return __memcpy_mcsafe(dst, src, cnt);
1076 +- else
1077 +-#endif
1078 +- memcpy(dst, src, cnt);
1079 +- return 0;
1080 +-}
1081 +-
1082 + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
1083 + #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
1084 + void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
1085 +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
1086 +index 2f3e8f2a958f6..9bfca52b46411 100644
1087 +--- a/arch/x86/include/asm/uaccess.h
1088 ++++ b/arch/x86/include/asm/uaccess.h
1089 +@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
1090 + unsigned long __must_check clear_user(void __user *mem, unsigned long len);
1091 + unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
1092 +
1093 ++#ifdef CONFIG_ARCH_HAS_COPY_MC
1094 ++unsigned long __must_check
1095 ++copy_mc_to_kernel(void *to, const void *from, unsigned len);
1096 ++#define copy_mc_to_kernel copy_mc_to_kernel
1097 ++
1098 ++unsigned long __must_check
1099 ++copy_mc_to_user(void *to, const void *from, unsigned len);
1100 ++#endif
1101 ++
1102 + /*
1103 + * movsl can be slow when source and dest are not both 8-byte aligned
1104 + */
1105 +diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
1106 +index bc10e3dc64fed..e7265a552f4f0 100644
1107 +--- a/arch/x86/include/asm/uaccess_64.h
1108 ++++ b/arch/x86/include/asm/uaccess_64.h
1109 +@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
1110 + return ret;
1111 + }
1112 +
1113 +-static __always_inline __must_check unsigned long
1114 +-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
1115 +-{
1116 +- unsigned long ret;
1117 +-
1118 +- __uaccess_begin();
1119 +- /*
1120 +- * Note, __memcpy_mcsafe() is explicitly used since it can
1121 +- * handle exceptions / faults. memcpy_mcsafe() may fall back to
1122 +- * memcpy() which lacks this handling.
1123 +- */
1124 +- ret = __memcpy_mcsafe(to, from, len);
1125 +- __uaccess_end();
1126 +- return ret;
1127 +-}
1128 +-
1129 + static __always_inline __must_check unsigned long
1130 + raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
1131 + {
1132 +@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
1133 + kasan_check_write(dst, size);
1134 + return __copy_user_flushcache(dst, src, size);
1135 + }
1136 +-
1137 +-unsigned long
1138 +-mcsafe_handle_tail(char *to, char *from, unsigned len);
1139 +-
1140 + #endif /* _ASM_X86_UACCESS_64_H */
1141 +diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
1142 +index 07673a034d39c..69b2bb305a5a7 100644
1143 +--- a/arch/x86/kernel/cpu/mce/core.c
1144 ++++ b/arch/x86/kernel/cpu/mce/core.c
1145 +@@ -40,7 +40,6 @@
1146 + #include <linux/debugfs.h>
1147 + #include <linux/irq_work.h>
1148 + #include <linux/export.h>
1149 +-#include <linux/jump_label.h>
1150 + #include <linux/set_memory.h>
1151 + #include <linux/task_work.h>
1152 + #include <linux/hardirq.h>
1153 +@@ -2122,7 +2121,7 @@ void mce_disable_bank(int bank)
1154 + and older.
1155 + * mce=nobootlog Don't log MCEs from before booting.
1156 + * mce=bios_cmci_threshold Don't program the CMCI threshold
1157 +- * mce=recovery force enable memcpy_mcsafe()
1158 ++ * mce=recovery force enable copy_mc_fragile()
1159 + */
1160 + static int __init mcheck_enable(char *str)
1161 + {
1162 +@@ -2730,13 +2729,10 @@ static void __init mcheck_debugfs_init(void)
1163 + static void __init mcheck_debugfs_init(void) { }
1164 + #endif
1165 +
1166 +-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
1167 +-EXPORT_SYMBOL_GPL(mcsafe_key);
1168 +-
1169 + static int __init mcheck_late_init(void)
1170 + {
1171 + if (mca_cfg.recovery)
1172 +- static_branch_inc(&mcsafe_key);
1173 ++ enable_copy_mc_fragile();
1174 +
1175 + mcheck_debugfs_init();
1176 +
1177 +diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
1178 +index 896d74cb5081a..e0296983a2386 100644
1179 +--- a/arch/x86/kernel/quirks.c
1180 ++++ b/arch/x86/kernel/quirks.c
1181 +@@ -8,6 +8,7 @@
1182 +
1183 + #include <asm/hpet.h>
1184 + #include <asm/setup.h>
1185 ++#include <asm/mce.h>
1186 +
1187 + #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
1188 +
1189 +@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
1190 + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
1191 + amd_disable_seq_and_redirect_scrub);
1192 +
1193 +-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
1194 +-#include <linux/jump_label.h>
1195 +-#include <asm/string_64.h>
1196 +-
1197 + /* Ivy Bridge, Haswell, Broadwell */
1198 + static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
1199 + {
1200 +@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
1201 + pci_read_config_dword(pdev, 0x84, &capid0);
1202 +
1203 + if (capid0 & 0x10)
1204 +- static_branch_inc(&mcsafe_key);
1205 ++ enable_copy_mc_fragile();
1206 + }
1207 +
1208 + /* Skylake */
1209 +@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
1210 + * enabled, so memory machine check recovery is also enabled.
1211 + */
1212 + if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
1213 +- static_branch_inc(&mcsafe_key);
1214 ++ enable_copy_mc_fragile();
1215 +
1216 + }
1217 + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
1218 +@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
1219 + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
1220 + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
1221 + #endif
1222 +-#endif
1223 +
1224 + bool x86_apple_machine;
1225 + EXPORT_SYMBOL(x86_apple_machine);
1226 +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
1227 +index 69cc823109740..d43df8de75a6a 100644
1228 +--- a/arch/x86/kernel/traps.c
1229 ++++ b/arch/x86/kernel/traps.c
1230 +@@ -196,7 +196,7 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)
1231 +
1232 + DEFINE_IDTENTRY(exc_divide_error)
1233 + {
1234 +- do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE,
1235 ++ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
1236 + FPE_INTDIV, error_get_trap_addr(regs));
1237 + }
1238 +
1239 +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
1240 +index 6110bce7237bd..02c3cec7e5157 100644
1241 +--- a/arch/x86/lib/Makefile
1242 ++++ b/arch/x86/lib/Makefile
1243 +@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
1244 + lib-y := delay.o misc.o cmdline.o cpu.o
1245 + lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
1246 + lib-y += memcpy_$(BITS).o
1247 ++lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
1248 + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
1249 + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
1250 + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
1251 +diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
1252 +new file mode 100644
1253 +index 0000000000000..c13e8c9ee926b
1254 +--- /dev/null
1255 ++++ b/arch/x86/lib/copy_mc.c
1256 +@@ -0,0 +1,96 @@
1257 ++// SPDX-License-Identifier: GPL-2.0
1258 ++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
1259 ++
1260 ++#include <linux/jump_label.h>
1261 ++#include <linux/uaccess.h>
1262 ++#include <linux/export.h>
1263 ++#include <linux/string.h>
1264 ++#include <linux/types.h>
1265 ++
1266 ++#include <asm/mce.h>
1267 ++
1268 ++#ifdef CONFIG_X86_MCE
1269 ++/*
1270 ++ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
1271 ++ * implementation.
1272 ++ */
1273 ++static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
1274 ++
1275 ++void enable_copy_mc_fragile(void)
1276 ++{
1277 ++ static_branch_inc(&copy_mc_fragile_key);
1278 ++}
1279 ++#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
1280 ++
1281 ++/*
1282 ++ * Similar to copy_user_handle_tail, probe for the write fault point, or
1283 ++ * source exception point.
1284 ++ */
1285 ++__visible notrace unsigned long
1286 ++copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
1287 ++{
1288 ++ for (; len; --len, to++, from++)
1289 ++ if (copy_mc_fragile(to, from, 1))
1290 ++ break;
1291 ++ return len;
1292 ++}
1293 ++#else
1294 ++/*
1295 ++ * No point in doing careful copying, or consulting a static key when
1296 ++ * there is no #MC handler in the CONFIG_X86_MCE=n case.
1297 ++ */
1298 ++void enable_copy_mc_fragile(void)
1299 ++{
1300 ++}
1301 ++#define copy_mc_fragile_enabled (0)
1302 ++#endif
1303 ++
1304 ++unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
1305 ++
1306 ++/**
1307 ++ * copy_mc_to_kernel - memory copy that handles source exceptions
1308 ++ *
1309 ++ * @dst: destination address
1310 ++ * @src: source address
1311 ++ * @len: number of bytes to copy
1312 ++ *
1313 ++ * Call into the 'fragile' version on systems that benefit from avoiding
1314 ++ * corner case poison consumption scenarios, For example, accessing
1315 ++ * poison across 2 cachelines with a single instruction. Almost all
1316 ++ * other uses case can use copy_mc_enhanced_fast_string() for a fast
1317 ++ * recoverable copy, or fallback to plain memcpy.
1318 ++ *
1319 ++ * Return 0 for success, or number of bytes not copied if there was an
1320 ++ * exception.
1321 ++ */
1322 ++unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
1323 ++{
1324 ++ if (copy_mc_fragile_enabled)
1325 ++ return copy_mc_fragile(dst, src, len);
1326 ++ if (static_cpu_has(X86_FEATURE_ERMS))
1327 ++ return copy_mc_enhanced_fast_string(dst, src, len);
1328 ++ memcpy(dst, src, len);
1329 ++ return 0;
1330 ++}
1331 ++EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
1332 ++
1333 ++unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
1334 ++{
1335 ++ unsigned long ret;
1336 ++
1337 ++ if (copy_mc_fragile_enabled) {
1338 ++ __uaccess_begin();
1339 ++ ret = copy_mc_fragile(dst, src, len);
1340 ++ __uaccess_end();
1341 ++ return ret;
1342 ++ }
1343 ++
1344 ++ if (static_cpu_has(X86_FEATURE_ERMS)) {
1345 ++ __uaccess_begin();
1346 ++ ret = copy_mc_enhanced_fast_string(dst, src, len);
1347 ++ __uaccess_end();
1348 ++ return ret;
1349 ++ }
1350 ++
1351 ++ return copy_user_generic(dst, src, len);
1352 ++}
1353 +diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
1354 +new file mode 100644
1355 +index 0000000000000..892d8915f609e
1356 +--- /dev/null
1357 ++++ b/arch/x86/lib/copy_mc_64.S
1358 +@@ -0,0 +1,163 @@
1359 ++/* SPDX-License-Identifier: GPL-2.0-only */
1360 ++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
1361 ++
1362 ++#include <linux/linkage.h>
1363 ++#include <asm/copy_mc_test.h>
1364 ++#include <asm/export.h>
1365 ++#include <asm/asm.h>
1366 ++
1367 ++#ifndef CONFIG_UML
1368 ++
1369 ++#ifdef CONFIG_X86_MCE
1370 ++COPY_MC_TEST_CTL
1371 ++
1372 ++/*
1373 ++ * copy_mc_fragile - copy memory with indication if an exception / fault happened
1374 ++ *
1375 ++ * The 'fragile' version is opted into by platform quirks and takes
1376 ++ * pains to avoid unrecoverable corner cases like 'fast-string'
1377 ++ * instruction sequences, and consuming poison across a cacheline
1378 ++ * boundary. The non-fragile version is equivalent to memcpy()
1379 ++ * regardless of CPU machine-check-recovery capability.
1380 ++ */
1381 ++SYM_FUNC_START(copy_mc_fragile)
1382 ++ cmpl $8, %edx
1383 ++ /* Less than 8 bytes? Go to byte copy loop */
1384 ++ jb .L_no_whole_words
1385 ++
1386 ++ /* Check for bad alignment of source */
1387 ++ testl $7, %esi
1388 ++ /* Already aligned */
1389 ++ jz .L_8byte_aligned
1390 ++
1391 ++ /* Copy one byte at a time until source is 8-byte aligned */
1392 ++ movl %esi, %ecx
1393 ++ andl $7, %ecx
1394 ++ subl $8, %ecx
1395 ++ negl %ecx
1396 ++ subl %ecx, %edx
1397 ++.L_read_leading_bytes:
1398 ++ movb (%rsi), %al
1399 ++ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
1400 ++ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
1401 ++.L_write_leading_bytes:
1402 ++ movb %al, (%rdi)
1403 ++ incq %rsi
1404 ++ incq %rdi
1405 ++ decl %ecx
1406 ++ jnz .L_read_leading_bytes
1407 ++
1408 ++.L_8byte_aligned:
1409 ++ movl %edx, %ecx
1410 ++ andl $7, %edx
1411 ++ shrl $3, %ecx
1412 ++ jz .L_no_whole_words
1413 ++
1414 ++.L_read_words:
1415 ++ movq (%rsi), %r8
1416 ++ COPY_MC_TEST_SRC %rsi 8 .E_read_words
1417 ++ COPY_MC_TEST_DST %rdi 8 .E_write_words
1418 ++.L_write_words:
1419 ++ movq %r8, (%rdi)
1420 ++ addq $8, %rsi
1421 ++ addq $8, %rdi
1422 ++ decl %ecx
1423 ++ jnz .L_read_words
1424 ++
1425 ++ /* Any trailing bytes? */
1426 ++.L_no_whole_words:
1427 ++ andl %edx, %edx
1428 ++ jz .L_done_memcpy_trap
1429 ++
1430 ++ /* Copy trailing bytes */
1431 ++ movl %edx, %ecx
1432 ++.L_read_trailing_bytes:
1433 ++ movb (%rsi), %al
1434 ++ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
1435 ++ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
1436 ++.L_write_trailing_bytes:
1437 ++ movb %al, (%rdi)
1438 ++ incq %rsi
1439 ++ incq %rdi
1440 ++ decl %ecx
1441 ++ jnz .L_read_trailing_bytes
1442 ++
1443 ++ /* Copy successful. Return zero */
1444 ++.L_done_memcpy_trap:
1445 ++ xorl %eax, %eax
1446 ++.L_done:
1447 ++ ret
1448 ++SYM_FUNC_END(copy_mc_fragile)
1449 ++EXPORT_SYMBOL_GPL(copy_mc_fragile)
1450 ++
1451 ++ .section .fixup, "ax"
1452 ++ /*
1453 ++ * Return number of bytes not copied for any failure. Note that
1454 ++ * there is no "tail" handling since the source buffer is 8-byte
1455 ++ * aligned and poison is cacheline aligned.
1456 ++ */
1457 ++.E_read_words:
1458 ++ shll $3, %ecx
1459 ++.E_leading_bytes:
1460 ++ addl %edx, %ecx
1461 ++.E_trailing_bytes:
1462 ++ mov %ecx, %eax
1463 ++ jmp .L_done
1464 ++
1465 ++ /*
1466 ++ * For write fault handling, given the destination is unaligned,
1467 ++ * we handle faults on multi-byte writes with a byte-by-byte
1468 ++ * copy up to the write-protected page.
1469 ++ */
1470 ++.E_write_words:
1471 ++ shll $3, %ecx
1472 ++ addl %edx, %ecx
1473 ++ movl %ecx, %edx
1474 ++ jmp copy_mc_fragile_handle_tail
1475 ++
1476 ++ .previous
1477 ++
1478 ++ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
1479 ++ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
1480 ++ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
1481 ++ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
1482 ++ _ASM_EXTABLE(.L_write_words, .E_write_words)
1483 ++ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
1484 ++#endif /* CONFIG_X86_MCE */
1485 ++
1486 ++/*
1487 ++ * copy_mc_enhanced_fast_string - memory copy with exception handling
1488 ++ *
1489 ++ * Fast string copy + fault / exception handling. If the CPU does
1490 ++ * support machine check exception recovery, but does not support
1491 ++ * recovering from fast-string exceptions then this CPU needs to be
1492 ++ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
1493 ++ * machine check recovery support this version should be no slower than
1494 ++ * standard memcpy.
1495 ++ */
1496 ++SYM_FUNC_START(copy_mc_enhanced_fast_string)
1497 ++ movq %rdi, %rax
1498 ++ movq %rdx, %rcx
1499 ++.L_copy:
1500 ++ rep movsb
1501 ++ /* Copy successful. Return zero */
1502 ++ xorl %eax, %eax
1503 ++ ret
1504 ++SYM_FUNC_END(copy_mc_enhanced_fast_string)
1505 ++
1506 ++ .section .fixup, "ax"
1507 ++.E_copy:
1508 ++ /*
1509 ++ * On fault %rcx is updated such that the copy instruction could
1510 ++ * optionally be restarted at the fault position, i.e. it
1511 ++ * contains 'bytes remaining'. A non-zero return indicates error
1512 ++ * to copy_mc_generic() users, or indicate short transfers to
1513 ++ * user-copy routines.
1514 ++ */
1515 ++ movq %rcx, %rax
1516 ++ ret
1517 ++
1518 ++ .previous
1519 ++
1520 ++ _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
1521 ++#endif /* !CONFIG_UML */
1522 +diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
1523 +index bbcc05bcefadb..037faac46b0cc 100644
1524 +--- a/arch/x86/lib/memcpy_64.S
1525 ++++ b/arch/x86/lib/memcpy_64.S
1526 +@@ -4,7 +4,6 @@
1527 + #include <linux/linkage.h>
1528 + #include <asm/errno.h>
1529 + #include <asm/cpufeatures.h>
1530 +-#include <asm/mcsafe_test.h>
1531 + #include <asm/alternative-asm.h>
1532 + #include <asm/export.h>
1533 +
1534 +@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
1535 + SYM_FUNC_END(memcpy_orig)
1536 +
1537 + .popsection
1538 +-
1539 +-#ifndef CONFIG_UML
1540 +-
1541 +-MCSAFE_TEST_CTL
1542 +-
1543 +-/*
1544 +- * __memcpy_mcsafe - memory copy with machine check exception handling
1545 +- * Note that we only catch machine checks when reading the source addresses.
1546 +- * Writes to target are posted and don't generate machine checks.
1547 +- */
1548 +-SYM_FUNC_START(__memcpy_mcsafe)
1549 +- cmpl $8, %edx
1550 +- /* Less than 8 bytes? Go to byte copy loop */
1551 +- jb .L_no_whole_words
1552 +-
1553 +- /* Check for bad alignment of source */
1554 +- testl $7, %esi
1555 +- /* Already aligned */
1556 +- jz .L_8byte_aligned
1557 +-
1558 +- /* Copy one byte at a time until source is 8-byte aligned */
1559 +- movl %esi, %ecx
1560 +- andl $7, %ecx
1561 +- subl $8, %ecx
1562 +- negl %ecx
1563 +- subl %ecx, %edx
1564 +-.L_read_leading_bytes:
1565 +- movb (%rsi), %al
1566 +- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
1567 +- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
1568 +-.L_write_leading_bytes:
1569 +- movb %al, (%rdi)
1570 +- incq %rsi
1571 +- incq %rdi
1572 +- decl %ecx
1573 +- jnz .L_read_leading_bytes
1574 +-
1575 +-.L_8byte_aligned:
1576 +- movl %edx, %ecx
1577 +- andl $7, %edx
1578 +- shrl $3, %ecx
1579 +- jz .L_no_whole_words
1580 +-
1581 +-.L_read_words:
1582 +- movq (%rsi), %r8
1583 +- MCSAFE_TEST_SRC %rsi 8 .E_read_words
1584 +- MCSAFE_TEST_DST %rdi 8 .E_write_words
1585 +-.L_write_words:
1586 +- movq %r8, (%rdi)
1587 +- addq $8, %rsi
1588 +- addq $8, %rdi
1589 +- decl %ecx
1590 +- jnz .L_read_words
1591 +-
1592 +- /* Any trailing bytes? */
1593 +-.L_no_whole_words:
1594 +- andl %edx, %edx
1595 +- jz .L_done_memcpy_trap
1596 +-
1597 +- /* Copy trailing bytes */
1598 +- movl %edx, %ecx
1599 +-.L_read_trailing_bytes:
1600 +- movb (%rsi), %al
1601 +- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
1602 +- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
1603 +-.L_write_trailing_bytes:
1604 +- movb %al, (%rdi)
1605 +- incq %rsi
1606 +- incq %rdi
1607 +- decl %ecx
1608 +- jnz .L_read_trailing_bytes
1609 +-
1610 +- /* Copy successful. Return zero */
1611 +-.L_done_memcpy_trap:
1612 +- xorl %eax, %eax
1613 +-.L_done:
1614 +- ret
1615 +-SYM_FUNC_END(__memcpy_mcsafe)
1616 +-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
1617 +-
1618 +- .section .fixup, "ax"
1619 +- /*
1620 +- * Return number of bytes not copied for any failure. Note that
1621 +- * there is no "tail" handling since the source buffer is 8-byte
1622 +- * aligned and poison is cacheline aligned.
1623 +- */
1624 +-.E_read_words:
1625 +- shll $3, %ecx
1626 +-.E_leading_bytes:
1627 +- addl %edx, %ecx
1628 +-.E_trailing_bytes:
1629 +- mov %ecx, %eax
1630 +- jmp .L_done
1631 +-
1632 +- /*
1633 +- * For write fault handling, given the destination is unaligned,
1634 +- * we handle faults on multi-byte writes with a byte-by-byte
1635 +- * copy up to the write-protected page.
1636 +- */
1637 +-.E_write_words:
1638 +- shll $3, %ecx
1639 +- addl %edx, %ecx
1640 +- movl %ecx, %edx
1641 +- jmp mcsafe_handle_tail
1642 +-
1643 +- .previous
1644 +-
1645 +- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
1646 +- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
1647 +- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
1648 +- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
1649 +- _ASM_EXTABLE(.L_write_words, .E_write_words)
1650 +- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
1651 +-#endif
1652 +diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
1653 +index 1847e993ac63a..508c81e97ab10 100644
1654 +--- a/arch/x86/lib/usercopy_64.c
1655 ++++ b/arch/x86/lib/usercopy_64.c
1656 +@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
1657 + }
1658 + EXPORT_SYMBOL(clear_user);
1659 +
1660 +-/*
1661 +- * Similar to copy_user_handle_tail, probe for the write fault point,
1662 +- * but reuse __memcpy_mcsafe in case a new read error is encountered.
1663 +- * clac() is handled in _copy_to_iter_mcsafe().
1664 +- */
1665 +-__visible notrace unsigned long
1666 +-mcsafe_handle_tail(char *to, char *from, unsigned len)
1667 +-{
1668 +- for (; len; --len, to++, from++) {
1669 +- /*
1670 +- * Call the assembly routine back directly since
1671 +- * memcpy_mcsafe() may silently fallback to memcpy.
1672 +- */
1673 +- unsigned long rem = __memcpy_mcsafe(to, from, 1);
1674 +-
1675 +- if (rem)
1676 +- break;
1677 +- }
1678 +- return len;
1679 +-}
1680 +-
1681 + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
1682 + /**
1683 + * clean_cache_range - write back a cache range with CLWB
1684 +diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
1685 +index 00c62115f39cd..0aaf31917061d 100644
1686 +--- a/arch/x86/pci/intel_mid_pci.c
1687 ++++ b/arch/x86/pci/intel_mid_pci.c
1688 +@@ -33,6 +33,7 @@
1689 + #include <asm/hw_irq.h>
1690 + #include <asm/io_apic.h>
1691 + #include <asm/intel-mid.h>
1692 ++#include <asm/acpi.h>
1693 +
1694 + #define PCIE_CAP_OFFSET 0x100
1695 +
1696 +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
1697 +index c46b9f2e732ff..6e39eda00c2c9 100644
1698 +--- a/arch/x86/xen/enlighten_pv.c
1699 ++++ b/arch/x86/xen/enlighten_pv.c
1700 +@@ -1438,6 +1438,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
1701 + x86_init.mpparse.get_smp_config = x86_init_uint_noop;
1702 +
1703 + xen_boot_params_init_edd();
1704 ++
1705 ++#ifdef CONFIG_ACPI
1706 ++ /*
1707 ++ * Disable selecting "Firmware First mode" for correctable
1708 ++ * memory errors, as this is the duty of the hypervisor to
1709 ++ * decide.
1710 ++ */
1711 ++ acpi_disable_cmcff = 1;
1712 ++#endif
1713 + }
1714 +
1715 + if (!boot_params.screen_info.orig_video_isVGA)
1716 +diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
1717 +index d991dd46e89cc..98b8baa47dc5e 100644
1718 +--- a/drivers/ata/ahci.h
1719 ++++ b/drivers/ata/ahci.h
1720 +@@ -240,6 +240,8 @@ enum {
1721 + as default lpm_policy */
1722 + AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during
1723 + suspend/resume */
1724 ++ AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = (1 << 27), /* ignore -EOPNOTSUPP
1725 ++ from phy_power_on() */
1726 +
1727 + /* ap->flags bits */
1728 +
1729 +diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
1730 +index d4bba3ace45d7..3ad46d26d9d51 100644
1731 +--- a/drivers/ata/ahci_mvebu.c
1732 ++++ b/drivers/ata/ahci_mvebu.c
1733 +@@ -227,7 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = {
1734 +
1735 + static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = {
1736 + .plat_config = ahci_mvebu_armada_3700_config,
1737 +- .flags = AHCI_HFLAG_SUSPEND_PHYS,
1738 ++ .flags = AHCI_HFLAG_SUSPEND_PHYS | AHCI_HFLAG_IGN_NOTSUPP_POWER_ON,
1739 + };
1740 +
1741 + static const struct of_device_id ahci_mvebu_of_match[] = {
1742 +diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
1743 +index 129556fcf6be7..a1cbb894e5f0a 100644
1744 +--- a/drivers/ata/libahci_platform.c
1745 ++++ b/drivers/ata/libahci_platform.c
1746 +@@ -59,7 +59,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
1747 + }
1748 +
1749 + rc = phy_power_on(hpriv->phys[i]);
1750 +- if (rc) {
1751 ++ if (rc && !(rc == -EOPNOTSUPP && (hpriv->flags & AHCI_HFLAG_IGN_NOTSUPP_POWER_ON))) {
1752 + phy_exit(hpriv->phys[i]);
1753 + goto disable_phys;
1754 + }
1755 +diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
1756 +index 141ac600b64c8..44b0ed8f6bb8a 100644
1757 +--- a/drivers/ata/sata_rcar.c
1758 ++++ b/drivers/ata/sata_rcar.c
1759 +@@ -120,7 +120,7 @@
1760 + /* Descriptor table word 0 bit (when DTA32M = 1) */
1761 + #define SATA_RCAR_DTEND BIT(0)
1762 +
1763 +-#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL
1764 ++#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL
1765 +
1766 + /* Gen2 Physical Layer Control Registers */
1767 + #define RCAR_GEN2_PHY_CTL1_REG 0x1704
1768 +diff --git a/drivers/base/firmware_loader/fallback_platform.c b/drivers/base/firmware_loader/fallback_platform.c
1769 +index 685edb7dd05a7..6958ab1a80593 100644
1770 +--- a/drivers/base/firmware_loader/fallback_platform.c
1771 ++++ b/drivers/base/firmware_loader/fallback_platform.c
1772 +@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags)
1773 + if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
1774 + return -ENOENT;
1775 +
1776 +- rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED);
1777 ++ rc = security_kernel_load_data(LOADING_FIRMWARE);
1778 + if (rc)
1779 + return rc;
1780 +
1781 +diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
1782 +index bad8e90ba168d..62fbc7df022bc 100644
1783 +--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
1784 ++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
1785 +@@ -772,14 +772,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
1786 + if (rpl->status != CPL_ERR_NONE) {
1787 + pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
1788 + rpl->status, stid);
1789 +- return CPL_RET_BUF_DONE;
1790 ++ } else {
1791 ++ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
1792 ++ sock_put(listen_ctx->lsk);
1793 ++ kfree(listen_ctx);
1794 ++ module_put(THIS_MODULE);
1795 + }
1796 +- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
1797 +- sock_put(listen_ctx->lsk);
1798 +- kfree(listen_ctx);
1799 +- module_put(THIS_MODULE);
1800 +-
1801 +- return 0;
1802 ++ return CPL_RET_BUF_DONE;
1803 + }
1804 +
1805 + static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
1806 +@@ -796,15 +795,13 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
1807 + if (rpl->status != CPL_ERR_NONE) {
1808 + pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
1809 + rpl->status, stid);
1810 +- return CPL_RET_BUF_DONE;
1811 ++ } else {
1812 ++ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
1813 ++ sock_put(listen_ctx->lsk);
1814 ++ kfree(listen_ctx);
1815 ++ module_put(THIS_MODULE);
1816 + }
1817 +-
1818 +- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
1819 +- sock_put(listen_ctx->lsk);
1820 +- kfree(listen_ctx);
1821 +- module_put(THIS_MODULE);
1822 +-
1823 +- return 0;
1824 ++ return CPL_RET_BUF_DONE;
1825 + }
1826 +
1827 + static void chtls_purge_wr_queue(struct sock *sk)
1828 +@@ -1513,7 +1510,6 @@ static void add_to_reap_list(struct sock *sk)
1829 + struct chtls_sock *csk = sk->sk_user_data;
1830 +
1831 + local_bh_disable();
1832 +- bh_lock_sock(sk);
1833 + release_tcp_port(sk); /* release the port immediately */
1834 +
1835 + spin_lock(&reap_list_lock);
1836 +@@ -1522,7 +1518,6 @@ static void add_to_reap_list(struct sock *sk)
1837 + if (!csk->passive_reap_next)
1838 + schedule_work(&reap_task);
1839 + spin_unlock(&reap_list_lock);
1840 +- bh_unlock_sock(sk);
1841 + local_bh_enable();
1842 + }
1843 +
1844 +diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
1845 +index 9fb5ca6682ea2..188d871f6b8cd 100644
1846 +--- a/drivers/crypto/chelsio/chtls/chtls_io.c
1847 ++++ b/drivers/crypto/chelsio/chtls/chtls_io.c
1848 +@@ -1585,6 +1585,7 @@ skip_copy:
1849 + tp->urg_data = 0;
1850 +
1851 + if ((avail + offset) >= skb->len) {
1852 ++ struct sk_buff *next_skb;
1853 + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1854 + tp->copied_seq += skb->len;
1855 + hws->rcvpld = skb->hdr_len;
1856 +@@ -1595,8 +1596,10 @@ skip_copy:
1857 + chtls_free_skb(sk, skb);
1858 + buffers_freed++;
1859 + hws->copied_seq = 0;
1860 +- if (copied >= target &&
1861 +- !skb_peek(&sk->sk_receive_queue))
1862 ++ next_skb = skb_peek(&sk->sk_receive_queue);
1863 ++ if (copied >= target && !next_skb)
1864 ++ break;
1865 ++ if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
1866 + break;
1867 + }
1868 + } while (len > 0);
1869 +diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
1870 +index e5bfac79e5ac9..04f5d79d42653 100644
1871 +--- a/drivers/firmware/efi/libstub/arm64-stub.c
1872 ++++ b/drivers/firmware/efi/libstub/arm64-stub.c
1873 +@@ -62,10 +62,12 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
1874 + status = efi_get_random_bytes(sizeof(phys_seed),
1875 + (u8 *)&phys_seed);
1876 + if (status == EFI_NOT_FOUND) {
1877 +- efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
1878 ++ efi_info("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n");
1879 ++ efi_nokaslr = true;
1880 + } else if (status != EFI_SUCCESS) {
1881 +- efi_err("efi_get_random_bytes() failed\n");
1882 +- return status;
1883 ++ efi_err("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n",
1884 ++ status);
1885 ++ efi_nokaslr = true;
1886 + }
1887 + } else {
1888 + efi_info("KASLR disabled on kernel command line\n");
1889 +diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
1890 +index 11ecf3c4640eb..368cd60000eec 100644
1891 +--- a/drivers/firmware/efi/libstub/fdt.c
1892 ++++ b/drivers/firmware/efi/libstub/fdt.c
1893 +@@ -136,7 +136,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
1894 + if (status)
1895 + goto fdt_set_fail;
1896 +
1897 +- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
1898 ++ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
1899 + efi_status_t efi_status;
1900 +
1901 + efi_status = efi_get_random_bytes(sizeof(fdt_val64),
1902 +@@ -145,8 +145,6 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
1903 + status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64);
1904 + if (status)
1905 + goto fdt_set_fail;
1906 +- } else if (efi_status != EFI_NOT_FOUND) {
1907 +- return efi_status;
1908 + }
1909 + }
1910 +
1911 +diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
1912 +index e7532e7d74e91..0e1f11669b072 100644
1913 +--- a/drivers/gpu/drm/i915/i915_debugfs.c
1914 ++++ b/drivers/gpu/drm/i915/i915_debugfs.c
1915 +@@ -323,6 +323,7 @@ static void print_context_stats(struct seq_file *m,
1916 + }
1917 + i915_gem_context_unlock_engines(ctx);
1918 +
1919 ++ mutex_lock(&ctx->mutex);
1920 + if (!IS_ERR_OR_NULL(ctx->file_priv)) {
1921 + struct file_stats stats = {
1922 + .vm = rcu_access_pointer(ctx->vm),
1923 +@@ -343,6 +344,7 @@ static void print_context_stats(struct seq_file *m,
1924 +
1925 + print_file_stats(m, name, stats);
1926 + }
1927 ++ mutex_unlock(&ctx->mutex);
1928 +
1929 + spin_lock(&i915->gem.contexts.lock);
1930 + list_safe_reset_next(ctx, cn, link);
1931 +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
1932 +index 3a98439bba832..0abce004a9591 100644
1933 +--- a/drivers/infiniband/core/addr.c
1934 ++++ b/drivers/infiniband/core/addr.c
1935 +@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work)
1936 + req->callback = NULL;
1937 +
1938 + spin_lock_bh(&lock);
1939 ++ /*
1940 ++ * Although the work will normally have been canceled by the workqueue,
1941 ++ * it can still be requeued as long as it is on the req_list.
1942 ++ */
1943 ++ cancel_delayed_work(&req->work);
1944 + if (!list_empty(&req->list)) {
1945 +- /*
1946 +- * Although the work will normally have been canceled by the
1947 +- * workqueue, it can still be requeued as long as it is on the
1948 +- * req_list.
1949 +- */
1950 +- cancel_delayed_work(&req->work);
1951 + list_del_init(&req->list);
1952 + kfree(req);
1953 + }
1954 +diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
1955 +index 1533419f18758..de467a1303db3 100644
1956 +--- a/drivers/md/dm-writecache.c
1957 ++++ b/drivers/md/dm-writecache.c
1958 +@@ -49,7 +49,7 @@ do { \
1959 + #define pmem_assign(dest, src) ((dest) = (src))
1960 + #endif
1961 +
1962 +-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
1963 ++#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
1964 + #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
1965 + #endif
1966 +
1967 +@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_target *ti)
1968 + }
1969 + wc->freelist_size = 0;
1970 +
1971 +- r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
1972 ++ r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
1973 ++ sizeof(uint64_t));
1974 + if (r) {
1975 + writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
1976 + sb_seq_count = cpu_to_le64(0);
1977 +@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_target *ti)
1978 + e->seq_count = -1;
1979 + continue;
1980 + }
1981 +- r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
1982 ++ r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
1983 ++ sizeof(struct wc_memory_entry));
1984 + if (r) {
1985 + writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
1986 + (unsigned long)b, r);
1987 +@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
1988 +
1989 + if (rw == READ) {
1990 + int r;
1991 +- r = memcpy_mcsafe(buf, data, size);
1992 ++ r = copy_mc_to_kernel(buf, data, size);
1993 + flush_dcache_page(bio_page(bio));
1994 + if (unlikely(r)) {
1995 + writecache_error(wc, r, "hardware memory error when reading data: %d", r);
1996 +@@ -2349,7 +2351,7 @@ invalid_optional:
1997 + }
1998 + }
1999 +
2000 +- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
2001 ++ r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
2002 + if (r) {
2003 + ti->error = "Hardware memory error when reading superblock";
2004 + goto bad;
2005 +@@ -2360,7 +2362,8 @@ invalid_optional:
2006 + ti->error = "Unable to initialize device";
2007 + goto bad;
2008 + }
2009 +- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
2010 ++ r = copy_mc_to_kernel(&s, sb(wc),
2011 ++ sizeof(struct wc_memory_superblock));
2012 + if (r) {
2013 + ti->error = "Hardware memory error when reading superblock";
2014 + goto bad;
2015 +diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
2016 +index 82246f7aec6fb..e39b118b945f8 100644
2017 +--- a/drivers/misc/cardreader/rtsx_pcr.c
2018 ++++ b/drivers/misc/cardreader/rtsx_pcr.c
2019 +@@ -1172,10 +1172,6 @@ void rtsx_pci_init_ocp(struct rtsx_pcr *pcr)
2020 + rtsx_pci_write_register(pcr, REG_OCPGLITCH,
2021 + SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch);
2022 + rtsx_pci_enable_ocp(pcr);
2023 +- } else {
2024 +- /* OC power down */
2025 +- rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
2026 +- OC_POWER_DOWN);
2027 + }
2028 + }
2029 + }
2030 +diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
2031 +index 25a9dd9c0c1b5..2ba899f5659ff 100644
2032 +--- a/drivers/misc/cxl/pci.c
2033 ++++ b/drivers/misc/cxl/pci.c
2034 +@@ -393,8 +393,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
2035 + *capp_unit_id = get_capp_unit_id(np, *phb_index);
2036 + of_node_put(np);
2037 + if (!*capp_unit_id) {
2038 +- pr_err("cxl: invalid capp unit id (phb_index: %d)\n",
2039 +- *phb_index);
2040 ++ pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n",
2041 ++ *chipid, *phb_index);
2042 + return -ENODEV;
2043 + }
2044 +
2045 +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
2046 +index dd07db656a5c3..f3c125d50d7a0 100644
2047 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
2048 ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
2049 +@@ -1158,16 +1158,6 @@ static void bnxt_queue_sp_work(struct bnxt *bp)
2050 + schedule_work(&bp->sp_task);
2051 + }
2052 +
2053 +-static void bnxt_cancel_sp_work(struct bnxt *bp)
2054 +-{
2055 +- if (BNXT_PF(bp)) {
2056 +- flush_workqueue(bnxt_pf_wq);
2057 +- } else {
2058 +- cancel_work_sync(&bp->sp_task);
2059 +- cancel_delayed_work_sync(&bp->fw_reset_task);
2060 +- }
2061 +-}
2062 +-
2063 + static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
2064 + {
2065 + if (!rxr->bnapi->in_reset) {
2066 +@@ -4198,7 +4188,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
2067 + u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
2068 + u16 dst = BNXT_HWRM_CHNL_CHIMP;
2069 +
2070 +- if (BNXT_NO_FW_ACCESS(bp))
2071 ++ if (BNXT_NO_FW_ACCESS(bp) &&
2072 ++ le16_to_cpu(req->req_type) != HWRM_FUNC_RESET)
2073 + return -EBUSY;
2074 +
2075 + if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
2076 +@@ -9247,7 +9238,10 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
2077 + {
2078 + int rc = 0;
2079 +
2080 +- rc = __bnxt_open_nic(bp, irq_re_init, link_re_init);
2081 ++ if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state))
2082 ++ rc = -EIO;
2083 ++ if (!rc)
2084 ++ rc = __bnxt_open_nic(bp, irq_re_init, link_re_init);
2085 + if (rc) {
2086 + netdev_err(bp->dev, "nic open fail (rc: %x)\n", rc);
2087 + dev_close(bp->dev);
2088 +@@ -11505,15 +11499,17 @@ static void bnxt_remove_one(struct pci_dev *pdev)
2089 + if (BNXT_PF(bp))
2090 + bnxt_sriov_disable(bp);
2091 +
2092 +- clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
2093 +- bnxt_cancel_sp_work(bp);
2094 +- bp->sp_event = 0;
2095 +-
2096 +- bnxt_dl_fw_reporters_destroy(bp, true);
2097 + if (BNXT_PF(bp))
2098 + devlink_port_type_clear(&bp->dl_port);
2099 + pci_disable_pcie_error_reporting(pdev);
2100 + unregister_netdev(dev);
2101 ++ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
2102 ++ /* Flush any pending tasks */
2103 ++ cancel_work_sync(&bp->sp_task);
2104 ++ cancel_delayed_work_sync(&bp->fw_reset_task);
2105 ++ bp->sp_event = 0;
2106 ++
2107 ++ bnxt_dl_fw_reporters_destroy(bp, true);
2108 + bnxt_dl_unregister(bp);
2109 + bnxt_shutdown_tc(bp);
2110 +
2111 +@@ -12238,6 +12234,9 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
2112 + return PCI_ERS_RESULT_DISCONNECT;
2113 + }
2114 +
2115 ++ if (state == pci_channel_io_frozen)
2116 ++ set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
2117 ++
2118 + if (netif_running(netdev))
2119 + bnxt_close(netdev);
2120 +
2121 +@@ -12264,7 +12263,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
2122 + {
2123 + struct net_device *netdev = pci_get_drvdata(pdev);
2124 + struct bnxt *bp = netdev_priv(netdev);
2125 +- int err = 0;
2126 ++ int err = 0, off;
2127 + pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
2128 +
2129 + netdev_info(bp->dev, "PCI Slot Reset\n");
2130 +@@ -12276,6 +12275,20 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
2131 + "Cannot re-enable PCI device after reset.\n");
2132 + } else {
2133 + pci_set_master(pdev);
2134 ++ /* Upon fatal error, our device internal logic that latches to
2135 ++ * BAR value is getting reset and will restore only upon
2136 ++ * rewriting the BARs.
2137 ++ *
2138 ++ * As pci_restore_state() does not re-write the BARs if the
2139 ++ * value is same as saved value earlier, driver needs to
2140 ++ * write the BARs to 0 to force restore, in case of fatal error.
2141 ++ */
2142 ++ if (test_and_clear_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN,
2143 ++ &bp->state)) {
2144 ++ for (off = PCI_BASE_ADDRESS_0;
2145 ++ off <= PCI_BASE_ADDRESS_5; off += 4)
2146 ++ pci_write_config_dword(bp->pdev, off, 0);
2147 ++ }
2148 + pci_restore_state(pdev);
2149 + pci_save_state(pdev);
2150 +
2151 +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
2152 +index 440b43c8068f1..a80ac2ae57a68 100644
2153 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
2154 ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
2155 +@@ -1672,6 +1672,7 @@ struct bnxt {
2156 + #define BNXT_STATE_ABORT_ERR 5
2157 + #define BNXT_STATE_FW_FATAL_COND 6
2158 + #define BNXT_STATE_DRV_REGISTERED 7
2159 ++#define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8
2160 +
2161 + #define BNXT_NO_FW_ACCESS(bp) \
2162 + (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \
2163 +diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
2164 +index ff0d82e2535da..fd33c888046b9 100644
2165 +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
2166 ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
2167 +@@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f)
2168 + int err;
2169 +
2170 + /* do a set-tcb for smac-sel and CWR bit.. */
2171 +- err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
2172 +- if (err)
2173 +- goto smac_err;
2174 +-
2175 + err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W,
2176 + TCB_SMAC_SEL_V(TCB_SMAC_SEL_M),
2177 + TCB_SMAC_SEL_V(f->smt->idx), 1);
2178 ++ if (err)
2179 ++ goto smac_err;
2180 ++
2181 ++ err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
2182 + if (!err)
2183 + return 0;
2184 +
2185 +@@ -865,6 +865,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
2186 + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
2187 + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
2188 + FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
2189 ++ FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
2190 + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
2191 + f->fs.newvlan == VLAN_REWRITE) |
2192 + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
2193 +@@ -882,7 +883,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
2194 + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
2195 + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
2196 + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
2197 +- fwr->smac_sel = 0;
2198 ++ fwr->smac_sel = f->smt->idx;
2199 + fwr->rx_chan_rx_rpl_iq =
2200 + htons(FW_FILTER_WR_RX_CHAN_V(0) |
2201 + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
2202 +@@ -1321,11 +1322,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb,
2203 + TX_QUEUE_V(f->fs.nat_mode) |
2204 + T5_OPT_2_VALID_F |
2205 + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
2206 +- CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
2207 +- (f->fs.dirsteer << 1)) |
2208 + PACE_V((f->fs.maskhash) |
2209 +- ((f->fs.dirsteerhash) << 1)) |
2210 +- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
2211 ++ ((f->fs.dirsteerhash) << 1)));
2212 + }
2213 +
2214 + static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
2215 +@@ -1361,11 +1359,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
2216 + TX_QUEUE_V(f->fs.nat_mode) |
2217 + T5_OPT_2_VALID_F |
2218 + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
2219 +- CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
2220 +- (f->fs.dirsteer << 1)) |
2221 + PACE_V((f->fs.maskhash) |
2222 +- ((f->fs.dirsteerhash) << 1)) |
2223 +- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
2224 ++ ((f->fs.dirsteerhash) << 1)));
2225 + }
2226 +
2227 + static int cxgb4_set_hash_filter(struct net_device *dev,
2228 +@@ -2037,6 +2032,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl)
2229 + }
2230 + return;
2231 + }
2232 ++ switch (f->fs.action) {
2233 ++ case FILTER_PASS:
2234 ++ if (f->fs.dirsteer)
2235 ++ set_tcb_tflag(adap, f, tid,
2236 ++ TF_DIRECT_STEER_S, 1, 1);
2237 ++ break;
2238 ++ case FILTER_DROP:
2239 ++ set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1);
2240 ++ break;
2241 ++ case FILTER_SWITCH:
2242 ++ set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1);
2243 ++ break;
2244 ++ }
2245 ++
2246 + break;
2247 +
2248 + default:
2249 +@@ -2104,22 +2113,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
2250 + if (ctx)
2251 + ctx->result = 0;
2252 + } else if (ret == FW_FILTER_WR_FLT_ADDED) {
2253 +- int err = 0;
2254 +-
2255 +- if (f->fs.newsmac)
2256 +- err = configure_filter_smac(adap, f);
2257 +-
2258 +- if (!err) {
2259 +- f->pending = 0; /* async setup completed */
2260 +- f->valid = 1;
2261 +- if (ctx) {
2262 +- ctx->result = 0;
2263 +- ctx->tid = idx;
2264 +- }
2265 +- } else {
2266 +- clear_filter(adap, f);
2267 +- if (ctx)
2268 +- ctx->result = err;
2269 ++ f->pending = 0; /* async setup completed */
2270 ++ f->valid = 1;
2271 ++ if (ctx) {
2272 ++ ctx->result = 0;
2273 ++ ctx->tid = idx;
2274 + }
2275 + } else {
2276 + /* Something went wrong. Issue a warning about the
2277 +diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
2278 +index 50232e063f49e..92473dda55d9f 100644
2279 +--- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
2280 ++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
2281 +@@ -50,6 +50,10 @@
2282 + #define TCB_T_FLAGS_M 0xffffffffffffffffULL
2283 + #define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S)
2284 +
2285 ++#define TF_DROP_S 22
2286 ++#define TF_DIRECT_STEER_S 23
2287 ++#define TF_LPBK_S 59
2288 ++
2289 + #define TF_CCTRL_ECE_S 60
2290 + #define TF_CCTRL_CWR_S 61
2291 + #define TF_CCTRL_RFR_S 62
2292 +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
2293 +index 9162856de1b19..ab15f1c588b3a 100644
2294 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
2295 ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
2296 +@@ -3146,8 +3146,8 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
2297 + hclgevf_uninit_msi(hdev);
2298 + }
2299 +
2300 +- hclgevf_pci_uninit(hdev);
2301 + hclgevf_cmd_uninit(hdev);
2302 ++ hclgevf_pci_uninit(hdev);
2303 + hclgevf_uninit_mac_list(hdev);
2304 + }
2305 +
2306 +diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
2307 +index 7ef3369953b6a..c3ec9ceed833e 100644
2308 +--- a/drivers/net/ethernet/ibm/ibmveth.c
2309 ++++ b/drivers/net/ethernet/ibm/ibmveth.c
2310 +@@ -1031,12 +1031,6 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
2311 + ret = -EOPNOTSUPP;
2312 + }
2313 +
2314 +- if (!ether_addr_equal(ether_header->h_source, netdev->dev_addr)) {
2315 +- netdev_dbg(netdev, "source packet MAC address does not match veth device's, dropping packet.\n");
2316 +- netdev->stats.tx_dropped++;
2317 +- ret = -EOPNOTSUPP;
2318 +- }
2319 +-
2320 + return ret;
2321 + }
2322 +
2323 +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
2324 +index 3e0aab04d86fb..f96bb3dab5a8b 100644
2325 +--- a/drivers/net/ethernet/ibm/ibmvnic.c
2326 ++++ b/drivers/net/ethernet/ibm/ibmvnic.c
2327 +@@ -1828,9 +1828,13 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
2328 + int rc;
2329 +
2330 + rc = 0;
2331 +- ether_addr_copy(adapter->mac_addr, addr->sa_data);
2332 +- if (adapter->state != VNIC_PROBED)
2333 ++ if (!is_valid_ether_addr(addr->sa_data))
2334 ++ return -EADDRNOTAVAIL;
2335 ++
2336 ++ if (adapter->state != VNIC_PROBED) {
2337 ++ ether_addr_copy(adapter->mac_addr, addr->sa_data);
2338 + rc = __ibmvnic_set_mac(netdev, addr->sa_data);
2339 ++ }
2340 +
2341 + return rc;
2342 + }
2343 +diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
2344 +index 71b6185b49042..42726fdf5a3af 100644
2345 +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
2346 ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
2347 +@@ -1483,6 +1483,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
2348 + if (!reload)
2349 + devlink_resources_unregister(devlink, NULL);
2350 + mlxsw_core->bus->fini(mlxsw_core->bus_priv);
2351 ++ if (!reload)
2352 ++ devlink_free(devlink);
2353 +
2354 + return;
2355 +
2356 +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
2357 +index b1feef473b746..ed89e669ddd5b 100644
2358 +--- a/drivers/net/ethernet/realtek/r8169_main.c
2359 ++++ b/drivers/net/ethernet/realtek/r8169_main.c
2360 +@@ -4559,7 +4559,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
2361 + }
2362 +
2363 + rtl_irq_disable(tp);
2364 +- napi_schedule_irqoff(&tp->napi);
2365 ++ napi_schedule(&tp->napi);
2366 + out:
2367 + rtl_ack_events(tp, status);
2368 +
2369 +@@ -4727,7 +4727,7 @@ static int rtl_open(struct net_device *dev)
2370 + rtl_request_firmware(tp);
2371 +
2372 + retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
2373 +- IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp);
2374 ++ IRQF_SHARED, dev->name, tp);
2375 + if (retval < 0)
2376 + goto err_release_fw_2;
2377 +
2378 +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
2379 +index 99f7aae102ce1..6c58ba186b2cb 100644
2380 +--- a/drivers/net/ethernet/renesas/ravb_main.c
2381 ++++ b/drivers/net/ethernet/renesas/ravb_main.c
2382 +@@ -1747,12 +1747,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req)
2383 + config.flags = 0;
2384 + config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON :
2385 + HWTSTAMP_TX_OFF;
2386 +- if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT)
2387 ++ switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) {
2388 ++ case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT:
2389 + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
2390 +- else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL)
2391 ++ break;
2392 ++ case RAVB_RXTSTAMP_TYPE_ALL:
2393 + config.rx_filter = HWTSTAMP_FILTER_ALL;
2394 +- else
2395 ++ break;
2396 ++ default:
2397 + config.rx_filter = HWTSTAMP_FILTER_NONE;
2398 ++ }
2399 +
2400 + return copy_to_user(req->ifr_data, &config, sizeof(config)) ?
2401 + -EFAULT : 0;
2402 +diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
2403 +index 8e47d0112e5dc..10f910f8cbe52 100644
2404 +--- a/drivers/net/gtp.c
2405 ++++ b/drivers/net/gtp.c
2406 +@@ -663,10 +663,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
2407 +
2408 + gtp = netdev_priv(dev);
2409 +
2410 +- err = gtp_encap_enable(gtp, data);
2411 +- if (err < 0)
2412 +- return err;
2413 +-
2414 + if (!data[IFLA_GTP_PDP_HASHSIZE]) {
2415 + hashsize = 1024;
2416 + } else {
2417 +@@ -677,12 +673,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
2418 +
2419 + err = gtp_hashtable_new(gtp, hashsize);
2420 + if (err < 0)
2421 +- goto out_encap;
2422 ++ return err;
2423 ++
2424 ++ err = gtp_encap_enable(gtp, data);
2425 ++ if (err < 0)
2426 ++ goto out_hashtable;
2427 +
2428 + err = register_netdevice(dev);
2429 + if (err < 0) {
2430 + netdev_dbg(dev, "failed to register new netdev %d\n", err);
2431 +- goto out_hashtable;
2432 ++ goto out_encap;
2433 + }
2434 +
2435 + gn = net_generic(dev_net(dev), gtp_net_id);
2436 +@@ -693,11 +693,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
2437 +
2438 + return 0;
2439 +
2440 ++out_encap:
2441 ++ gtp_encap_disable(gtp);
2442 + out_hashtable:
2443 + kfree(gtp->addr_hash);
2444 + kfree(gtp->tid_hash);
2445 +-out_encap:
2446 +- gtp_encap_disable(gtp);
2447 + return err;
2448 + }
2449 +
2450 +diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
2451 +index bdbfeed359db3..41e9af35a5820 100644
2452 +--- a/drivers/net/ipa/gsi_trans.c
2453 ++++ b/drivers/net/ipa/gsi_trans.c
2454 +@@ -398,15 +398,24 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size,
2455 +
2456 + /* assert(which < trans->tre_count); */
2457 +
2458 +- /* Set the page information for the buffer. We also need to fill in
2459 +- * the DMA address and length for the buffer (something dma_map_sg()
2460 +- * normally does).
2461 ++ /* Commands are quite different from data transfer requests.
2462 ++ * Their payloads come from a pool whose memory is allocated
2463 ++ * using dma_alloc_coherent(). We therefore do *not* map them
2464 ++ * for DMA (unlike what we do for pages and skbs).
2465 ++ *
2466 ++ * When a transaction completes, the SGL is normally unmapped.
2467 ++ * A command transaction has direction DMA_NONE, which tells
2468 ++ * gsi_trans_complete() to skip the unmapping step.
2469 ++ *
2470 ++ * The only things we use directly in a command scatter/gather
2471 ++ * entry are the DMA address and length. We still need the SG
2472 ++ * table flags to be maintained though, so assign a NULL page
2473 ++ * pointer for that purpose.
2474 + */
2475 + sg = &trans->sgl[which];
2476 +-
2477 +- sg_set_buf(sg, buf, size);
2478 ++ sg_assign_page(sg, NULL);
2479 + sg_dma_address(sg) = addr;
2480 +- sg_dma_len(sg) = sg->length;
2481 ++ sg_dma_len(sg) = size;
2482 +
2483 + info = &trans->info[which];
2484 + info->opcode = opcode;
2485 +diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c
2486 +index 80ad0b7eaef43..f8c6027cab6b4 100644
2487 +--- a/drivers/net/wireless/intersil/p54/p54pci.c
2488 ++++ b/drivers/net/wireless/intersil/p54/p54pci.c
2489 +@@ -329,10 +329,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
2490 + struct p54p_desc *desc;
2491 + dma_addr_t mapping;
2492 + u32 idx, i;
2493 ++ __le32 device_addr;
2494 +
2495 + spin_lock_irqsave(&priv->lock, flags);
2496 + idx = le32_to_cpu(ring_control->host_idx[1]);
2497 + i = idx % ARRAY_SIZE(ring_control->tx_data);
2498 ++ device_addr = ((struct p54_hdr *)skb->data)->req_id;
2499 +
2500 + mapping = pci_map_single(priv->pdev, skb->data, skb->len,
2501 + PCI_DMA_TODEVICE);
2502 +@@ -346,7 +348,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
2503 +
2504 + desc = &ring_control->tx_data[i];
2505 + desc->host_addr = cpu_to_le32(mapping);
2506 +- desc->device_addr = ((struct p54_hdr *)skb->data)->req_id;
2507 ++ desc->device_addr = device_addr;
2508 + desc->len = cpu_to_le16(skb->len);
2509 + desc->flags = 0;
2510 +
2511 +diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
2512 +index 45964acba9443..22d865ba6353d 100644
2513 +--- a/drivers/nvdimm/claim.c
2514 ++++ b/drivers/nvdimm/claim.c
2515 +@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
2516 + if (rw == READ) {
2517 + if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
2518 + return -EIO;
2519 +- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
2520 ++ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
2521 + return -EIO;
2522 + return 0;
2523 + }
2524 +diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
2525 +index d25e66fd942dd..5a4f588605caf 100644
2526 +--- a/drivers/nvdimm/pmem.c
2527 ++++ b/drivers/nvdimm/pmem.c
2528 +@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
2529 + while (len) {
2530 + mem = kmap_atomic(page);
2531 + chunk = min_t(unsigned int, len, PAGE_SIZE - off);
2532 +- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
2533 ++ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
2534 + kunmap_atomic(mem);
2535 + if (rem)
2536 + return BLK_STS_IOERR;
2537 +@@ -305,7 +305,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
2538 +
2539 + /*
2540 + * Use the 'no check' versions of copy_from_iter_flushcache() and
2541 +- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
2542 ++ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
2543 + * checking, both file offset and device offset, is handled by
2544 + * dax_iomap_actor()
2545 + */
2546 +@@ -318,7 +318,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
2547 + static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
2548 + void *addr, size_t bytes, struct iov_iter *i)
2549 + {
2550 +- return _copy_to_iter_mcsafe(addr, bytes, i);
2551 ++ return _copy_mc_to_iter(addr, bytes, i);
2552 + }
2553 +
2554 + static const struct dax_operations pmem_dax_ops = {
2555 +diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
2556 +index d5f58684d962c..c79326e699e82 100644
2557 +--- a/drivers/pci/controller/pci-aardvark.c
2558 ++++ b/drivers/pci/controller/pci-aardvark.c
2559 +@@ -1068,7 +1068,9 @@ static int advk_pcie_enable_phy(struct advk_pcie *pcie)
2560 + }
2561 +
2562 + ret = phy_power_on(pcie->phy);
2563 +- if (ret) {
2564 ++ if (ret == -EOPNOTSUPP) {
2565 ++ dev_warn(&pcie->pdev->dev, "PHY unsupported by firmware\n");
2566 ++ } else if (ret) {
2567 + phy_exit(pcie->phy);
2568 + return ret;
2569 + }
2570 +diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
2571 +index 1a138be8bd6a0..810f25a476321 100644
2572 +--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
2573 ++++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
2574 +@@ -26,7 +26,6 @@
2575 + #define COMPHY_SIP_POWER_ON 0x82000001
2576 + #define COMPHY_SIP_POWER_OFF 0x82000002
2577 + #define COMPHY_SIP_PLL_LOCK 0x82000003
2578 +-#define COMPHY_FW_NOT_SUPPORTED (-1)
2579 +
2580 + #define COMPHY_FW_MODE_SATA 0x1
2581 + #define COMPHY_FW_MODE_SGMII 0x2
2582 +@@ -112,10 +111,19 @@ static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane,
2583 + unsigned long mode)
2584 + {
2585 + struct arm_smccc_res res;
2586 ++ s32 ret;
2587 +
2588 + arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res);
2589 ++ ret = res.a0;
2590 +
2591 +- return res.a0;
2592 ++ switch (ret) {
2593 ++ case SMCCC_RET_SUCCESS:
2594 ++ return 0;
2595 ++ case SMCCC_RET_NOT_SUPPORTED:
2596 ++ return -EOPNOTSUPP;
2597 ++ default:
2598 ++ return -EINVAL;
2599 ++ }
2600 + }
2601 +
2602 + static int mvebu_a3700_comphy_get_fw_mode(int lane, int port,
2603 +@@ -220,7 +228,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy)
2604 + }
2605 +
2606 + ret = mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_ON, lane->id, fw_param);
2607 +- if (ret == COMPHY_FW_NOT_SUPPORTED)
2608 ++ if (ret == -EOPNOTSUPP)
2609 + dev_err(lane->dev,
2610 + "unsupported SMC call, try updating your firmware\n");
2611 +
2612 +diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
2613 +index e41367f36ee1c..53ad127b100fe 100644
2614 +--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
2615 ++++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
2616 +@@ -123,7 +123,6 @@
2617 +
2618 + #define COMPHY_SIP_POWER_ON 0x82000001
2619 + #define COMPHY_SIP_POWER_OFF 0x82000002
2620 +-#define COMPHY_FW_NOT_SUPPORTED (-1)
2621 +
2622 + /*
2623 + * A lane is described by the following bitfields:
2624 +@@ -273,10 +272,19 @@ static int mvebu_comphy_smc(unsigned long function, unsigned long phys,
2625 + unsigned long lane, unsigned long mode)
2626 + {
2627 + struct arm_smccc_res res;
2628 ++ s32 ret;
2629 +
2630 + arm_smccc_smc(function, phys, lane, mode, 0, 0, 0, 0, &res);
2631 ++ ret = res.a0;
2632 +
2633 +- return res.a0;
2634 ++ switch (ret) {
2635 ++ case SMCCC_RET_SUCCESS:
2636 ++ return 0;
2637 ++ case SMCCC_RET_NOT_SUPPORTED:
2638 ++ return -EOPNOTSUPP;
2639 ++ default:
2640 ++ return -EINVAL;
2641 ++ }
2642 + }
2643 +
2644 + static int mvebu_comphy_get_mode(bool fw_mode, int lane, int port,
2645 +@@ -819,7 +827,7 @@ static int mvebu_comphy_power_on(struct phy *phy)
2646 + if (!ret)
2647 + return ret;
2648 +
2649 +- if (ret == COMPHY_FW_NOT_SUPPORTED)
2650 ++ if (ret == -EOPNOTSUPP)
2651 + dev_err(priv->dev,
2652 + "unsupported SMC call, try updating your firmware\n");
2653 +
2654 +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
2655 +index a8d1edcf252c7..64e801a3a0206 100644
2656 +--- a/drivers/tty/serial/amba-pl011.c
2657 ++++ b/drivers/tty/serial/amba-pl011.c
2658 +@@ -308,8 +308,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap,
2659 + */
2660 + static int pl011_fifo_to_tty(struct uart_amba_port *uap)
2661 + {
2662 +- u16 status;
2663 + unsigned int ch, flag, fifotaken;
2664 ++ int sysrq;
2665 ++ u16 status;
2666 +
2667 + for (fifotaken = 0; fifotaken != 256; fifotaken++) {
2668 + status = pl011_read(uap, REG_FR);
2669 +@@ -344,10 +345,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap)
2670 + flag = TTY_FRAME;
2671 + }
2672 +
2673 +- if (uart_handle_sysrq_char(&uap->port, ch & 255))
2674 +- continue;
2675 ++ spin_unlock(&uap->port.lock);
2676 ++ sysrq = uart_handle_sysrq_char(&uap->port, ch & 255);
2677 ++ spin_lock(&uap->port.lock);
2678 +
2679 +- uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
2680 ++ if (!sysrq)
2681 ++ uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
2682 + }
2683 +
2684 + return fifotaken;
2685 +diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
2686 +index ffdf6da016c21..2bb800ca5f0ca 100644
2687 +--- a/drivers/tty/serial/qcom_geni_serial.c
2688 ++++ b/drivers/tty/serial/qcom_geni_serial.c
2689 +@@ -954,7 +954,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
2690 + sampling_rate = UART_OVERSAMPLING;
2691 + /* Sampling rate is halved for IP versions >= 2.5 */
2692 + ver = geni_se_get_qup_hw_version(&port->se);
2693 +- if (GENI_SE_VERSION_MAJOR(ver) >= 2 && GENI_SE_VERSION_MINOR(ver) >= 5)
2694 ++ if (ver >= QUP_SE_VERSION_2_5)
2695 + sampling_rate /= 2;
2696 +
2697 + clk_rate = get_clk_div_rate(baud, sampling_rate, &clk_div);
2698 +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
2699 +index 64a9025a87bee..1f32db7b72b2c 100644
2700 +--- a/drivers/xen/gntdev.c
2701 ++++ b/drivers/xen/gntdev.c
2702 +@@ -720,17 +720,18 @@ struct gntdev_copy_batch {
2703 + s16 __user *status[GNTDEV_COPY_BATCH];
2704 + unsigned int nr_ops;
2705 + unsigned int nr_pages;
2706 ++ bool writeable;
2707 + };
2708 +
2709 + static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
2710 +- bool writeable, unsigned long *gfn)
2711 ++ unsigned long *gfn)
2712 + {
2713 + unsigned long addr = (unsigned long)virt;
2714 + struct page *page;
2715 + unsigned long xen_pfn;
2716 + int ret;
2717 +
2718 +- ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
2719 ++ ret = get_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
2720 + if (ret < 0)
2721 + return ret;
2722 +
2723 +@@ -746,9 +747,13 @@ static void gntdev_put_pages(struct gntdev_copy_batch *batch)
2724 + {
2725 + unsigned int i;
2726 +
2727 +- for (i = 0; i < batch->nr_pages; i++)
2728 ++ for (i = 0; i < batch->nr_pages; i++) {
2729 ++ if (batch->writeable && !PageDirty(batch->pages[i]))
2730 ++ set_page_dirty_lock(batch->pages[i]);
2731 + put_page(batch->pages[i]);
2732 ++ }
2733 + batch->nr_pages = 0;
2734 ++ batch->writeable = false;
2735 + }
2736 +
2737 + static int gntdev_copy(struct gntdev_copy_batch *batch)
2738 +@@ -837,8 +842,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
2739 + virt = seg->source.virt + copied;
2740 + off = (unsigned long)virt & ~XEN_PAGE_MASK;
2741 + len = min(len, (size_t)XEN_PAGE_SIZE - off);
2742 ++ batch->writeable = false;
2743 +
2744 +- ret = gntdev_get_page(batch, virt, false, &gfn);
2745 ++ ret = gntdev_get_page(batch, virt, &gfn);
2746 + if (ret < 0)
2747 + return ret;
2748 +
2749 +@@ -856,8 +862,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
2750 + virt = seg->dest.virt + copied;
2751 + off = (unsigned long)virt & ~XEN_PAGE_MASK;
2752 + len = min(len, (size_t)XEN_PAGE_SIZE - off);
2753 ++ batch->writeable = true;
2754 +
2755 +- ret = gntdev_get_page(batch, virt, true, &gfn);
2756 ++ ret = gntdev_get_page(batch, virt, &gfn);
2757 + if (ret < 0)
2758 + return ret;
2759 +
2760 +diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
2761 +index 28bb5689333a5..15880a68faadc 100644
2762 +--- a/fs/efivarfs/super.c
2763 ++++ b/fs/efivarfs/super.c
2764 +@@ -141,6 +141,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
2765 +
2766 + name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
2767 +
2768 ++ /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */
2769 ++ strreplace(name, '/', '!');
2770 ++
2771 + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
2772 + is_removable);
2773 + if (!inode)
2774 +diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
2775 +index 87e437e7b34f2..f86e3247febc1 100644
2776 +--- a/fs/erofs/xattr.c
2777 ++++ b/fs/erofs/xattr.c
2778 +@@ -473,8 +473,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
2779 + return -EOPNOTSUPP;
2780 + break;
2781 + case EROFS_XATTR_INDEX_TRUSTED:
2782 +- if (!capable(CAP_SYS_ADMIN))
2783 +- return -EPERM;
2784 + break;
2785 + case EROFS_XATTR_INDEX_SECURITY:
2786 + break;
2787 +diff --git a/fs/exec.c b/fs/exec.c
2788 +index e6e8a9a703278..78976a3260c6a 100644
2789 +--- a/fs/exec.c
2790 ++++ b/fs/exec.c
2791 +@@ -62,6 +62,7 @@
2792 + #include <linux/oom.h>
2793 + #include <linux/compat.h>
2794 + #include <linux/vmalloc.h>
2795 ++#include <linux/io_uring.h>
2796 +
2797 + #include <linux/uaccess.h>
2798 + #include <asm/mmu_context.h>
2799 +@@ -1847,6 +1848,11 @@ static int __do_execve_file(int fd, struct filename *filename,
2800 + * further execve() calls fail. */
2801 + current->flags &= ~PF_NPROC_EXCEEDED;
2802 +
2803 ++ /*
2804 ++ * Cancel any io_uring activity across execve
2805 ++ */
2806 ++ io_uring_task_cancel();
2807 ++
2808 + retval = unshare_files(&displaced);
2809 + if (retval)
2810 + goto out_ret;
2811 +diff --git a/fs/file.c b/fs/file.c
2812 +index abb8b7081d7a4..8e2c532bb02e3 100644
2813 +--- a/fs/file.c
2814 ++++ b/fs/file.c
2815 +@@ -18,6 +18,7 @@
2816 + #include <linux/bitops.h>
2817 + #include <linux/spinlock.h>
2818 + #include <linux/rcupdate.h>
2819 ++#include <linux/io_uring.h>
2820 +
2821 + unsigned int sysctl_nr_open __read_mostly = 1024*1024;
2822 + unsigned int sysctl_nr_open_min = BITS_PER_LONG;
2823 +@@ -439,6 +440,7 @@ void exit_files(struct task_struct *tsk)
2824 + struct files_struct * files = tsk->files;
2825 +
2826 + if (files) {
2827 ++ io_uring_files_cancel(files);
2828 + task_lock(tsk);
2829 + tsk->files = NULL;
2830 + task_unlock(tsk);
2831 +diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
2832 +index 02b3c36b36766..5078a6ca7dfcd 100644
2833 +--- a/fs/fuse/dev.c
2834 ++++ b/fs/fuse/dev.c
2835 +@@ -785,15 +785,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
2836 + struct page *newpage;
2837 + struct pipe_buffer *buf = cs->pipebufs;
2838 +
2839 ++ get_page(oldpage);
2840 + err = unlock_request(cs->req);
2841 + if (err)
2842 +- return err;
2843 ++ goto out_put_old;
2844 +
2845 + fuse_copy_finish(cs);
2846 +
2847 + err = pipe_buf_confirm(cs->pipe, buf);
2848 + if (err)
2849 +- return err;
2850 ++ goto out_put_old;
2851 +
2852 + BUG_ON(!cs->nr_segs);
2853 + cs->currbuf = buf;
2854 +@@ -833,7 +834,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
2855 + err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
2856 + if (err) {
2857 + unlock_page(newpage);
2858 +- return err;
2859 ++ goto out_put_old;
2860 + }
2861 +
2862 + get_page(newpage);
2863 +@@ -852,14 +853,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
2864 + if (err) {
2865 + unlock_page(newpage);
2866 + put_page(newpage);
2867 +- return err;
2868 ++ goto out_put_old;
2869 + }
2870 +
2871 + unlock_page(oldpage);
2872 ++ /* Drop ref for ap->pages[] array */
2873 + put_page(oldpage);
2874 + cs->len = 0;
2875 +
2876 +- return 0;
2877 ++ err = 0;
2878 ++out_put_old:
2879 ++ /* Drop ref obtained in this function */
2880 ++ put_page(oldpage);
2881 ++ return err;
2882 +
2883 + out_fallback_unlock:
2884 + unlock_page(newpage);
2885 +@@ -868,10 +874,10 @@ out_fallback:
2886 + cs->offset = buf->offset;
2887 +
2888 + err = lock_request(cs->req);
2889 +- if (err)
2890 +- return err;
2891 ++ if (!err)
2892 ++ err = 1;
2893 +
2894 +- return 1;
2895 ++ goto out_put_old;
2896 + }
2897 +
2898 + static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
2899 +@@ -883,14 +889,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
2900 + if (cs->nr_segs >= cs->pipe->max_usage)
2901 + return -EIO;
2902 +
2903 ++ get_page(page);
2904 + err = unlock_request(cs->req);
2905 +- if (err)
2906 ++ if (err) {
2907 ++ put_page(page);
2908 + return err;
2909 ++ }
2910 +
2911 + fuse_copy_finish(cs);
2912 +
2913 + buf = cs->pipebufs;
2914 +- get_page(page);
2915 + buf->page = page;
2916 + buf->offset = offset;
2917 + buf->len = count;
2918 +diff --git a/fs/io-wq.c b/fs/io-wq.c
2919 +index cb9e5a444fba7..56a229621a831 100644
2920 +--- a/fs/io-wq.c
2921 ++++ b/fs/io-wq.c
2922 +@@ -60,6 +60,7 @@ struct io_worker {
2923 + const struct cred *cur_creds;
2924 + const struct cred *saved_creds;
2925 + struct files_struct *restore_files;
2926 ++ struct nsproxy *restore_nsproxy;
2927 + struct fs_struct *restore_fs;
2928 + };
2929 +
2930 +@@ -87,7 +88,7 @@ enum {
2931 + */
2932 + struct io_wqe {
2933 + struct {
2934 +- spinlock_t lock;
2935 ++ raw_spinlock_t lock;
2936 + struct io_wq_work_list work_list;
2937 + unsigned long hash_map;
2938 + unsigned flags;
2939 +@@ -148,11 +149,12 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
2940 +
2941 + if (current->files != worker->restore_files) {
2942 + __acquire(&wqe->lock);
2943 +- spin_unlock_irq(&wqe->lock);
2944 ++ raw_spin_unlock_irq(&wqe->lock);
2945 + dropped_lock = true;
2946 +
2947 + task_lock(current);
2948 + current->files = worker->restore_files;
2949 ++ current->nsproxy = worker->restore_nsproxy;
2950 + task_unlock(current);
2951 + }
2952 +
2953 +@@ -166,7 +168,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
2954 + if (worker->mm) {
2955 + if (!dropped_lock) {
2956 + __acquire(&wqe->lock);
2957 +- spin_unlock_irq(&wqe->lock);
2958 ++ raw_spin_unlock_irq(&wqe->lock);
2959 + dropped_lock = true;
2960 + }
2961 + __set_current_state(TASK_RUNNING);
2962 +@@ -200,7 +202,6 @@ static void io_worker_exit(struct io_worker *worker)
2963 + {
2964 + struct io_wqe *wqe = worker->wqe;
2965 + struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
2966 +- unsigned nr_workers;
2967 +
2968 + /*
2969 + * If we're not at zero, someone else is holding a brief reference
2970 +@@ -220,23 +221,19 @@ static void io_worker_exit(struct io_worker *worker)
2971 + worker->flags = 0;
2972 + preempt_enable();
2973 +
2974 +- spin_lock_irq(&wqe->lock);
2975 ++ raw_spin_lock_irq(&wqe->lock);
2976 + hlist_nulls_del_rcu(&worker->nulls_node);
2977 + list_del_rcu(&worker->all_list);
2978 + if (__io_worker_unuse(wqe, worker)) {
2979 + __release(&wqe->lock);
2980 +- spin_lock_irq(&wqe->lock);
2981 ++ raw_spin_lock_irq(&wqe->lock);
2982 + }
2983 + acct->nr_workers--;
2984 +- nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
2985 +- wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
2986 +- spin_unlock_irq(&wqe->lock);
2987 +-
2988 +- /* all workers gone, wq exit can proceed */
2989 +- if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
2990 +- complete(&wqe->wq->done);
2991 ++ raw_spin_unlock_irq(&wqe->lock);
2992 +
2993 + kfree_rcu(worker, rcu);
2994 ++ if (refcount_dec_and_test(&wqe->wq->refs))
2995 ++ complete(&wqe->wq->done);
2996 + }
2997 +
2998 + static inline bool io_wqe_run_queue(struct io_wqe *wqe)
2999 +@@ -318,6 +315,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
3000 +
3001 + worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
3002 + worker->restore_files = current->files;
3003 ++ worker->restore_nsproxy = current->nsproxy;
3004 + worker->restore_fs = current->fs;
3005 + io_wqe_inc_running(wqe, worker);
3006 + }
3007 +@@ -454,6 +452,7 @@ static void io_impersonate_work(struct io_worker *worker,
3008 + if (work->files && current->files != work->files) {
3009 + task_lock(current);
3010 + current->files = work->files;
3011 ++ current->nsproxy = work->nsproxy;
3012 + task_unlock(current);
3013 + }
3014 + if (work->fs && current->fs != work->fs)
3015 +@@ -504,7 +503,7 @@ get_next:
3016 + else if (!wq_list_empty(&wqe->work_list))
3017 + wqe->flags |= IO_WQE_FLAG_STALLED;
3018 +
3019 +- spin_unlock_irq(&wqe->lock);
3020 ++ raw_spin_unlock_irq(&wqe->lock);
3021 + if (!work)
3022 + break;
3023 + io_assign_current_work(worker, work);
3024 +@@ -539,7 +538,7 @@ get_next:
3025 + io_wqe_enqueue(wqe, linked);
3026 +
3027 + if (hash != -1U && !next_hashed) {
3028 +- spin_lock_irq(&wqe->lock);
3029 ++ raw_spin_lock_irq(&wqe->lock);
3030 + wqe->hash_map &= ~BIT_ULL(hash);
3031 + wqe->flags &= ~IO_WQE_FLAG_STALLED;
3032 + /* dependent work is not hashed */
3033 +@@ -547,11 +546,11 @@ get_next:
3034 + /* skip unnecessary unlock-lock wqe->lock */
3035 + if (!work)
3036 + goto get_next;
3037 +- spin_unlock_irq(&wqe->lock);
3038 ++ raw_spin_unlock_irq(&wqe->lock);
3039 + }
3040 + } while (work);
3041 +
3042 +- spin_lock_irq(&wqe->lock);
3043 ++ raw_spin_lock_irq(&wqe->lock);
3044 + } while (1);
3045 + }
3046 +
3047 +@@ -566,7 +565,7 @@ static int io_wqe_worker(void *data)
3048 + while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
3049 + set_current_state(TASK_INTERRUPTIBLE);
3050 + loop:
3051 +- spin_lock_irq(&wqe->lock);
3052 ++ raw_spin_lock_irq(&wqe->lock);
3053 + if (io_wqe_run_queue(wqe)) {
3054 + __set_current_state(TASK_RUNNING);
3055 + io_worker_handle_work(worker);
3056 +@@ -577,7 +576,7 @@ loop:
3057 + __release(&wqe->lock);
3058 + goto loop;
3059 + }
3060 +- spin_unlock_irq(&wqe->lock);
3061 ++ raw_spin_unlock_irq(&wqe->lock);
3062 + if (signal_pending(current))
3063 + flush_signals(current);
3064 + if (schedule_timeout(WORKER_IDLE_TIMEOUT))
3065 +@@ -589,11 +588,11 @@ loop:
3066 + }
3067 +
3068 + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
3069 +- spin_lock_irq(&wqe->lock);
3070 ++ raw_spin_lock_irq(&wqe->lock);
3071 + if (!wq_list_empty(&wqe->work_list))
3072 + io_worker_handle_work(worker);
3073 + else
3074 +- spin_unlock_irq(&wqe->lock);
3075 ++ raw_spin_unlock_irq(&wqe->lock);
3076 + }
3077 +
3078 + io_worker_exit(worker);
3079 +@@ -633,14 +632,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
3080 +
3081 + worker->flags &= ~IO_WORKER_F_RUNNING;
3082 +
3083 +- spin_lock_irq(&wqe->lock);
3084 ++ raw_spin_lock_irq(&wqe->lock);
3085 + io_wqe_dec_running(wqe, worker);
3086 +- spin_unlock_irq(&wqe->lock);
3087 ++ raw_spin_unlock_irq(&wqe->lock);
3088 + }
3089 +
3090 + static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
3091 + {
3092 +- struct io_wqe_acct *acct =&wqe->acct[index];
3093 ++ struct io_wqe_acct *acct = &wqe->acct[index];
3094 + struct io_worker *worker;
3095 +
3096 + worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
3097 +@@ -659,7 +658,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
3098 + return false;
3099 + }
3100 +
3101 +- spin_lock_irq(&wqe->lock);
3102 ++ raw_spin_lock_irq(&wqe->lock);
3103 + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
3104 + list_add_tail_rcu(&worker->all_list, &wqe->all_list);
3105 + worker->flags |= IO_WORKER_F_FREE;
3106 +@@ -668,11 +667,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
3107 + if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
3108 + worker->flags |= IO_WORKER_F_FIXED;
3109 + acct->nr_workers++;
3110 +- spin_unlock_irq(&wqe->lock);
3111 ++ raw_spin_unlock_irq(&wqe->lock);
3112 +
3113 + if (index == IO_WQ_ACCT_UNBOUND)
3114 + atomic_inc(&wq->user->processes);
3115 +
3116 ++ refcount_inc(&wq->refs);
3117 + wake_up_process(worker->task);
3118 + return true;
3119 + }
3120 +@@ -688,28 +688,63 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
3121 + return acct->nr_workers < acct->max_workers;
3122 + }
3123 +
3124 ++static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
3125 ++{
3126 ++ send_sig(SIGINT, worker->task, 1);
3127 ++ return false;
3128 ++}
3129 ++
3130 ++/*
3131 ++ * Iterate the passed in list and call the specific function for each
3132 ++ * worker that isn't exiting
3133 ++ */
3134 ++static bool io_wq_for_each_worker(struct io_wqe *wqe,
3135 ++ bool (*func)(struct io_worker *, void *),
3136 ++ void *data)
3137 ++{
3138 ++ struct io_worker *worker;
3139 ++ bool ret = false;
3140 ++
3141 ++ list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
3142 ++ if (io_worker_get(worker)) {
3143 ++ /* no task if node is/was offline */
3144 ++ if (worker->task)
3145 ++ ret = func(worker, data);
3146 ++ io_worker_release(worker);
3147 ++ if (ret)
3148 ++ break;
3149 ++ }
3150 ++ }
3151 ++
3152 ++ return ret;
3153 ++}
3154 ++
3155 ++static bool io_wq_worker_wake(struct io_worker *worker, void *data)
3156 ++{
3157 ++ wake_up_process(worker->task);
3158 ++ return false;
3159 ++}
3160 ++
3161 + /*
3162 + * Manager thread. Tasked with creating new workers, if we need them.
3163 + */
3164 + static int io_wq_manager(void *data)
3165 + {
3166 + struct io_wq *wq = data;
3167 +- int workers_to_create = num_possible_nodes();
3168 + int node;
3169 +
3170 + /* create fixed workers */
3171 +- refcount_set(&wq->refs, workers_to_create);
3172 ++ refcount_set(&wq->refs, 1);
3173 + for_each_node(node) {
3174 + if (!node_online(node))
3175 + continue;
3176 +- if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
3177 +- goto err;
3178 +- workers_to_create--;
3179 ++ if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
3180 ++ continue;
3181 ++ set_bit(IO_WQ_BIT_ERROR, &wq->state);
3182 ++ set_bit(IO_WQ_BIT_EXIT, &wq->state);
3183 ++ goto out;
3184 + }
3185 +
3186 +- while (workers_to_create--)
3187 +- refcount_dec(&wq->refs);
3188 +-
3189 + complete(&wq->done);
3190 +
3191 + while (!kthread_should_stop()) {
3192 +@@ -723,12 +758,12 @@ static int io_wq_manager(void *data)
3193 + if (!node_online(node))
3194 + continue;
3195 +
3196 +- spin_lock_irq(&wqe->lock);
3197 ++ raw_spin_lock_irq(&wqe->lock);
3198 + if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
3199 + fork_worker[IO_WQ_ACCT_BOUND] = true;
3200 + if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
3201 + fork_worker[IO_WQ_ACCT_UNBOUND] = true;
3202 +- spin_unlock_irq(&wqe->lock);
3203 ++ raw_spin_unlock_irq(&wqe->lock);
3204 + if (fork_worker[IO_WQ_ACCT_BOUND])
3205 + create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
3206 + if (fork_worker[IO_WQ_ACCT_UNBOUND])
3207 +@@ -741,12 +776,18 @@ static int io_wq_manager(void *data)
3208 + if (current->task_works)
3209 + task_work_run();
3210 +
3211 +- return 0;
3212 +-err:
3213 +- set_bit(IO_WQ_BIT_ERROR, &wq->state);
3214 +- set_bit(IO_WQ_BIT_EXIT, &wq->state);
3215 +- if (refcount_sub_and_test(workers_to_create, &wq->refs))
3216 ++out:
3217 ++ if (refcount_dec_and_test(&wq->refs)) {
3218 + complete(&wq->done);
3219 ++ return 0;
3220 ++ }
3221 ++ /* if ERROR is set and we get here, we have workers to wake */
3222 ++ if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
3223 ++ rcu_read_lock();
3224 ++ for_each_node(node)
3225 ++ io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
3226 ++ rcu_read_unlock();
3227 ++ }
3228 + return 0;
3229 + }
3230 +
3231 +@@ -825,10 +866,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
3232 + }
3233 +
3234 + work_flags = work->flags;
3235 +- spin_lock_irqsave(&wqe->lock, flags);
3236 ++ raw_spin_lock_irqsave(&wqe->lock, flags);
3237 + io_wqe_insert_work(wqe, work);
3238 + wqe->flags &= ~IO_WQE_FLAG_STALLED;
3239 +- spin_unlock_irqrestore(&wqe->lock, flags);
3240 ++ raw_spin_unlock_irqrestore(&wqe->lock, flags);
3241 +
3242 + if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
3243 + !atomic_read(&acct->nr_running))
3244 +@@ -854,37 +895,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
3245 + work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
3246 + }
3247 +
3248 +-static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
3249 +-{
3250 +- send_sig(SIGINT, worker->task, 1);
3251 +- return false;
3252 +-}
3253 +-
3254 +-/*
3255 +- * Iterate the passed in list and call the specific function for each
3256 +- * worker that isn't exiting
3257 +- */
3258 +-static bool io_wq_for_each_worker(struct io_wqe *wqe,
3259 +- bool (*func)(struct io_worker *, void *),
3260 +- void *data)
3261 +-{
3262 +- struct io_worker *worker;
3263 +- bool ret = false;
3264 +-
3265 +- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
3266 +- if (io_worker_get(worker)) {
3267 +- /* no task if node is/was offline */
3268 +- if (worker->task)
3269 +- ret = func(worker, data);
3270 +- io_worker_release(worker);
3271 +- if (ret)
3272 +- break;
3273 +- }
3274 +- }
3275 +-
3276 +- return ret;
3277 +-}
3278 +-
3279 + void io_wq_cancel_all(struct io_wq *wq)
3280 + {
3281 + int node;
3282 +@@ -955,13 +965,13 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
3283 + unsigned long flags;
3284 +
3285 + retry:
3286 +- spin_lock_irqsave(&wqe->lock, flags);
3287 ++ raw_spin_lock_irqsave(&wqe->lock, flags);
3288 + wq_list_for_each(node, prev, &wqe->work_list) {
3289 + work = container_of(node, struct io_wq_work, list);
3290 + if (!match->fn(work, match->data))
3291 + continue;
3292 + io_wqe_remove_pending(wqe, work, prev);
3293 +- spin_unlock_irqrestore(&wqe->lock, flags);
3294 ++ raw_spin_unlock_irqrestore(&wqe->lock, flags);
3295 + io_run_cancel(work, wqe);
3296 + match->nr_pending++;
3297 + if (!match->cancel_all)
3298 +@@ -970,7 +980,7 @@ retry:
3299 + /* not safe to continue after unlock */
3300 + goto retry;
3301 + }
3302 +- spin_unlock_irqrestore(&wqe->lock, flags);
3303 ++ raw_spin_unlock_irqrestore(&wqe->lock, flags);
3304 + }
3305 +
3306 + static void io_wqe_cancel_running_work(struct io_wqe *wqe,
3307 +@@ -1078,7 +1088,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
3308 + }
3309 + atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
3310 + wqe->wq = wq;
3311 +- spin_lock_init(&wqe->lock);
3312 ++ raw_spin_lock_init(&wqe->lock);
3313 + INIT_WQ_LIST(&wqe->work_list);
3314 + INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
3315 + INIT_LIST_HEAD(&wqe->all_list);
3316 +@@ -1117,12 +1127,6 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
3317 + return refcount_inc_not_zero(&wq->use_refs);
3318 + }
3319 +
3320 +-static bool io_wq_worker_wake(struct io_worker *worker, void *data)
3321 +-{
3322 +- wake_up_process(worker->task);
3323 +- return false;
3324 +-}
3325 +-
3326 + static void __io_wq_destroy(struct io_wq *wq)
3327 + {
3328 + int node;
3329 +diff --git a/fs/io-wq.h b/fs/io-wq.h
3330 +index 071f1a9978002..9be6def2b5a6f 100644
3331 +--- a/fs/io-wq.h
3332 ++++ b/fs/io-wq.h
3333 +@@ -88,6 +88,7 @@ struct io_wq_work {
3334 + struct files_struct *files;
3335 + struct mm_struct *mm;
3336 + const struct cred *creds;
3337 ++ struct nsproxy *nsproxy;
3338 + struct fs_struct *fs;
3339 + unsigned flags;
3340 + };
3341 +diff --git a/fs/io_uring.c b/fs/io_uring.c
3342 +index d2bb2ae9551f0..8e9c58fa76362 100644
3343 +--- a/fs/io_uring.c
3344 ++++ b/fs/io_uring.c
3345 +@@ -78,6 +78,7 @@
3346 + #include <linux/fs_struct.h>
3347 + #include <linux/splice.h>
3348 + #include <linux/task_work.h>
3349 ++#include <linux/io_uring.h>
3350 +
3351 + #define CREATE_TRACE_POINTS
3352 + #include <trace/events/io_uring.h>
3353 +@@ -264,7 +265,16 @@ struct io_ring_ctx {
3354 + /* IO offload */
3355 + struct io_wq *io_wq;
3356 + struct task_struct *sqo_thread; /* if using sq thread polling */
3357 +- struct mm_struct *sqo_mm;
3358 ++
3359 ++ /*
3360 ++ * For SQPOLL usage - we hold a reference to the parent task, so we
3361 ++ * have access to the ->files
3362 ++ */
3363 ++ struct task_struct *sqo_task;
3364 ++
3365 ++ /* Only used for accounting purposes */
3366 ++ struct mm_struct *mm_account;
3367 ++
3368 + wait_queue_head_t sqo_wait;
3369 +
3370 + /*
3371 +@@ -274,8 +284,6 @@ struct io_ring_ctx {
3372 + */
3373 + struct fixed_file_data *file_data;
3374 + unsigned nr_user_files;
3375 +- int ring_fd;
3376 +- struct file *ring_file;
3377 +
3378 + /* if used, fixed mapped user buffers */
3379 + unsigned nr_user_bufs;
3380 +@@ -541,7 +549,6 @@ enum {
3381 + REQ_F_NO_FILE_TABLE_BIT,
3382 + REQ_F_QUEUE_TIMEOUT_BIT,
3383 + REQ_F_WORK_INITIALIZED_BIT,
3384 +- REQ_F_TASK_PINNED_BIT,
3385 +
3386 + /* not a real bit, just to check we're not overflowing the space */
3387 + __REQ_F_LAST_BIT,
3388 +@@ -599,8 +606,6 @@ enum {
3389 + REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
3390 + /* io_wq_work is initialized */
3391 + REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
3392 +- /* req->task is refcounted */
3393 +- REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT),
3394 + };
3395 +
3396 + struct async_poll {
3397 +@@ -915,21 +920,6 @@ struct sock *io_uring_get_socket(struct file *file)
3398 + }
3399 + EXPORT_SYMBOL(io_uring_get_socket);
3400 +
3401 +-static void io_get_req_task(struct io_kiocb *req)
3402 +-{
3403 +- if (req->flags & REQ_F_TASK_PINNED)
3404 +- return;
3405 +- get_task_struct(req->task);
3406 +- req->flags |= REQ_F_TASK_PINNED;
3407 +-}
3408 +-
3409 +-/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
3410 +-static void __io_put_req_task(struct io_kiocb *req)
3411 +-{
3412 +- if (req->flags & REQ_F_TASK_PINNED)
3413 +- put_task_struct(req->task);
3414 +-}
3415 +-
3416 + static void io_file_put_work(struct work_struct *work);
3417 +
3418 + /*
3419 +@@ -1141,14 +1131,34 @@ static void io_kill_timeout(struct io_kiocb *req)
3420 + }
3421 + }
3422 +
3423 +-static void io_kill_timeouts(struct io_ring_ctx *ctx)
3424 ++static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
3425 ++{
3426 ++ struct io_ring_ctx *ctx = req->ctx;
3427 ++
3428 ++ if (!tsk || req->task == tsk)
3429 ++ return true;
3430 ++ if ((ctx->flags & IORING_SETUP_SQPOLL) && req->task == ctx->sqo_thread)
3431 ++ return true;
3432 ++ return false;
3433 ++}
3434 ++
3435 ++/*
3436 ++ * Returns true if we found and killed one or more timeouts
3437 ++ */
3438 ++static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
3439 + {
3440 + struct io_kiocb *req, *tmp;
3441 ++ int canceled = 0;
3442 +
3443 + spin_lock_irq(&ctx->completion_lock);
3444 +- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
3445 +- io_kill_timeout(req);
3446 ++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list) {
3447 ++ if (io_task_match(req, tsk)) {
3448 ++ io_kill_timeout(req);
3449 ++ canceled++;
3450 ++ }
3451 ++ }
3452 + spin_unlock_irq(&ctx->completion_lock);
3453 ++ return canceled != 0;
3454 + }
3455 +
3456 + static void __io_queue_deferred(struct io_ring_ctx *ctx)
3457 +@@ -1229,12 +1239,24 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
3458 + eventfd_signal(ctx->cq_ev_fd, 1);
3459 + }
3460 +
3461 ++static inline bool io_match_files(struct io_kiocb *req,
3462 ++ struct files_struct *files)
3463 ++{
3464 ++ if (!files)
3465 ++ return true;
3466 ++ if (req->flags & REQ_F_WORK_INITIALIZED)
3467 ++ return req->work.files == files;
3468 ++ return false;
3469 ++}
3470 ++
3471 + /* Returns true if there are no backlogged entries after the flush */
3472 +-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
3473 ++static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
3474 ++ struct task_struct *tsk,
3475 ++ struct files_struct *files)
3476 + {
3477 + struct io_rings *rings = ctx->rings;
3478 ++ struct io_kiocb *req, *tmp;
3479 + struct io_uring_cqe *cqe;
3480 +- struct io_kiocb *req;
3481 + unsigned long flags;
3482 + LIST_HEAD(list);
3483 +
3484 +@@ -1253,7 +1275,12 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
3485 + ctx->cq_overflow_flushed = 1;
3486 +
3487 + cqe = NULL;
3488 +- while (!list_empty(&ctx->cq_overflow_list)) {
3489 ++ list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, list) {
3490 ++ if (tsk && req->task != tsk)
3491 ++ continue;
3492 ++ if (!io_match_files(req, files))
3493 ++ continue;
3494 ++
3495 + cqe = io_get_cqring(ctx);
3496 + if (!cqe && !force)
3497 + break;
3498 +@@ -1307,7 +1334,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
3499 + WRITE_ONCE(cqe->user_data, req->user_data);
3500 + WRITE_ONCE(cqe->res, res);
3501 + WRITE_ONCE(cqe->flags, cflags);
3502 +- } else if (ctx->cq_overflow_flushed) {
3503 ++ } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
3504 ++ /*
3505 ++ * If we're in ring overflow flush mode, or in task cancel mode,
3506 ++ * then we cannot store the request for later flushing, we need
3507 ++ * to drop it on the floor.
3508 ++ */
3509 + WRITE_ONCE(ctx->rings->cq_overflow,
3510 + atomic_inc_return(&ctx->cached_cq_overflow));
3511 + } else {
3512 +@@ -1412,15 +1444,35 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file,
3513 + fput(file);
3514 + }
3515 +
3516 ++static void io_req_drop_files(struct io_kiocb *req)
3517 ++{
3518 ++ struct io_ring_ctx *ctx = req->ctx;
3519 ++ unsigned long flags;
3520 ++
3521 ++ spin_lock_irqsave(&ctx->inflight_lock, flags);
3522 ++ list_del(&req->inflight_entry);
3523 ++ if (waitqueue_active(&ctx->inflight_wait))
3524 ++ wake_up(&ctx->inflight_wait);
3525 ++ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
3526 ++ req->flags &= ~REQ_F_INFLIGHT;
3527 ++ put_files_struct(req->work.files);
3528 ++ put_nsproxy(req->work.nsproxy);
3529 ++ req->work.files = NULL;
3530 ++}
3531 ++
3532 + static void __io_req_aux_free(struct io_kiocb *req)
3533 + {
3534 ++ struct io_uring_task *tctx = req->task->io_uring;
3535 + if (req->flags & REQ_F_NEED_CLEANUP)
3536 + io_cleanup_req(req);
3537 +
3538 + kfree(req->io);
3539 + if (req->file)
3540 + io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
3541 +- __io_put_req_task(req);
3542 ++ atomic_long_inc(&tctx->req_complete);
3543 ++ if (tctx->in_idle)
3544 ++ wake_up(&tctx->wait);
3545 ++ put_task_struct(req->task);
3546 + io_req_work_drop_env(req);
3547 + }
3548 +
3549 +@@ -1428,16 +1480,8 @@ static void __io_free_req(struct io_kiocb *req)
3550 + {
3551 + __io_req_aux_free(req);
3552 +
3553 +- if (req->flags & REQ_F_INFLIGHT) {
3554 +- struct io_ring_ctx *ctx = req->ctx;
3555 +- unsigned long flags;
3556 +-
3557 +- spin_lock_irqsave(&ctx->inflight_lock, flags);
3558 +- list_del(&req->inflight_entry);
3559 +- if (waitqueue_active(&ctx->inflight_wait))
3560 +- wake_up(&ctx->inflight_wait);
3561 +- spin_unlock_irqrestore(&ctx->inflight_lock, flags);
3562 +- }
3563 ++ if (req->flags & REQ_F_INFLIGHT)
3564 ++ io_req_drop_files(req);
3565 +
3566 + percpu_ref_put(&req->ctx->refs);
3567 + if (likely(!io_is_fallback_req(req)))
3568 +@@ -1717,7 +1761,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
3569 + if (noflush && !list_empty(&ctx->cq_overflow_list))
3570 + return -1U;
3571 +
3572 +- io_cqring_overflow_flush(ctx, false);
3573 ++ io_cqring_overflow_flush(ctx, false, NULL, NULL);
3574 + }
3575 +
3576 + /* See comment at the top of this file */
3577 +@@ -1738,7 +1782,7 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
3578 + if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req))
3579 + return false;
3580 +
3581 +- if (req->file || req->io)
3582 ++ if (req->file || req->io || req->task)
3583 + rb->need_iter++;
3584 +
3585 + rb->reqs[rb->to_free++] = req;
3586 +@@ -1762,6 +1806,12 @@ static int io_put_kbuf(struct io_kiocb *req)
3587 +
3588 + static inline bool io_run_task_work(void)
3589 + {
3590 ++ /*
3591 ++ * Not safe to run on exiting task, and the task_work handling will
3592 ++ * not add work to such a task.
3593 ++ */
3594 ++ if (unlikely(current->flags & PF_EXITING))
3595 ++ return false;
3596 + if (current->task_works) {
3597 + __set_current_state(TASK_RUNNING);
3598 + task_work_run();
3599 +@@ -3492,8 +3542,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
3600 + return -EBADF;
3601 +
3602 + req->close.fd = READ_ONCE(sqe->fd);
3603 +- if ((req->file && req->file->f_op == &io_uring_fops) ||
3604 +- req->close.fd == req->ctx->ring_fd)
3605 ++ if ((req->file && req->file->f_op == &io_uring_fops))
3606 + return -EBADF;
3607 +
3608 + req->close.put_file = NULL;
3609 +@@ -4397,9 +4446,10 @@ static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
3610 + {
3611 + if (io_op_defs[req->opcode].needs_mm && !current->mm) {
3612 + if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
3613 +- !mmget_not_zero(ctx->sqo_mm)))
3614 ++ !ctx->sqo_task->mm ||
3615 ++ !mmget_not_zero(ctx->sqo_task->mm)))
3616 + return -EFAULT;
3617 +- kthread_use_mm(ctx->sqo_mm);
3618 ++ kthread_use_mm(ctx->sqo_task->mm);
3619 + }
3620 +
3621 + return 0;
3622 +@@ -4550,7 +4600,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
3623 + if (req->flags & REQ_F_WORK_INITIALIZED)
3624 + memcpy(&apoll->work, &req->work, sizeof(req->work));
3625 +
3626 +- io_get_req_task(req);
3627 + req->apoll = apoll;
3628 + INIT_HLIST_NODE(&req->hash_node);
3629 +
3630 +@@ -4635,7 +4684,10 @@ static bool io_poll_remove_one(struct io_kiocb *req)
3631 + return do_complete;
3632 + }
3633 +
3634 +-static void io_poll_remove_all(struct io_ring_ctx *ctx)
3635 ++/*
3636 ++ * Returns true if we found and killed one or more poll requests
3637 ++ */
3638 ++static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
3639 + {
3640 + struct hlist_node *tmp;
3641 + struct io_kiocb *req;
3642 +@@ -4646,13 +4698,17 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
3643 + struct hlist_head *list;
3644 +
3645 + list = &ctx->cancel_hash[i];
3646 +- hlist_for_each_entry_safe(req, tmp, list, hash_node)
3647 +- posted += io_poll_remove_one(req);
3648 ++ hlist_for_each_entry_safe(req, tmp, list, hash_node) {
3649 ++ if (io_task_match(req, tsk))
3650 ++ posted += io_poll_remove_one(req);
3651 ++ }
3652 + }
3653 + spin_unlock_irq(&ctx->completion_lock);
3654 +
3655 + if (posted)
3656 + io_cqring_ev_posted(ctx);
3657 ++
3658 ++ return posted != 0;
3659 + }
3660 +
3661 + static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
3662 +@@ -4738,8 +4794,6 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
3663 +
3664 + events = READ_ONCE(sqe->poll_events);
3665 + poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
3666 +-
3667 +- io_get_req_task(req);
3668 + return 0;
3669 + }
3670 +
3671 +@@ -5626,32 +5680,20 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
3672 +
3673 + static int io_grab_files(struct io_kiocb *req)
3674 + {
3675 +- int ret = -EBADF;
3676 + struct io_ring_ctx *ctx = req->ctx;
3677 +
3678 + if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE))
3679 + return 0;
3680 +- if (!ctx->ring_file)
3681 +- return -EBADF;
3682 +
3683 +- rcu_read_lock();
3684 ++ req->work.files = get_files_struct(current);
3685 ++ get_nsproxy(current->nsproxy);
3686 ++ req->work.nsproxy = current->nsproxy;
3687 ++ req->flags |= REQ_F_INFLIGHT;
3688 ++
3689 + spin_lock_irq(&ctx->inflight_lock);
3690 +- /*
3691 +- * We use the f_ops->flush() handler to ensure that we can flush
3692 +- * out work accessing these files if the fd is closed. Check if
3693 +- * the fd has changed since we started down this path, and disallow
3694 +- * this operation if it has.
3695 +- */
3696 +- if (fcheck(ctx->ring_fd) == ctx->ring_file) {
3697 +- list_add(&req->inflight_entry, &ctx->inflight_list);
3698 +- req->flags |= REQ_F_INFLIGHT;
3699 +- req->work.files = current->files;
3700 +- ret = 0;
3701 +- }
3702 ++ list_add(&req->inflight_entry, &ctx->inflight_list);
3703 + spin_unlock_irq(&ctx->inflight_lock);
3704 +- rcu_read_unlock();
3705 +-
3706 +- return ret;
3707 ++ return 0;
3708 + }
3709 +
3710 + static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
3711 +@@ -6021,6 +6063,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
3712 + /* one is dropped after submission, the other at completion */
3713 + refcount_set(&req->refs, 2);
3714 + req->task = current;
3715 ++ get_task_struct(req->task);
3716 ++ atomic_long_inc(&req->task->io_uring->req_issue);
3717 + req->result = 0;
3718 +
3719 + if (unlikely(req->opcode >= IORING_OP_LAST))
3720 +@@ -6056,8 +6100,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
3721 + return io_req_set_file(state, req, READ_ONCE(sqe->fd));
3722 + }
3723 +
3724 +-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
3725 +- struct file *ring_file, int ring_fd)
3726 ++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
3727 + {
3728 + struct io_submit_state state, *statep = NULL;
3729 + struct io_kiocb *link = NULL;
3730 +@@ -6066,7 +6109,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
3731 + /* if we have a backlog and couldn't flush it all, return BUSY */
3732 + if (test_bit(0, &ctx->sq_check_overflow)) {
3733 + if (!list_empty(&ctx->cq_overflow_list) &&
3734 +- !io_cqring_overflow_flush(ctx, false))
3735 ++ !io_cqring_overflow_flush(ctx, false, NULL, NULL))
3736 + return -EBUSY;
3737 + }
3738 +
3739 +@@ -6081,9 +6124,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
3740 + statep = &state;
3741 + }
3742 +
3743 +- ctx->ring_fd = ring_fd;
3744 +- ctx->ring_file = ring_file;
3745 +-
3746 + for (i = 0; i < nr; i++) {
3747 + const struct io_uring_sqe *sqe;
3748 + struct io_kiocb *req;
3749 +@@ -6244,7 +6284,7 @@ static int io_sq_thread(void *data)
3750 +
3751 + mutex_lock(&ctx->uring_lock);
3752 + if (likely(!percpu_ref_is_dying(&ctx->refs)))
3753 +- ret = io_submit_sqes(ctx, to_submit, NULL, -1);
3754 ++ ret = io_submit_sqes(ctx, to_submit);
3755 + mutex_unlock(&ctx->uring_lock);
3756 + timeout = jiffies + ctx->sq_thread_idle;
3757 + }
3758 +@@ -7073,14 +7113,38 @@ out_fput:
3759 + return ret;
3760 + }
3761 +
3762 ++static int io_uring_alloc_task_context(struct task_struct *task)
3763 ++{
3764 ++ struct io_uring_task *tctx;
3765 ++
3766 ++ tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
3767 ++ if (unlikely(!tctx))
3768 ++ return -ENOMEM;
3769 ++
3770 ++ xa_init(&tctx->xa);
3771 ++ init_waitqueue_head(&tctx->wait);
3772 ++ tctx->last = NULL;
3773 ++ tctx->in_idle = 0;
3774 ++ atomic_long_set(&tctx->req_issue, 0);
3775 ++ atomic_long_set(&tctx->req_complete, 0);
3776 ++ task->io_uring = tctx;
3777 ++ return 0;
3778 ++}
3779 ++
3780 ++void __io_uring_free(struct task_struct *tsk)
3781 ++{
3782 ++ struct io_uring_task *tctx = tsk->io_uring;
3783 ++
3784 ++ WARN_ON_ONCE(!xa_empty(&tctx->xa));
3785 ++ kfree(tctx);
3786 ++ tsk->io_uring = NULL;
3787 ++}
3788 ++
3789 + static int io_sq_offload_start(struct io_ring_ctx *ctx,
3790 + struct io_uring_params *p)
3791 + {
3792 + int ret;
3793 +
3794 +- mmgrab(current->mm);
3795 +- ctx->sqo_mm = current->mm;
3796 +-
3797 + if (ctx->flags & IORING_SETUP_SQPOLL) {
3798 + ret = -EPERM;
3799 + if (!capable(CAP_SYS_ADMIN))
3800 +@@ -7111,6 +7175,9 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
3801 + ctx->sqo_thread = NULL;
3802 + goto err;
3803 + }
3804 ++ ret = io_uring_alloc_task_context(ctx->sqo_thread);
3805 ++ if (ret)
3806 ++ goto err;
3807 + wake_up_process(ctx->sqo_thread);
3808 + } else if (p->flags & IORING_SETUP_SQ_AFF) {
3809 + /* Can't have SQ_AFF without SQPOLL */
3810 +@@ -7125,8 +7192,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
3811 + return 0;
3812 + err:
3813 + io_finish_async(ctx);
3814 +- mmdrop(ctx->sqo_mm);
3815 +- ctx->sqo_mm = NULL;
3816 + return ret;
3817 + }
3818 +
3819 +@@ -7456,8 +7521,12 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
3820 + static void io_ring_ctx_free(struct io_ring_ctx *ctx)
3821 + {
3822 + io_finish_async(ctx);
3823 +- if (ctx->sqo_mm)
3824 +- mmdrop(ctx->sqo_mm);
3825 ++ if (ctx->sqo_task) {
3826 ++ put_task_struct(ctx->sqo_task);
3827 ++ ctx->sqo_task = NULL;
3828 ++ mmdrop(ctx->mm_account);
3829 ++ ctx->mm_account = NULL;
3830 ++ }
3831 +
3832 + io_iopoll_reap_events(ctx);
3833 + io_sqe_buffer_unregister(ctx);
3834 +@@ -7528,7 +7597,7 @@ static void io_ring_exit_work(struct work_struct *work)
3835 +
3836 + ctx = container_of(work, struct io_ring_ctx, exit_work);
3837 + if (ctx->rings)
3838 +- io_cqring_overflow_flush(ctx, true);
3839 ++ io_cqring_overflow_flush(ctx, true, NULL, NULL);
3840 +
3841 + /*
3842 + * If we're doing polled IO and end up having requests being
3843 +@@ -7539,7 +7608,7 @@ static void io_ring_exit_work(struct work_struct *work)
3844 + while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) {
3845 + io_iopoll_reap_events(ctx);
3846 + if (ctx->rings)
3847 +- io_cqring_overflow_flush(ctx, true);
3848 ++ io_cqring_overflow_flush(ctx, true, NULL, NULL);
3849 + }
3850 + io_ring_ctx_free(ctx);
3851 + }
3852 +@@ -7550,8 +7619,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
3853 + percpu_ref_kill(&ctx->refs);
3854 + mutex_unlock(&ctx->uring_lock);
3855 +
3856 +- io_kill_timeouts(ctx);
3857 +- io_poll_remove_all(ctx);
3858 ++ io_kill_timeouts(ctx, NULL);
3859 ++ io_poll_remove_all(ctx, NULL);
3860 +
3861 + if (ctx->io_wq)
3862 + io_wq_cancel_all(ctx->io_wq);
3863 +@@ -7559,7 +7628,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
3864 + io_iopoll_reap_events(ctx);
3865 + /* if we failed setting up the ctx, we might not have any rings */
3866 + if (ctx->rings)
3867 +- io_cqring_overflow_flush(ctx, true);
3868 ++ io_cqring_overflow_flush(ctx, true, NULL, NULL);
3869 + idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
3870 +
3871 + /*
3872 +@@ -7588,7 +7657,7 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data)
3873 + {
3874 + struct files_struct *files = data;
3875 +
3876 +- return work->files == files;
3877 ++ return !files || work->files == files;
3878 + }
3879 +
3880 + /*
3881 +@@ -7609,12 +7678,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
3882 + return false;
3883 + }
3884 +
3885 +-static inline bool io_match_files(struct io_kiocb *req,
3886 +- struct files_struct *files)
3887 +-{
3888 +- return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files;
3889 +-}
3890 +-
3891 + static bool io_match_link_files(struct io_kiocb *req,
3892 + struct files_struct *files)
3893 + {
3894 +@@ -7729,11 +7792,14 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
3895 + }
3896 + }
3897 +
3898 +-static void io_uring_cancel_files(struct io_ring_ctx *ctx,
3899 ++/*
3900 ++ * Returns true if we found and killed one or more files pinning requests
3901 ++ */
3902 ++static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
3903 + struct files_struct *files)
3904 + {
3905 + if (list_empty_careful(&ctx->inflight_list))
3906 +- return;
3907 ++ return false;
3908 +
3909 + io_cancel_defer_files(ctx, files);
3910 + /* cancel all at once, should be faster than doing it one by one*/
3911 +@@ -7745,7 +7811,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
3912 +
3913 + spin_lock_irq(&ctx->inflight_lock);
3914 + list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
3915 +- if (req->work.files != files)
3916 ++ if (files && req->work.files != files)
3917 + continue;
3918 + /* req is being completed, ignore */
3919 + if (!refcount_inc_not_zero(&req->refs))
3920 +@@ -7791,9 +7857,13 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
3921 + io_put_req(cancel_req);
3922 + }
3923 +
3924 ++ /* cancellations _may_ trigger task work */
3925 ++ io_run_task_work();
3926 + schedule();
3927 + finish_wait(&ctx->inflight_wait, &wait);
3928 + }
3929 ++
3930 ++ return true;
3931 + }
3932 +
3933 + static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
3934 +@@ -7801,21 +7871,198 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
3935 + struct io_kiocb *req = container_of(work, struct io_kiocb, work);
3936 + struct task_struct *task = data;
3937 +
3938 +- return req->task == task;
3939 ++ return io_task_match(req, task);
3940 ++}
3941 ++
3942 ++static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
3943 ++ struct task_struct *task,
3944 ++ struct files_struct *files)
3945 ++{
3946 ++ bool ret;
3947 ++
3948 ++ ret = io_uring_cancel_files(ctx, files);
3949 ++ if (!files) {
3950 ++ enum io_wq_cancel cret;
3951 ++
3952 ++ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true);
3953 ++ if (cret != IO_WQ_CANCEL_NOTFOUND)
3954 ++ ret = true;
3955 ++
3956 ++ /* SQPOLL thread does its own polling */
3957 ++ if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
3958 ++ if (!list_empty_careful(&ctx->poll_list)) {
3959 ++ io_iopoll_reap_events(ctx);
3960 ++ ret = true;
3961 ++ }
3962 ++ }
3963 ++
3964 ++ ret |= io_poll_remove_all(ctx, task);
3965 ++ ret |= io_kill_timeouts(ctx, task);
3966 ++ }
3967 ++
3968 ++ return ret;
3969 ++}
3970 ++
3971 ++/*
3972 ++ * We need to iteratively cancel requests, in case a request has dependent
3973 ++ * hard links. These persist even for failure of cancelations, hence keep
3974 ++ * looping until none are found.
3975 ++ */
3976 ++static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
3977 ++ struct files_struct *files)
3978 ++{
3979 ++ struct task_struct *task = current;
3980 ++
3981 ++ if (ctx->flags & IORING_SETUP_SQPOLL)
3982 ++ task = ctx->sqo_thread;
3983 ++
3984 ++ io_cqring_overflow_flush(ctx, true, task, files);
3985 ++
3986 ++ while (__io_uring_cancel_task_requests(ctx, task, files)) {
3987 ++ io_run_task_work();
3988 ++ cond_resched();
3989 ++ }
3990 ++}
3991 ++
3992 ++/*
3993 ++ * Note that this task has used io_uring. We use it for cancelation purposes.
3994 ++ */
3995 ++static int io_uring_add_task_file(struct file *file)
3996 ++{
3997 ++ struct io_uring_task *tctx = current->io_uring;
3998 ++
3999 ++ if (unlikely(!tctx)) {
4000 ++ int ret;
4001 ++
4002 ++ ret = io_uring_alloc_task_context(current);
4003 ++ if (unlikely(ret))
4004 ++ return ret;
4005 ++ tctx = current->io_uring;
4006 ++ }
4007 ++ if (tctx->last != file) {
4008 ++ void *old = xa_load(&tctx->xa, (unsigned long)file);
4009 ++
4010 ++ if (!old) {
4011 ++ get_file(file);
4012 ++ xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
4013 ++ }
4014 ++ tctx->last = file;
4015 ++ }
4016 ++
4017 ++ return 0;
4018 ++}
4019 ++
4020 ++/*
4021 ++ * Remove this io_uring_file -> task mapping.
4022 ++ */
4023 ++static void io_uring_del_task_file(struct file *file)
4024 ++{
4025 ++ struct io_uring_task *tctx = current->io_uring;
4026 ++
4027 ++ if (tctx->last == file)
4028 ++ tctx->last = NULL;
4029 ++ file = xa_erase(&tctx->xa, (unsigned long)file);
4030 ++ if (file)
4031 ++ fput(file);
4032 ++}
4033 ++
4034 ++static void __io_uring_attempt_task_drop(struct file *file)
4035 ++{
4036 ++ struct file *old = xa_load(&current->io_uring->xa, (unsigned long)file);
4037 ++
4038 ++ if (old == file)
4039 ++ io_uring_del_task_file(file);
4040 ++}
4041 ++
4042 ++/*
4043 ++ * Drop task note for this file if we're the only ones that hold it after
4044 ++ * pending fput()
4045 ++ */
4046 ++static void io_uring_attempt_task_drop(struct file *file, bool exiting)
4047 ++{
4048 ++ if (!current->io_uring)
4049 ++ return;
4050 ++ /*
4051 ++ * fput() is pending, will be 2 if the only other ref is our potential
4052 ++ * task file note. If the task is exiting, drop regardless of count.
4053 ++ */
4054 ++ if (!exiting && atomic_long_read(&file->f_count) != 2)
4055 ++ return;
4056 ++
4057 ++ __io_uring_attempt_task_drop(file);
4058 ++}
4059 ++
4060 ++void __io_uring_files_cancel(struct files_struct *files)
4061 ++{
4062 ++ struct io_uring_task *tctx = current->io_uring;
4063 ++ struct file *file;
4064 ++ unsigned long index;
4065 ++
4066 ++ /* make sure overflow events are dropped */
4067 ++ tctx->in_idle = true;
4068 ++
4069 ++ xa_for_each(&tctx->xa, index, file) {
4070 ++ struct io_ring_ctx *ctx = file->private_data;
4071 ++
4072 ++ io_uring_cancel_task_requests(ctx, files);
4073 ++ if (files)
4074 ++ io_uring_del_task_file(file);
4075 ++ }
4076 ++}
4077 ++
4078 ++static inline bool io_uring_task_idle(struct io_uring_task *tctx)
4079 ++{
4080 ++ return atomic_long_read(&tctx->req_issue) ==
4081 ++ atomic_long_read(&tctx->req_complete);
4082 ++}
4083 ++
4084 ++/*
4085 ++ * Find any io_uring fd that this task has registered or done IO on, and cancel
4086 ++ * requests.
4087 ++ */
4088 ++void __io_uring_task_cancel(void)
4089 ++{
4090 ++ struct io_uring_task *tctx = current->io_uring;
4091 ++ DEFINE_WAIT(wait);
4092 ++ long completions;
4093 ++
4094 ++ /* make sure overflow events are dropped */
4095 ++ tctx->in_idle = true;
4096 ++
4097 ++ while (!io_uring_task_idle(tctx)) {
4098 ++ /* read completions before cancelations */
4099 ++ completions = atomic_long_read(&tctx->req_complete);
4100 ++ __io_uring_files_cancel(NULL);
4101 ++
4102 ++ prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
4103 ++
4104 ++ /*
4105 ++ * If we've seen completions, retry. This avoids a race where
4106 ++ * a completion comes in before we did prepare_to_wait().
4107 ++ */
4108 ++ if (completions != atomic_long_read(&tctx->req_complete))
4109 ++ continue;
4110 ++ if (io_uring_task_idle(tctx))
4111 ++ break;
4112 ++ schedule();
4113 ++ }
4114 ++
4115 ++ finish_wait(&tctx->wait, &wait);
4116 ++ tctx->in_idle = false;
4117 + }
4118 +
4119 + static int io_uring_flush(struct file *file, void *data)
4120 + {
4121 + struct io_ring_ctx *ctx = file->private_data;
4122 +
4123 +- io_uring_cancel_files(ctx, data);
4124 +-
4125 + /*
4126 + * If the task is going away, cancel work it may have pending
4127 + */
4128 + if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
4129 +- io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
4130 ++ data = NULL;
4131 +
4132 ++ io_uring_cancel_task_requests(ctx, data);
4133 ++ io_uring_attempt_task_drop(file, !data);
4134 + return 0;
4135 + }
4136 +
4137 +@@ -7924,13 +8171,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
4138 + ret = 0;
4139 + if (ctx->flags & IORING_SETUP_SQPOLL) {
4140 + if (!list_empty_careful(&ctx->cq_overflow_list))
4141 +- io_cqring_overflow_flush(ctx, false);
4142 ++ io_cqring_overflow_flush(ctx, false, NULL, NULL);
4143 + if (flags & IORING_ENTER_SQ_WAKEUP)
4144 + wake_up(&ctx->sqo_wait);
4145 + submitted = to_submit;
4146 + } else if (to_submit) {
4147 ++ ret = io_uring_add_task_file(f.file);
4148 ++ if (unlikely(ret))
4149 ++ goto out;
4150 + mutex_lock(&ctx->uring_lock);
4151 +- submitted = io_submit_sqes(ctx, to_submit, f.file, fd);
4152 ++ submitted = io_submit_sqes(ctx, to_submit);
4153 + mutex_unlock(&ctx->uring_lock);
4154 +
4155 + if (submitted != to_submit)
4156 +@@ -8142,6 +8392,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
4157 + file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
4158 + O_RDWR | O_CLOEXEC);
4159 + if (IS_ERR(file)) {
4160 ++err_fd:
4161 + put_unused_fd(ret);
4162 + ret = PTR_ERR(file);
4163 + goto err;
4164 +@@ -8150,6 +8401,10 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
4165 + #if defined(CONFIG_UNIX)
4166 + ctx->ring_sock->file = file;
4167 + #endif
4168 ++ if (unlikely(io_uring_add_task_file(file))) {
4169 ++ file = ERR_PTR(-ENOMEM);
4170 ++ goto err_fd;
4171 ++ }
4172 + fd_install(ret, file);
4173 + return ret;
4174 + err:
4175 +@@ -8228,6 +8483,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
4176 + ctx->user = user;
4177 + ctx->creds = get_current_cred();
4178 +
4179 ++ ctx->sqo_task = get_task_struct(current);
4180 ++ /*
4181 ++ * This is just grabbed for accounting purposes. When a process exits,
4182 ++ * the mm is exited and dropped before the files, hence we need to hang
4183 ++ * on to this mm purely for the purposes of being able to unaccount
4184 ++ * memory (locked/pinned vm). It's not used for anything else.
4185 ++ */
4186 ++ mmgrab(current->mm);
4187 ++ ctx->mm_account = current->mm;
4188 ++
4189 + ret = io_allocate_scq_urings(ctx, p);
4190 + if (ret)
4191 + goto err;
4192 +diff --git a/include/linux/fs.h b/include/linux/fs.h
4193 +index ac1e89872db4f..819245cc9dbd4 100644
4194 +--- a/include/linux/fs.h
4195 ++++ b/include/linux/fs.h
4196 +@@ -3011,7 +3011,6 @@ extern int do_pipe_flags(int *, int);
4197 + id(UNKNOWN, unknown) \
4198 + id(FIRMWARE, firmware) \
4199 + id(FIRMWARE_PREALLOC_BUFFER, firmware) \
4200 +- id(FIRMWARE_EFI_EMBEDDED, firmware) \
4201 + id(MODULE, kernel-module) \
4202 + id(KEXEC_IMAGE, kexec-image) \
4203 + id(KEXEC_INITRAMFS, kexec-initramfs) \
4204 +diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
4205 +new file mode 100644
4206 +index 0000000000000..c09135a1ef132
4207 +--- /dev/null
4208 ++++ b/include/linux/io_uring.h
4209 +@@ -0,0 +1,53 @@
4210 ++/* SPDX-License-Identifier: GPL-2.0-or-later */
4211 ++#ifndef _LINUX_IO_URING_H
4212 ++#define _LINUX_IO_URING_H
4213 ++
4214 ++#include <linux/sched.h>
4215 ++#include <linux/xarray.h>
4216 ++#include <linux/percpu-refcount.h>
4217 ++
4218 ++struct io_uring_task {
4219 ++ /* submission side */
4220 ++ struct xarray xa;
4221 ++ struct wait_queue_head wait;
4222 ++ struct file *last;
4223 ++ atomic_long_t req_issue;
4224 ++
4225 ++ /* completion side */
4226 ++ bool in_idle ____cacheline_aligned_in_smp;
4227 ++ atomic_long_t req_complete;
4228 ++};
4229 ++
4230 ++#if defined(CONFIG_IO_URING)
4231 ++void __io_uring_task_cancel(void);
4232 ++void __io_uring_files_cancel(struct files_struct *files);
4233 ++void __io_uring_free(struct task_struct *tsk);
4234 ++
4235 ++static inline void io_uring_task_cancel(void)
4236 ++{
4237 ++ if (current->io_uring && !xa_empty(&current->io_uring->xa))
4238 ++ __io_uring_task_cancel();
4239 ++}
4240 ++static inline void io_uring_files_cancel(struct files_struct *files)
4241 ++{
4242 ++ if (current->io_uring && !xa_empty(&current->io_uring->xa))
4243 ++ __io_uring_files_cancel(files);
4244 ++}
4245 ++static inline void io_uring_free(struct task_struct *tsk)
4246 ++{
4247 ++ if (tsk->io_uring)
4248 ++ __io_uring_free(tsk);
4249 ++}
4250 ++#else
4251 ++static inline void io_uring_task_cancel(void)
4252 ++{
4253 ++}
4254 ++static inline void io_uring_files_cancel(struct files_struct *files)
4255 ++{
4256 ++}
4257 ++static inline void io_uring_free(struct task_struct *tsk)
4258 ++{
4259 ++}
4260 ++#endif
4261 ++
4262 ++#endif
4263 +diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
4264 +index 122f3439e1af2..c65d7a3be3c69 100644
4265 +--- a/include/linux/mtd/pfow.h
4266 ++++ b/include/linux/mtd/pfow.h
4267 +@@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr)
4268 +
4269 + if (!(dsr & DSR_AVAILABLE))
4270 + printk(KERN_NOTICE"DSR.15: (0) Device not Available\n");
4271 +- if (prog_status & 0x03)
4272 ++ if ((prog_status & 0x03) == 0x03)
4273 + printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid "
4274 + "half with 41h command\n");
4275 + else if (prog_status & 0x02)
4276 +diff --git a/include/linux/pm.h b/include/linux/pm.h
4277 +index 121c104a4090e..1010bf3d3008b 100644
4278 +--- a/include/linux/pm.h
4279 ++++ b/include/linux/pm.h
4280 +@@ -584,7 +584,7 @@ struct dev_pm_info {
4281 + #endif
4282 + #ifdef CONFIG_PM
4283 + struct hrtimer suspend_timer;
4284 +- unsigned long timer_expires;
4285 ++ u64 timer_expires;
4286 + struct work_struct work;
4287 + wait_queue_head_t wait_queue;
4288 + struct wake_irq *wakeirq;
4289 +diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h
4290 +index dd464943f717a..5b90eff50bf6e 100644
4291 +--- a/include/linux/qcom-geni-se.h
4292 ++++ b/include/linux/qcom-geni-se.h
4293 +@@ -229,6 +229,9 @@ struct geni_se {
4294 + #define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT)
4295 + #define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK)
4296 +
4297 ++/* QUP SE VERSION value for major number 2 and minor number 5 */
4298 ++#define QUP_SE_VERSION_2_5 0x20050000
4299 ++
4300 + #if IS_ENABLED(CONFIG_QCOM_GENI_SE)
4301 +
4302 + u32 geni_se_get_qup_hw_version(struct geni_se *se);
4303 +diff --git a/include/linux/sched.h b/include/linux/sched.h
4304 +index 6833729430932..f0f38e86ab1ee 100644
4305 +--- a/include/linux/sched.h
4306 ++++ b/include/linux/sched.h
4307 +@@ -61,6 +61,7 @@ struct sighand_struct;
4308 + struct signal_struct;
4309 + struct task_delay_info;
4310 + struct task_group;
4311 ++struct io_uring_task;
4312 +
4313 + /*
4314 + * Task state bitmask. NOTE! These bits are also
4315 +@@ -923,6 +924,10 @@ struct task_struct {
4316 + /* Open file information: */
4317 + struct files_struct *files;
4318 +
4319 ++#ifdef CONFIG_IO_URING
4320 ++ struct io_uring_task *io_uring;
4321 ++#endif
4322 ++
4323 + /* Namespaces: */
4324 + struct nsproxy *nsproxy;
4325 +
4326 +diff --git a/include/linux/string.h b/include/linux/string.h
4327 +index 9b7a0632e87aa..b1f3894a0a3e4 100644
4328 +--- a/include/linux/string.h
4329 ++++ b/include/linux/string.h
4330 +@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
4331 + #ifndef __HAVE_ARCH_MEMCHR
4332 + extern void * memchr(const void *,int,__kernel_size_t);
4333 + #endif
4334 +-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
4335 +-static inline __must_check unsigned long memcpy_mcsafe(void *dst,
4336 +- const void *src, size_t cnt)
4337 +-{
4338 +- memcpy(dst, src, cnt);
4339 +- return 0;
4340 +-}
4341 +-#endif
4342 + #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
4343 + static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
4344 + {
4345 + memcpy(dst, src, cnt);
4346 + }
4347 + #endif
4348 ++
4349 + void *memchr_inv(const void *s, int c, size_t n);
4350 + char *strreplace(char *s, char old, char new);
4351 +
4352 +diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
4353 +index 0a76ddc07d597..1ef3ab2343aa4 100644
4354 +--- a/include/linux/uaccess.h
4355 ++++ b/include/linux/uaccess.h
4356 +@@ -163,6 +163,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
4357 + }
4358 + #endif
4359 +
4360 ++#ifndef copy_mc_to_kernel
4361 ++/*
4362 ++ * Without arch opt-in this generic copy_mc_to_kernel() will not handle
4363 ++ * #MC (or arch equivalent) during source read.
4364 ++ */
4365 ++static inline unsigned long __must_check
4366 ++copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
4367 ++{
4368 ++ memcpy(dst, src, cnt);
4369 ++ return 0;
4370 ++}
4371 ++#endif
4372 ++
4373 + static __always_inline void pagefault_disabled_inc(void)
4374 + {
4375 + current->pagefault_disabled++;
4376 +diff --git a/include/linux/uio.h b/include/linux/uio.h
4377 +index 9576fd8158d7d..6a97b4d10b2ed 100644
4378 +--- a/include/linux/uio.h
4379 ++++ b/include/linux/uio.h
4380 +@@ -186,10 +186,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
4381 + #define _copy_from_iter_flushcache _copy_from_iter_nocache
4382 + #endif
4383 +
4384 +-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
4385 +-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
4386 ++#ifdef CONFIG_ARCH_HAS_COPY_MC
4387 ++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
4388 + #else
4389 +-#define _copy_to_iter_mcsafe _copy_to_iter
4390 ++#define _copy_mc_to_iter _copy_to_iter
4391 + #endif
4392 +
4393 + static __always_inline __must_check
4394 +@@ -202,12 +202,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
4395 + }
4396 +
4397 + static __always_inline __must_check
4398 +-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
4399 ++size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
4400 + {
4401 + if (unlikely(!check_copy_size(addr, bytes, true)))
4402 + return 0;
4403 + else
4404 +- return _copy_to_iter_mcsafe(addr, bytes, i);
4405 ++ return _copy_mc_to_iter(addr, bytes, i);
4406 + }
4407 +
4408 + size_t iov_iter_zero(size_t bytes, struct iov_iter *);
4409 +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
4410 +index ec2cbfab71f35..f09541cba3c9d 100644
4411 +--- a/include/net/netfilter/nf_tables.h
4412 ++++ b/include/net/netfilter/nf_tables.h
4413 +@@ -896,6 +896,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
4414 + return (struct nft_expr *)&rule->data[rule->dlen];
4415 + }
4416 +
4417 ++static inline bool nft_expr_more(const struct nft_rule *rule,
4418 ++ const struct nft_expr *expr)
4419 ++{
4420 ++ return expr != nft_expr_last(rule) && expr->ops;
4421 ++}
4422 ++
4423 + static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
4424 + {
4425 + return (void *)&rule->data[rule->dlen];
4426 +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
4427 +index a3fd55194e0b1..7bffadcfd6eb0 100644
4428 +--- a/include/uapi/linux/bpf.h
4429 ++++ b/include/uapi/linux/bpf.h
4430 +@@ -1416,8 +1416,8 @@ union bpf_attr {
4431 + * Return
4432 + * The return value depends on the result of the test, and can be:
4433 + *
4434 +- * * 0, if the *skb* task belongs to the cgroup2.
4435 +- * * 1, if the *skb* task does not belong to the cgroup2.
4436 ++ * * 0, if current task belongs to the cgroup2.
4437 ++ * * 1, if current task does not belong to the cgroup2.
4438 + * * A negative error code, if an error occurred.
4439 + *
4440 + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
4441 +diff --git a/init/init_task.c b/init/init_task.c
4442 +index 15089d15010ab..7802f91109b48 100644
4443 +--- a/init/init_task.c
4444 ++++ b/init/init_task.c
4445 +@@ -113,6 +113,9 @@ struct task_struct init_task
4446 + .thread = INIT_THREAD,
4447 + .fs = &init_fs,
4448 + .files = &init_files,
4449 ++#ifdef CONFIG_IO_URING
4450 ++ .io_uring = NULL,
4451 ++#endif
4452 + .signal = &init_signals,
4453 + .sighand = &init_sighand,
4454 + .nsproxy = &init_nsproxy,
4455 +diff --git a/kernel/fork.c b/kernel/fork.c
4456 +index 0074bbe8c66f1..c725015b3c465 100644
4457 +--- a/kernel/fork.c
4458 ++++ b/kernel/fork.c
4459 +@@ -95,6 +95,7 @@
4460 + #include <linux/stackleak.h>
4461 + #include <linux/kasan.h>
4462 + #include <linux/scs.h>
4463 ++#include <linux/io_uring.h>
4464 +
4465 + #include <asm/pgalloc.h>
4466 + #include <linux/uaccess.h>
4467 +@@ -745,6 +746,7 @@ void __put_task_struct(struct task_struct *tsk)
4468 + WARN_ON(refcount_read(&tsk->usage));
4469 + WARN_ON(tsk == current);
4470 +
4471 ++ io_uring_free(tsk);
4472 + cgroup_free(tsk);
4473 + task_numa_free(tsk, true);
4474 + security_task_free(tsk);
4475 +@@ -2022,6 +2024,10 @@ static __latent_entropy struct task_struct *copy_process(
4476 + p->vtime.state = VTIME_INACTIVE;
4477 + #endif
4478 +
4479 ++#ifdef CONFIG_IO_URING
4480 ++ p->io_uring = NULL;
4481 ++#endif
4482 ++
4483 + #if defined(SPLIT_RSS_COUNTING)
4484 + memset(&p->rss_stat, 0, sizeof(p->rss_stat));
4485 + #endif
4486 +diff --git a/lib/Kconfig b/lib/Kconfig
4487 +index df3f3da959900..7761458649377 100644
4488 +--- a/lib/Kconfig
4489 ++++ b/lib/Kconfig
4490 +@@ -631,7 +631,12 @@ config UACCESS_MEMCPY
4491 + config ARCH_HAS_UACCESS_FLUSHCACHE
4492 + bool
4493 +
4494 +-config ARCH_HAS_UACCESS_MCSAFE
4495 ++# arch has a concept of a recoverable synchronous exception due to a
4496 ++# memory-read error like x86 machine-check or ARM data-abort, and
4497 ++# implements copy_mc_to_{user,kernel} to abort and report
4498 ++# 'bytes-transferred' if that exception fires when accessing the source
4499 ++# buffer.
4500 ++config ARCH_HAS_COPY_MC
4501 + bool
4502 +
4503 + # Temporary. Goes away when all archs are cleaned up
4504 +diff --git a/lib/iov_iter.c b/lib/iov_iter.c
4505 +index bf538c2bec777..aefe469905434 100644
4506 +--- a/lib/iov_iter.c
4507 ++++ b/lib/iov_iter.c
4508 +@@ -636,30 +636,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
4509 + }
4510 + EXPORT_SYMBOL(_copy_to_iter);
4511 +
4512 +-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
4513 +-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
4514 ++#ifdef CONFIG_ARCH_HAS_COPY_MC
4515 ++static int copyout_mc(void __user *to, const void *from, size_t n)
4516 + {
4517 + if (access_ok(to, n)) {
4518 + instrument_copy_to_user(to, from, n);
4519 +- n = copy_to_user_mcsafe((__force void *) to, from, n);
4520 ++ n = copy_mc_to_user((__force void *) to, from, n);
4521 + }
4522 + return n;
4523 + }
4524 +
4525 +-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
4526 ++static unsigned long copy_mc_to_page(struct page *page, size_t offset,
4527 + const char *from, size_t len)
4528 + {
4529 + unsigned long ret;
4530 + char *to;
4531 +
4532 + to = kmap_atomic(page);
4533 +- ret = memcpy_mcsafe(to + offset, from, len);
4534 ++ ret = copy_mc_to_kernel(to + offset, from, len);
4535 + kunmap_atomic(to);
4536 +
4537 + return ret;
4538 + }
4539 +
4540 +-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
4541 ++static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
4542 + struct iov_iter *i)
4543 + {
4544 + struct pipe_inode_info *pipe = i->pipe;
4545 +@@ -677,7 +677,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
4546 + size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
4547 + unsigned long rem;
4548 +
4549 +- rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
4550 ++ rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
4551 + off, addr, chunk);
4552 + i->head = i_head;
4553 + i->iov_offset = off + chunk - rem;
4554 +@@ -694,18 +694,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
4555 + }
4556 +
4557 + /**
4558 +- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
4559 ++ * _copy_mc_to_iter - copy to iter with source memory error exception handling
4560 + * @addr: source kernel address
4561 + * @bytes: total transfer length
4562 + * @iter: destination iterator
4563 + *
4564 +- * The pmem driver arranges for filesystem-dax to use this facility via
4565 +- * dax_copy_to_iter() for protecting read/write to persistent memory.
4566 +- * Unless / until an architecture can guarantee identical performance
4567 +- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
4568 +- * performance regression to switch more users to the mcsafe version.
4569 ++ * The pmem driver deploys this for the dax operation
4570 ++ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
4571 ++ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
4572 ++ * successfully copied.
4573 + *
4574 +- * Otherwise, the main differences between this and typical _copy_to_iter().
4575 ++ * The main differences between this and typical _copy_to_iter().
4576 + *
4577 + * * Typical tail/residue handling after a fault retries the copy
4578 + * byte-by-byte until the fault happens again. Re-triggering machine
4579 +@@ -716,23 +715,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
4580 + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
4581 + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return
4582 + * a short copy.
4583 +- *
4584 +- * See MCSAFE_TEST for self-test.
4585 + */
4586 +-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
4587 ++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
4588 + {
4589 + const char *from = addr;
4590 + unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
4591 +
4592 + if (unlikely(iov_iter_is_pipe(i)))
4593 +- return copy_pipe_to_iter_mcsafe(addr, bytes, i);
4594 ++ return copy_mc_pipe_to_iter(addr, bytes, i);
4595 + if (iter_is_iovec(i))
4596 + might_fault();
4597 + iterate_and_advance(i, bytes, v,
4598 +- copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
4599 ++ copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
4600 ++ v.iov_len),
4601 + ({
4602 +- rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
4603 +- (from += v.bv_len) - v.bv_len, v.bv_len);
4604 ++ rem = copy_mc_to_page(v.bv_page, v.bv_offset,
4605 ++ (from += v.bv_len) - v.bv_len, v.bv_len);
4606 + if (rem) {
4607 + curr_addr = (unsigned long) from;
4608 + bytes = curr_addr - s_addr - rem;
4609 +@@ -740,8 +738,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
4610 + }
4611 + }),
4612 + ({
4613 +- rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
4614 +- v.iov_len);
4615 ++ rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
4616 ++ - v.iov_len, v.iov_len);
4617 + if (rem) {
4618 + curr_addr = (unsigned long) from;
4619 + bytes = curr_addr - s_addr - rem;
4620 +@@ -752,8 +750,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
4621 +
4622 + return bytes;
4623 + }
4624 +-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
4625 +-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
4626 ++EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
4627 ++#endif /* CONFIG_ARCH_HAS_COPY_MC */
4628 +
4629 + size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
4630 + {
4631 +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
4632 +index 06a8242aa6980..6dd7f44497ecc 100644
4633 +--- a/net/ipv4/tcp.c
4634 ++++ b/net/ipv4/tcp.c
4635 +@@ -483,6 +483,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
4636 + return true;
4637 + if (tcp_rmem_pressure(sk))
4638 + return true;
4639 ++ if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
4640 ++ return true;
4641 + }
4642 + if (sk->sk_prot->stream_memory_read)
4643 + return sk->sk_prot->stream_memory_read(sk);
4644 +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
4645 +index 02cc972edd0b0..6c7e982169467 100644
4646 +--- a/net/ipv4/tcp_input.c
4647 ++++ b/net/ipv4/tcp_input.c
4648 +@@ -4790,7 +4790,8 @@ void tcp_data_ready(struct sock *sk)
4649 + int avail = tp->rcv_nxt - tp->copied_seq;
4650 +
4651 + if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
4652 +- !sock_flag(sk, SOCK_DONE))
4653 ++ !sock_flag(sk, SOCK_DONE) &&
4654 ++ tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
4655 + return;
4656 +
4657 + sk->sk_data_ready(sk);
4658 +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
4659 +index 05059f620d41e..fe51a7df4f524 100644
4660 +--- a/net/netfilter/nf_tables_api.c
4661 ++++ b/net/netfilter/nf_tables_api.c
4662 +@@ -295,7 +295,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
4663 + struct nft_expr *expr;
4664 +
4665 + expr = nft_expr_first(rule);
4666 +- while (expr != nft_expr_last(rule) && expr->ops) {
4667 ++ while (nft_expr_more(rule, expr)) {
4668 + if (expr->ops->activate)
4669 + expr->ops->activate(ctx, expr);
4670 +
4671 +@@ -310,7 +310,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
4672 + struct nft_expr *expr;
4673 +
4674 + expr = nft_expr_first(rule);
4675 +- while (expr != nft_expr_last(rule) && expr->ops) {
4676 ++ while (nft_expr_more(rule, expr)) {
4677 + if (expr->ops->deactivate)
4678 + expr->ops->deactivate(ctx, expr, phase);
4679 +
4680 +@@ -2917,7 +2917,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
4681 + * is called on error from nf_tables_newrule().
4682 + */
4683 + expr = nft_expr_first(rule);
4684 +- while (expr != nft_expr_last(rule) && expr->ops) {
4685 ++ while (nft_expr_more(rule, expr)) {
4686 + next = nft_expr_next(expr);
4687 + nf_tables_expr_destroy(ctx, expr);
4688 + expr = next;
4689 +diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
4690 +index c7cf1cde46def..ce2387bfb5dce 100644
4691 +--- a/net/netfilter/nf_tables_offload.c
4692 ++++ b/net/netfilter/nf_tables_offload.c
4693 +@@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
4694 + struct nft_expr *expr;
4695 +
4696 + expr = nft_expr_first(rule);
4697 +- while (expr->ops && expr != nft_expr_last(rule)) {
4698 ++ while (nft_expr_more(rule, expr)) {
4699 + if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
4700 + num_actions++;
4701 +
4702 +@@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
4703 + ctx->net = net;
4704 + ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
4705 +
4706 +- while (expr->ops && expr != nft_expr_last(rule)) {
4707 ++ while (nft_expr_more(rule, expr)) {
4708 + if (!expr->ops->offload) {
4709 + err = -EOPNOTSUPP;
4710 + goto err_out;
4711 +diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
4712 +index e298ec3b3c9e3..ca026e2bf8d27 100644
4713 +--- a/net/sched/act_mpls.c
4714 ++++ b/net/sched/act_mpls.c
4715 +@@ -408,6 +408,7 @@ static void __exit mpls_cleanup_module(void)
4716 + module_init(mpls_init_module);
4717 + module_exit(mpls_cleanup_module);
4718 +
4719 ++MODULE_SOFTDEP("post: mpls_gso");
4720 + MODULE_AUTHOR("Netronome Systems <oss-drivers@×××××××××.com>");
4721 + MODULE_LICENSE("GPL");
4722 + MODULE_DESCRIPTION("MPLS manipulation actions");
4723 +diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
4724 +index 8bf6bde1cfe59..aa2448253dbab 100644
4725 +--- a/net/sched/cls_api.c
4726 ++++ b/net/sched/cls_api.c
4727 +@@ -650,12 +650,12 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
4728 + block_cb->indr.binder_type,
4729 + &block->flow_block, tcf_block_shared(block),
4730 + &extack);
4731 ++ rtnl_lock();
4732 + down_write(&block->cb_lock);
4733 + list_del(&block_cb->driver_list);
4734 + list_move(&block_cb->list, &bo.cb_list);
4735 +- up_write(&block->cb_lock);
4736 +- rtnl_lock();
4737 + tcf_block_unbind(block, &bo);
4738 ++ up_write(&block->cb_lock);
4739 + rtnl_unlock();
4740 + }
4741 +
4742 +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
4743 +index 84f82771cdf5d..0c345e43a09a3 100644
4744 +--- a/net/sched/sch_netem.c
4745 ++++ b/net/sched/sch_netem.c
4746 +@@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma,
4747 +
4748 + /* default uniform distribution */
4749 + if (dist == NULL)
4750 +- return ((rnd % (2 * sigma)) + mu) - sigma;
4751 ++ return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
4752 +
4753 + t = dist->table[rnd % dist->size];
4754 + x = (sigma % NETEM_DIST_SCALE) * t;
4755 +@@ -812,6 +812,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
4756 + q->slot_config.max_packets = INT_MAX;
4757 + if (q->slot_config.max_bytes == 0)
4758 + q->slot_config.max_bytes = INT_MAX;
4759 ++
4760 ++ /* capping dist_jitter to the range acceptable by tabledist() */
4761 ++ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
4762 ++
4763 + q->slot.packets_left = q->slot_config.max_packets;
4764 + q->slot.bytes_left = q->slot_config.max_bytes;
4765 + if (q->slot_config.min_delay | q->slot_config.max_delay |
4766 +@@ -1037,6 +1041,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
4767 + if (tb[TCA_NETEM_SLOT])
4768 + get_slot(q, tb[TCA_NETEM_SLOT]);
4769 +
4770 ++ /* capping jitter to the range acceptable by tabledist() */
4771 ++ q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
4772 ++
4773 + return ret;
4774 +
4775 + get_table_failure:
4776 +diff --git a/net/tipc/msg.c b/net/tipc/msg.c
4777 +index 15b24fbcbe970..0d6297f75df18 100644
4778 +--- a/net/tipc/msg.c
4779 ++++ b/net/tipc/msg.c
4780 +@@ -150,12 +150,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
4781 + if (fragid == FIRST_FRAGMENT) {
4782 + if (unlikely(head))
4783 + goto err;
4784 +- if (skb_cloned(frag))
4785 +- frag = skb_copy(frag, GFP_ATOMIC);
4786 ++ *buf = NULL;
4787 ++ frag = skb_unshare(frag, GFP_ATOMIC);
4788 + if (unlikely(!frag))
4789 + goto err;
4790 + head = *headbuf = frag;
4791 +- *buf = NULL;
4792 + TIPC_SKB_CB(head)->tail = NULL;
4793 + if (skb_is_nonlinear(head)) {
4794 + skb_walk_frags(head, tail) {
4795 +diff --git a/scripts/setlocalversion b/scripts/setlocalversion
4796 +index 20f2efd57b11a..bb709eda96cdf 100755
4797 +--- a/scripts/setlocalversion
4798 ++++ b/scripts/setlocalversion
4799 +@@ -45,7 +45,7 @@ scm_version()
4800 +
4801 + # Check for git and a git repo.
4802 + if test -z "$(git rev-parse --show-cdup 2>/dev/null)" &&
4803 +- head=$(git rev-parse --verify --short HEAD 2>/dev/null); then
4804 ++ head=$(git rev-parse --verify HEAD 2>/dev/null); then
4805 +
4806 + # If we are at a tagged commit (like "v2.6.30-rc6"), we ignore
4807 + # it, because this version is defined in the top level Makefile.
4808 +@@ -59,11 +59,22 @@ scm_version()
4809 + fi
4810 + # If we are past a tagged commit (like
4811 + # "v2.6.30-rc5-302-g72357d5"), we pretty print it.
4812 +- if atag="$(git describe 2>/dev/null)"; then
4813 +- echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}'
4814 +-
4815 +- # If we don't have a tag at all we print -g{commitish}.
4816 ++ #
4817 ++ # Ensure the abbreviated sha1 has exactly 12
4818 ++ # hex characters, to make the output
4819 ++ # independent of git version, local
4820 ++ # core.abbrev settings and/or total number of
4821 ++ # objects in the current repository - passing
4822 ++ # --abbrev=12 ensures a minimum of 12, and the
4823 ++ # awk substr() then picks the 'g' and first 12
4824 ++ # hex chars.
4825 ++ if atag="$(git describe --abbrev=12 2>/dev/null)"; then
4826 ++ echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}'
4827 ++
4828 ++ # If we don't have a tag at all we print -g{commitish},
4829 ++ # again using exactly 12 hex chars.
4830 + else
4831 ++ head="$(echo $head | cut -c1-12)"
4832 + printf '%s%s' -g $head
4833 + fi
4834 + fi
4835 +diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
4836 +index 0d36259b690df..e4b47759ba1ca 100644
4837 +--- a/security/integrity/evm/evm_main.c
4838 ++++ b/security/integrity/evm/evm_main.c
4839 +@@ -181,6 +181,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
4840 + break;
4841 + case EVM_IMA_XATTR_DIGSIG:
4842 + case EVM_XATTR_PORTABLE_DIGSIG:
4843 ++ /* accept xattr with non-empty signature field */
4844 ++ if (xattr_len <= sizeof(struct signature_v2_hdr)) {
4845 ++ evm_status = INTEGRITY_FAIL;
4846 ++ goto out;
4847 ++ }
4848 ++
4849 + hdr = (struct signature_v2_hdr *)xattr_data;
4850 + digest.hdr.algo = hdr->hash_algo;
4851 + rc = evm_calc_hash(dentry, xattr_name, xattr_value,
4852 +diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
4853 +deleted file mode 100644
4854 +index 2ccd588fbad45..0000000000000
4855 +--- a/tools/arch/x86/include/asm/mcsafe_test.h
4856 ++++ /dev/null
4857 +@@ -1,13 +0,0 @@
4858 +-/* SPDX-License-Identifier: GPL-2.0 */
4859 +-#ifndef _MCSAFE_TEST_H_
4860 +-#define _MCSAFE_TEST_H_
4861 +-
4862 +-.macro MCSAFE_TEST_CTL
4863 +-.endm
4864 +-
4865 +-.macro MCSAFE_TEST_SRC reg count target
4866 +-.endm
4867 +-
4868 +-.macro MCSAFE_TEST_DST reg count target
4869 +-.endm
4870 +-#endif /* _MCSAFE_TEST_H_ */
4871 +diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
4872 +index 45f8e1b02241f..0b5b8ae56bd91 100644
4873 +--- a/tools/arch/x86/lib/memcpy_64.S
4874 ++++ b/tools/arch/x86/lib/memcpy_64.S
4875 +@@ -4,7 +4,6 @@
4876 + #include <linux/linkage.h>
4877 + #include <asm/errno.h>
4878 + #include <asm/cpufeatures.h>
4879 +-#include <asm/mcsafe_test.h>
4880 + #include <asm/alternative-asm.h>
4881 + #include <asm/export.h>
4882 +
4883 +@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
4884 + SYM_FUNC_END(memcpy_orig)
4885 +
4886 + .popsection
4887 +-
4888 +-#ifndef CONFIG_UML
4889 +-
4890 +-MCSAFE_TEST_CTL
4891 +-
4892 +-/*
4893 +- * __memcpy_mcsafe - memory copy with machine check exception handling
4894 +- * Note that we only catch machine checks when reading the source addresses.
4895 +- * Writes to target are posted and don't generate machine checks.
4896 +- */
4897 +-SYM_FUNC_START(__memcpy_mcsafe)
4898 +- cmpl $8, %edx
4899 +- /* Less than 8 bytes? Go to byte copy loop */
4900 +- jb .L_no_whole_words
4901 +-
4902 +- /* Check for bad alignment of source */
4903 +- testl $7, %esi
4904 +- /* Already aligned */
4905 +- jz .L_8byte_aligned
4906 +-
4907 +- /* Copy one byte at a time until source is 8-byte aligned */
4908 +- movl %esi, %ecx
4909 +- andl $7, %ecx
4910 +- subl $8, %ecx
4911 +- negl %ecx
4912 +- subl %ecx, %edx
4913 +-.L_read_leading_bytes:
4914 +- movb (%rsi), %al
4915 +- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
4916 +- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
4917 +-.L_write_leading_bytes:
4918 +- movb %al, (%rdi)
4919 +- incq %rsi
4920 +- incq %rdi
4921 +- decl %ecx
4922 +- jnz .L_read_leading_bytes
4923 +-
4924 +-.L_8byte_aligned:
4925 +- movl %edx, %ecx
4926 +- andl $7, %edx
4927 +- shrl $3, %ecx
4928 +- jz .L_no_whole_words
4929 +-
4930 +-.L_read_words:
4931 +- movq (%rsi), %r8
4932 +- MCSAFE_TEST_SRC %rsi 8 .E_read_words
4933 +- MCSAFE_TEST_DST %rdi 8 .E_write_words
4934 +-.L_write_words:
4935 +- movq %r8, (%rdi)
4936 +- addq $8, %rsi
4937 +- addq $8, %rdi
4938 +- decl %ecx
4939 +- jnz .L_read_words
4940 +-
4941 +- /* Any trailing bytes? */
4942 +-.L_no_whole_words:
4943 +- andl %edx, %edx
4944 +- jz .L_done_memcpy_trap
4945 +-
4946 +- /* Copy trailing bytes */
4947 +- movl %edx, %ecx
4948 +-.L_read_trailing_bytes:
4949 +- movb (%rsi), %al
4950 +- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
4951 +- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
4952 +-.L_write_trailing_bytes:
4953 +- movb %al, (%rdi)
4954 +- incq %rsi
4955 +- incq %rdi
4956 +- decl %ecx
4957 +- jnz .L_read_trailing_bytes
4958 +-
4959 +- /* Copy successful. Return zero */
4960 +-.L_done_memcpy_trap:
4961 +- xorl %eax, %eax
4962 +-.L_done:
4963 +- ret
4964 +-SYM_FUNC_END(__memcpy_mcsafe)
4965 +-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
4966 +-
4967 +- .section .fixup, "ax"
4968 +- /*
4969 +- * Return number of bytes not copied for any failure. Note that
4970 +- * there is no "tail" handling since the source buffer is 8-byte
4971 +- * aligned and poison is cacheline aligned.
4972 +- */
4973 +-.E_read_words:
4974 +- shll $3, %ecx
4975 +-.E_leading_bytes:
4976 +- addl %edx, %ecx
4977 +-.E_trailing_bytes:
4978 +- mov %ecx, %eax
4979 +- jmp .L_done
4980 +-
4981 +- /*
4982 +- * For write fault handling, given the destination is unaligned,
4983 +- * we handle faults on multi-byte writes with a byte-by-byte
4984 +- * copy up to the write-protected page.
4985 +- */
4986 +-.E_write_words:
4987 +- shll $3, %ecx
4988 +- addl %edx, %ecx
4989 +- movl %ecx, %edx
4990 +- jmp mcsafe_handle_tail
4991 +-
4992 +- .previous
4993 +-
4994 +- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
4995 +- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
4996 +- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
4997 +- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
4998 +- _ASM_EXTABLE(.L_write_words, .E_write_words)
4999 +- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
5000 +-#endif
5001 +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
5002 +index a3fd55194e0b1..7bffadcfd6eb0 100644
5003 +--- a/tools/include/uapi/linux/bpf.h
5004 ++++ b/tools/include/uapi/linux/bpf.h
5005 +@@ -1416,8 +1416,8 @@ union bpf_attr {
5006 + * Return
5007 + * The return value depends on the result of the test, and can be:
5008 + *
5009 +- * * 0, if the *skb* task belongs to the cgroup2.
5010 +- * * 1, if the *skb* task does not belong to the cgroup2.
5011 ++ * * 0, if current task belongs to the cgroup2.
5012 ++ * * 1, if current task does not belong to the cgroup2.
5013 + * * A negative error code, if an error occurred.
5014 + *
5015 + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
5016 +diff --git a/tools/objtool/check.c b/tools/objtool/check.c
5017 +index 773e6c7ee5f93..0ed92c3b19266 100644
5018 +--- a/tools/objtool/check.c
5019 ++++ b/tools/objtool/check.c
5020 +@@ -548,8 +548,9 @@ static const char *uaccess_safe_builtin[] = {
5021 + "__ubsan_handle_shift_out_of_bounds",
5022 + /* misc */
5023 + "csum_partial_copy_generic",
5024 +- "__memcpy_mcsafe",
5025 +- "mcsafe_handle_tail",
5026 ++ "copy_mc_fragile",
5027 ++ "copy_mc_fragile_handle_tail",
5028 ++ "copy_mc_enhanced_fast_string",
5029 + "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
5030 + NULL
5031 + };
5032 +diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
5033 +index 768e408757a05..5352303518e1f 100644
5034 +--- a/tools/perf/bench/Build
5035 ++++ b/tools/perf/bench/Build
5036 +@@ -11,7 +11,6 @@ perf-y += epoll-ctl.o
5037 + perf-y += synthesize.o
5038 + perf-y += kallsyms-parse.o
5039 +
5040 +-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
5041 + perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
5042 + perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
5043 +
5044 +diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
5045 +deleted file mode 100644
5046 +index 4130734dde84b..0000000000000
5047 +--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
5048 ++++ /dev/null
5049 +@@ -1,24 +0,0 @@
5050 +-/*
5051 +- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
5052 +- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
5053 +- * happy.
5054 +- */
5055 +-#include <linux/types.h>
5056 +-
5057 +-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
5058 +-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
5059 +-
5060 +-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
5061 +-{
5062 +- for (; len; --len, to++, from++) {
5063 +- /*
5064 +- * Call the assembly routine back directly since
5065 +- * memcpy_mcsafe() may silently fallback to memcpy.
5066 +- */
5067 +- unsigned long rem = __memcpy_mcsafe(to, from, 1);
5068 +-
5069 +- if (rem)
5070 +- break;
5071 +- }
5072 +- return len;
5073 +-}
5074 +diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
5075 +index a8ee5c4d41ebb..50a390d87db26 100644
5076 +--- a/tools/testing/nvdimm/test/nfit.c
5077 ++++ b/tools/testing/nvdimm/test/nfit.c
5078 +@@ -23,7 +23,8 @@
5079 + #include "nfit_test.h"
5080 + #include "../watermark.h"
5081 +
5082 +-#include <asm/mcsafe_test.h>
5083 ++#include <asm/copy_mc_test.h>
5084 ++#include <asm/mce.h>
5085 +
5086 + /*
5087 + * Generate an NFIT table to describe the following topology:
5088 +@@ -3052,7 +3053,7 @@ static struct platform_driver nfit_test_driver = {
5089 + .id_table = nfit_test_id,
5090 + };
5091 +
5092 +-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
5093 ++static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
5094 +
5095 + enum INJECT {
5096 + INJECT_NONE,
5097 +@@ -3060,7 +3061,7 @@ enum INJECT {
5098 + INJECT_DST,
5099 + };
5100 +
5101 +-static void mcsafe_test_init(char *dst, char *src, size_t size)
5102 ++static void copy_mc_test_init(char *dst, char *src, size_t size)
5103 + {
5104 + size_t i;
5105 +
5106 +@@ -3069,7 +3070,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
5107 + src[i] = (char) i;
5108 + }
5109 +
5110 +-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
5111 ++static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
5112 + size_t size, unsigned long rem)
5113 + {
5114 + size_t i;
5115 +@@ -3090,12 +3091,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
5116 + return true;
5117 + }
5118 +
5119 +-void mcsafe_test(void)
5120 ++void copy_mc_test(void)
5121 + {
5122 + char *inject_desc[] = { "none", "source", "destination" };
5123 + enum INJECT inj;
5124 +
5125 +- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
5126 ++ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
5127 + pr_info("%s: run...\n", __func__);
5128 + } else {
5129 + pr_info("%s: disabled, skip.\n", __func__);
5130 +@@ -3113,31 +3114,31 @@ void mcsafe_test(void)
5131 +
5132 + switch (inj) {
5133 + case INJECT_NONE:
5134 +- mcsafe_inject_src(NULL);
5135 +- mcsafe_inject_dst(NULL);
5136 +- dst = &mcsafe_buf[2048];
5137 +- src = &mcsafe_buf[1024 - i];
5138 ++ copy_mc_inject_src(NULL);
5139 ++ copy_mc_inject_dst(NULL);
5140 ++ dst = &copy_mc_buf[2048];
5141 ++ src = &copy_mc_buf[1024 - i];
5142 + expect = 0;
5143 + break;
5144 + case INJECT_SRC:
5145 +- mcsafe_inject_src(&mcsafe_buf[1024]);
5146 +- mcsafe_inject_dst(NULL);
5147 +- dst = &mcsafe_buf[2048];
5148 +- src = &mcsafe_buf[1024 - i];
5149 ++ copy_mc_inject_src(&copy_mc_buf[1024]);
5150 ++ copy_mc_inject_dst(NULL);
5151 ++ dst = &copy_mc_buf[2048];
5152 ++ src = &copy_mc_buf[1024 - i];
5153 + expect = 512 - i;
5154 + break;
5155 + case INJECT_DST:
5156 +- mcsafe_inject_src(NULL);
5157 +- mcsafe_inject_dst(&mcsafe_buf[2048]);
5158 +- dst = &mcsafe_buf[2048 - i];
5159 +- src = &mcsafe_buf[1024];
5160 ++ copy_mc_inject_src(NULL);
5161 ++ copy_mc_inject_dst(&copy_mc_buf[2048]);
5162 ++ dst = &copy_mc_buf[2048 - i];
5163 ++ src = &copy_mc_buf[1024];
5164 + expect = 512 - i;
5165 + break;
5166 + }
5167 +
5168 +- mcsafe_test_init(dst, src, 512);
5169 +- rem = __memcpy_mcsafe(dst, src, 512);
5170 +- valid = mcsafe_test_validate(dst, src, 512, expect);
5171 ++ copy_mc_test_init(dst, src, 512);
5172 ++ rem = copy_mc_fragile(dst, src, 512);
5173 ++ valid = copy_mc_test_validate(dst, src, 512, expect);
5174 + if (rem == expect && valid)
5175 + continue;
5176 + pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
5177 +@@ -3149,8 +3150,8 @@ void mcsafe_test(void)
5178 + }
5179 + }
5180 +
5181 +- mcsafe_inject_src(NULL);
5182 +- mcsafe_inject_dst(NULL);
5183 ++ copy_mc_inject_src(NULL);
5184 ++ copy_mc_inject_dst(NULL);
5185 + }
5186 +
5187 + static __init int nfit_test_init(void)
5188 +@@ -3161,7 +3162,7 @@ static __init int nfit_test_init(void)
5189 + libnvdimm_test();
5190 + acpi_nfit_test();
5191 + device_dax_test();
5192 +- mcsafe_test();
5193 ++ copy_mc_test();
5194 + dax_pmem_test();
5195 + dax_pmem_core_test();
5196 + #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
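
Reading the three injection cases above together: the 512-byte copy is arranged so that exactly i bytes sit before the poisoned address (source at copy_mc_buf[1024 - i] with poison at copy_mc_buf[1024], or the mirrored destination case), so the first i bytes transfer cleanly and copy_mc_fragile() is expected to report the remaining 512 - i bytes as not copied; with no injection the expected remainder is 0. A trivial sketch of that arithmetic (hypothetical helper name, not part of the test):

	#include <stdio.h>

	/* Hypothetical helper mirroring the "expect = 512 - i" arithmetic above. */
	static unsigned long expected_remainder(unsigned long copy_len,
						unsigned long clean_prefix)
	{
		/* clean_prefix bytes before the poison copy fine; everything from
		 * the poisoned address onward is reported back as not copied. */
		return copy_len - clean_prefix;
	}

	int main(void)
	{
		for (unsigned long i = 0; i <= 512; i += 256)
			printf("i=%lu expect=%lu\n", i, expected_remainder(512, i));
		return 0;
	}
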
5197 +diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
5198 +index ddaf140b82553..994b11af765ce 100644
5199 +--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
5200 ++++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
5201 +@@ -12,4 +12,4 @@ memcpy_p7_t1
5202 + copyuser_64_exc_t0
5203 + copyuser_64_exc_t1
5204 + copyuser_64_exc_t2
5205 +-memcpy_mcsafe_64
5206 ++copy_mc_64
5207 +diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
5208 +index 0917983a1c781..3095b1f1c02b3 100644
5209 +--- a/tools/testing/selftests/powerpc/copyloops/Makefile
5210 ++++ b/tools/testing/selftests/powerpc/copyloops/Makefile
5211 +@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
5212 + TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
5213 + copyuser_p7_t0 copyuser_p7_t1 \
5214 + memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
5215 +- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
5216 ++ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
5217 + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
5218 +
5219 + EXTRA_SOURCES := validate.c ../harness.c stubs.S
5220 +@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
5221 + -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
5222 + -o $@ $^
5223 +
5224 +-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
5225 ++$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
5226 + $(CC) $(CPPFLAGS) $(CFLAGS) \
5227 +- -D COPY_LOOP=test_memcpy_mcsafe \
5228 ++ -D COPY_LOOP=test_copy_mc_generic \
5229 + -o $@ $^
5230 +
5231 + $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
5232 +diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
5233 +new file mode 100644
5234 +index 0000000000000..88d46c471493b
5235 +--- /dev/null
5236 ++++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
5237 +@@ -0,0 +1,242 @@
5238 ++/* SPDX-License-Identifier: GPL-2.0 */
5239 ++/*
5240 ++ * Copyright (C) IBM Corporation, 2011
5241 ++ * Derived from copyuser_power7.s by Anton Blanchard <anton@××××××.com>
5242 ++ * Author - Balbir Singh <bsingharora@×××××.com>
5243 ++ */
5244 ++#include <asm/ppc_asm.h>
5245 ++#include <asm/errno.h>
5246 ++#include <asm/export.h>
5247 ++
5248 ++ .macro err1
5249 ++100:
5250 ++ EX_TABLE(100b,.Ldo_err1)
5251 ++ .endm
5252 ++
5253 ++ .macro err2
5254 ++200:
5255 ++ EX_TABLE(200b,.Ldo_err2)
5256 ++ .endm
5257 ++
5258 ++ .macro err3
5259 ++300: EX_TABLE(300b,.Ldone)
5260 ++ .endm
5261 ++
5262 ++.Ldo_err2:
5263 ++ ld r22,STK_REG(R22)(r1)
5264 ++ ld r21,STK_REG(R21)(r1)
5265 ++ ld r20,STK_REG(R20)(r1)
5266 ++ ld r19,STK_REG(R19)(r1)
5267 ++ ld r18,STK_REG(R18)(r1)
5268 ++ ld r17,STK_REG(R17)(r1)
5269 ++ ld r16,STK_REG(R16)(r1)
5270 ++ ld r15,STK_REG(R15)(r1)
5271 ++ ld r14,STK_REG(R14)(r1)
5272 ++ addi r1,r1,STACKFRAMESIZE
5273 ++.Ldo_err1:
5274 ++ /* Do a byte by byte copy to get the exact remaining size */
5275 ++ mtctr r7
5276 ++46:
5277 ++err3; lbz r0,0(r4)
5278 ++ addi r4,r4,1
5279 ++err3; stb r0,0(r3)
5280 ++ addi r3,r3,1
5281 ++ bdnz 46b
5282 ++ li r3,0
5283 ++ blr
5284 ++
5285 ++.Ldone:
5286 ++ mfctr r3
5287 ++ blr
5288 ++
5289 ++
5290 ++_GLOBAL(copy_mc_generic)
5291 ++ mr r7,r5
5292 ++ cmpldi r5,16
5293 ++ blt .Lshort_copy
5294 ++
5295 ++.Lcopy:
5296 ++ /* Get the source 8B aligned */
5297 ++ neg r6,r4
5298 ++ mtocrf 0x01,r6
5299 ++ clrldi r6,r6,(64-3)
5300 ++
5301 ++ bf cr7*4+3,1f
5302 ++err1; lbz r0,0(r4)
5303 ++ addi r4,r4,1
5304 ++err1; stb r0,0(r3)
5305 ++ addi r3,r3,1
5306 ++ subi r7,r7,1
5307 ++
5308 ++1: bf cr7*4+2,2f
5309 ++err1; lhz r0,0(r4)
5310 ++ addi r4,r4,2
5311 ++err1; sth r0,0(r3)
5312 ++ addi r3,r3,2
5313 ++ subi r7,r7,2
5314 ++
5315 ++2: bf cr7*4+1,3f
5316 ++err1; lwz r0,0(r4)
5317 ++ addi r4,r4,4
5318 ++err1; stw r0,0(r3)
5319 ++ addi r3,r3,4
5320 ++ subi r7,r7,4
5321 ++
5322 ++3: sub r5,r5,r6
5323 ++ cmpldi r5,128
5324 ++
5325 ++ mflr r0
5326 ++ stdu r1,-STACKFRAMESIZE(r1)
5327 ++ std r14,STK_REG(R14)(r1)
5328 ++ std r15,STK_REG(R15)(r1)
5329 ++ std r16,STK_REG(R16)(r1)
5330 ++ std r17,STK_REG(R17)(r1)
5331 ++ std r18,STK_REG(R18)(r1)
5332 ++ std r19,STK_REG(R19)(r1)
5333 ++ std r20,STK_REG(R20)(r1)
5334 ++ std r21,STK_REG(R21)(r1)
5335 ++ std r22,STK_REG(R22)(r1)
5336 ++ std r0,STACKFRAMESIZE+16(r1)
5337 ++
5338 ++ blt 5f
5339 ++ srdi r6,r5,7
5340 ++ mtctr r6
5341 ++
5342 ++ /* Now do cacheline (128B) sized loads and stores. */
5343 ++ .align 5
5344 ++4:
5345 ++err2; ld r0,0(r4)
5346 ++err2; ld r6,8(r4)
5347 ++err2; ld r8,16(r4)
5348 ++err2; ld r9,24(r4)
5349 ++err2; ld r10,32(r4)
5350 ++err2; ld r11,40(r4)
5351 ++err2; ld r12,48(r4)
5352 ++err2; ld r14,56(r4)
5353 ++err2; ld r15,64(r4)
5354 ++err2; ld r16,72(r4)
5355 ++err2; ld r17,80(r4)
5356 ++err2; ld r18,88(r4)
5357 ++err2; ld r19,96(r4)
5358 ++err2; ld r20,104(r4)
5359 ++err2; ld r21,112(r4)
5360 ++err2; ld r22,120(r4)
5361 ++ addi r4,r4,128
5362 ++err2; std r0,0(r3)
5363 ++err2; std r6,8(r3)
5364 ++err2; std r8,16(r3)
5365 ++err2; std r9,24(r3)
5366 ++err2; std r10,32(r3)
5367 ++err2; std r11,40(r3)
5368 ++err2; std r12,48(r3)
5369 ++err2; std r14,56(r3)
5370 ++err2; std r15,64(r3)
5371 ++err2; std r16,72(r3)
5372 ++err2; std r17,80(r3)
5373 ++err2; std r18,88(r3)
5374 ++err2; std r19,96(r3)
5375 ++err2; std r20,104(r3)
5376 ++err2; std r21,112(r3)
5377 ++err2; std r22,120(r3)
5378 ++ addi r3,r3,128
5379 ++ subi r7,r7,128
5380 ++ bdnz 4b
5381 ++
5382 ++ clrldi r5,r5,(64-7)
5383 ++
5384 ++ /* Up to 127B to go */
5385 ++5: srdi r6,r5,4
5386 ++ mtocrf 0x01,r6
5387 ++
5388 ++6: bf cr7*4+1,7f
5389 ++err2; ld r0,0(r4)
5390 ++err2; ld r6,8(r4)
5391 ++err2; ld r8,16(r4)
5392 ++err2; ld r9,24(r4)
5393 ++err2; ld r10,32(r4)
5394 ++err2; ld r11,40(r4)
5395 ++err2; ld r12,48(r4)
5396 ++err2; ld r14,56(r4)
5397 ++ addi r4,r4,64
5398 ++err2; std r0,0(r3)
5399 ++err2; std r6,8(r3)
5400 ++err2; std r8,16(r3)
5401 ++err2; std r9,24(r3)
5402 ++err2; std r10,32(r3)
5403 ++err2; std r11,40(r3)
5404 ++err2; std r12,48(r3)
5405 ++err2; std r14,56(r3)
5406 ++ addi r3,r3,64
5407 ++ subi r7,r7,64
5408 ++
5409 ++7: ld r14,STK_REG(R14)(r1)
5410 ++ ld r15,STK_REG(R15)(r1)
5411 ++ ld r16,STK_REG(R16)(r1)
5412 ++ ld r17,STK_REG(R17)(r1)
5413 ++ ld r18,STK_REG(R18)(r1)
5414 ++ ld r19,STK_REG(R19)(r1)
5415 ++ ld r20,STK_REG(R20)(r1)
5416 ++ ld r21,STK_REG(R21)(r1)
5417 ++ ld r22,STK_REG(R22)(r1)
5418 ++ addi r1,r1,STACKFRAMESIZE
5419 ++
5420 ++ /* Up to 63B to go */
5421 ++ bf cr7*4+2,8f
5422 ++err1; ld r0,0(r4)
5423 ++err1; ld r6,8(r4)
5424 ++err1; ld r8,16(r4)
5425 ++err1; ld r9,24(r4)
5426 ++ addi r4,r4,32
5427 ++err1; std r0,0(r3)
5428 ++err1; std r6,8(r3)
5429 ++err1; std r8,16(r3)
5430 ++err1; std r9,24(r3)
5431 ++ addi r3,r3,32
5432 ++ subi r7,r7,32
5433 ++
5434 ++ /* Up to 31B to go */
5435 ++8: bf cr7*4+3,9f
5436 ++err1; ld r0,0(r4)
5437 ++err1; ld r6,8(r4)
5438 ++ addi r4,r4,16
5439 ++err1; std r0,0(r3)
5440 ++err1; std r6,8(r3)
5441 ++ addi r3,r3,16
5442 ++ subi r7,r7,16
5443 ++
5444 ++9: clrldi r5,r5,(64-4)
5445 ++
5446 ++ /* Up to 15B to go */
5447 ++.Lshort_copy:
5448 ++ mtocrf 0x01,r5
5449 ++ bf cr7*4+0,12f
5450 ++err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
5451 ++err1; lwz r6,4(r4)
5452 ++ addi r4,r4,8
5453 ++err1; stw r0,0(r3)
5454 ++err1; stw r6,4(r3)
5455 ++ addi r3,r3,8
5456 ++ subi r7,r7,8
5457 ++
5458 ++12: bf cr7*4+1,13f
5459 ++err1; lwz r0,0(r4)
5460 ++ addi r4,r4,4
5461 ++err1; stw r0,0(r3)
5462 ++ addi r3,r3,4
5463 ++ subi r7,r7,4
5464 ++
5465 ++13: bf cr7*4+2,14f
5466 ++err1; lhz r0,0(r4)
5467 ++ addi r4,r4,2
5468 ++err1; sth r0,0(r3)
5469 ++ addi r3,r3,2
5470 ++ subi r7,r7,2
5471 ++
5472 ++14: bf cr7*4+3,15f
5473 ++err1; lbz r0,0(r4)
5474 ++err1; stb r0,0(r3)
5475 ++
5476 ++15: li r3,0
5477 ++ blr
5478 ++
5479 ++EXPORT_SYMBOL_GPL(copy_mc_generic);
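
The calling convention of copy_mc_generic() above is visible in its exit paths: the success path loads 0 into r3 ("li r3,0"), while the fault paths fall back to a byte-by-byte copy and, if that also faults, return the number of bytes still uncopied from the CTR ("mfctr r3"). A hedged caller-side sketch in C (hypothetical prototype, stub, and buffer handling; not the kernel's actual wrapper), showing how a caller interprets that return value:

	#include <stdio.h>
	#include <string.h>

	/*
	 * Stand-in for the assembly routine above (assumption for this sketch
	 * only): returns 0 when everything was copied, otherwise the number of
	 * bytes that could not be copied. The stub simulates a machine-check
	 * fault after 'fault_after' bytes instead of taking a real one.
	 */
	static unsigned long fault_after = 300;

	static unsigned long copy_mc_generic(void *dst, const void *src,
					     unsigned long len)
	{
		unsigned long ok = len < fault_after ? len : fault_after;

		memcpy(dst, src, ok);
		return len - ok;	/* 0 on success, remaining bytes on fault */
	}

	int main(void)
	{
		char src[512], dst[512];
		unsigned long rem;

		memset(src, 0xab, sizeof(src));

		rem = copy_mc_generic(dst, src, sizeof(src));
		if (rem == 0)
			printf("copied all %zu bytes\n", sizeof(src));
		else
			/* Only the leading len - rem bytes of dst are valid. */
			printf("fault: %lu of %zu bytes not copied\n",
			       rem, sizeof(src));
		return 0;
	}
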