Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:4.10 commit in: /
Date: Sat, 08 Apr 2017 13:51:29
Message-Id: 1491659463.8fb2c956e0adbcdcac001eff148fcbf3b7d81ae6.mpagano@gentoo
1 commit: 8fb2c956e0adbcdcac001eff148fcbf3b7d81ae6
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Sat Apr 8 13:51:03 2017 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Sat Apr 8 13:51:03 2017 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=8fb2c956
7
8 Linux patch 4.10.9
9
10 0000_README | 4 +
11 1008_linux-4.10.9.patch | 4556 +++++++++++++++++++++++++++++++++++++++++++++++
12 2 files changed, 4560 insertions(+)
13
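For readers following the genpatches series: a minimal sketch (not part of the original commit) of how an incremental patch such as 1008_linux-4.10.9.patch is typically applied on top of the previous sublevel. The kernel source location below is an illustrative assumption, not taken from this commit.

    # Illustrative only -- the tree location is an assumption, not from this commit
    cd /usr/src/linux-4.10.8                        # tree already patched up to 4.10.8
    patch -p1 --dry-run < 1008_linux-4.10.9.patch   # confirm the increment applies cleanly
    patch -p1 < 1008_linux-4.10.9.patch             # apply the 4.10.8 -> 4.10.9 delta
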
14 diff --git a/0000_README b/0000_README
15 index 4c7de50..5f8d5b0 100644
16 --- a/0000_README
17 +++ b/0000_README
18 @@ -75,6 +75,10 @@ Patch: 1007_linux-4.10.8.patch
19 From: http://www.kernel.org
20 Desc: Linux 4.10.8
21
22 +Patch: 1008_linux-4.10.9.patch
23 +From: http://www.kernel.org
24 +Desc: Linux 4.10.9
25 +
26 Patch: 1500_XATTR_USER_PREFIX.patch
27 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
28 Desc: Support for namespace user.pax.* on tmpfs.
29
30 diff --git a/1008_linux-4.10.9.patch b/1008_linux-4.10.9.patch
31 new file mode 100644
32 index 0000000..1aba6be
33 --- /dev/null
34 +++ b/1008_linux-4.10.9.patch
35 @@ -0,0 +1,4556 @@
36 +diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt
37 +index 471477299ece..9cf7876ab434 100644
38 +--- a/Documentation/devicetree/bindings/rng/omap_rng.txt
39 ++++ b/Documentation/devicetree/bindings/rng/omap_rng.txt
40 +@@ -12,7 +12,8 @@ Required properties:
41 + - reg : Offset and length of the register set for the module
42 + - interrupts : the interrupt number for the RNG module.
43 + Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
44 +-- clocks: the trng clock source
45 ++- clocks: the trng clock source. Only mandatory for the
46 ++ "inside-secure,safexcel-eip76" compatible.
47 +
48 + Example:
49 + /* AM335x */
50 +diff --git a/Makefile b/Makefile
51 +index 82e0809fed9b..4ebd511dee58 100644
52 +--- a/Makefile
53 ++++ b/Makefile
54 +@@ -1,6 +1,6 @@
55 + VERSION = 4
56 + PATCHLEVEL = 10
57 +-SUBLEVEL = 8
58 ++SUBLEVEL = 9
59 + EXTRAVERSION =
60 + NAME = Fearless Coyote
61 +
62 +diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
63 +index d408fa21a07c..928562967f3c 100644
64 +--- a/arch/arc/mm/cache.c
65 ++++ b/arch/arc/mm/cache.c
66 +@@ -633,6 +633,9 @@ noinline static void slc_entire_op(const int op)
67 +
68 + write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
69 +
70 ++ /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
71 ++ read_aux_reg(r);
72 ++
73 + /* Important to wait for flush to complete */
74 + while (read_aux_reg(r) & SLC_CTRL_BUSY);
75 + }
76 +diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
77 +index f09a2bb08979..4b6049240ec2 100644
78 +--- a/arch/arm/boot/dts/bcm5301x.dtsi
79 ++++ b/arch/arm/boot/dts/bcm5301x.dtsi
80 +@@ -66,14 +66,14 @@
81 + timer@20200 {
82 + compatible = "arm,cortex-a9-global-timer";
83 + reg = <0x20200 0x100>;
84 +- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
85 ++ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
86 + clocks = <&periph_clk>;
87 + };
88 +
89 + local-timer@20600 {
90 + compatible = "arm,cortex-a9-twd-timer";
91 + reg = <0x20600 0x100>;
92 +- interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
93 ++ interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
94 + clocks = <&periph_clk>;
95 + };
96 +
97 +diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
98 +index 8ac0e5994ed2..0ddf3698b85d 100644
99 +--- a/arch/mips/lantiq/irq.c
100 ++++ b/arch/mips/lantiq/irq.c
101 +@@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void)
102 + DEFINE_HWx_IRQDISPATCH(5)
103 + #endif
104 +
105 ++static void ltq_hw_irq_handler(struct irq_desc *desc)
106 ++{
107 ++ ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
108 ++}
109 ++
110 + #ifdef CONFIG_MIPS_MT_SMP
111 + void __init arch_init_ipiirq(int irq, struct irqaction *action)
112 + {
113 +@@ -313,23 +318,19 @@ static struct irqaction irq_call = {
114 + asmlinkage void plat_irq_dispatch(void)
115 + {
116 + unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
117 +- unsigned int i;
118 +-
119 +- if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) {
120 +- do_IRQ(MIPS_CPU_TIMER_IRQ);
121 +- goto out;
122 +- } else {
123 +- for (i = 0; i < MAX_IM; i++) {
124 +- if (pending & (CAUSEF_IP2 << i)) {
125 +- ltq_hw_irqdispatch(i);
126 +- goto out;
127 +- }
128 +- }
129 ++ int irq;
130 ++
131 ++ if (!pending) {
132 ++ spurious_interrupt();
133 ++ return;
134 + }
135 +- pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());
136 +
137 +-out:
138 +- return;
139 ++ pending >>= CAUSEB_IP;
140 ++ while (pending) {
141 ++ irq = fls(pending) - 1;
142 ++ do_IRQ(MIPS_CPU_IRQ_BASE + irq);
143 ++ pending &= ~BIT(irq);
144 ++ }
145 + }
146 +
147 + static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
148 +@@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = {
149 + .map = icu_map,
150 + };
151 +
152 +-static struct irqaction cascade = {
153 +- .handler = no_action,
154 +- .name = "cascade",
155 +-};
156 +-
157 + int __init icu_of_init(struct device_node *node, struct device_node *parent)
158 + {
159 + struct device_node *eiu_node;
160 +@@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
161 + mips_cpu_irq_init();
162 +
163 + for (i = 0; i < MAX_IM; i++)
164 +- setup_irq(i + 2, &cascade);
165 ++ irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
166 +
167 + if (cpu_has_vint) {
168 + pr_info("Setting up vectored interrupts\n");
169 +diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
170 +index 9a2aee1b90fc..7fcf5128996a 100644
171 +--- a/arch/parisc/include/asm/uaccess.h
172 ++++ b/arch/parisc/include/asm/uaccess.h
173 +@@ -68,6 +68,15 @@ struct exception_table_entry {
174 + ".previous\n"
175 +
176 + /*
177 ++ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
178 ++ * (with lowest bit set) for which the fault handler in fixup_exception() will
179 ++ * load -EFAULT into %r8 for a read or write fault, and zeroes the target
180 ++ * register in case of a read fault in get_user().
181 ++ */
182 ++#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
183 ++ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
184 ++
185 ++/*
186 + * The page fault handler stores, in a per-cpu area, the following information
187 + * if a fixup routine is available.
188 + */
189 +@@ -94,7 +103,7 @@ struct exception_data {
190 + #define __get_user(x, ptr) \
191 + ({ \
192 + register long __gu_err __asm__ ("r8") = 0; \
193 +- register long __gu_val __asm__ ("r9") = 0; \
194 ++ register long __gu_val; \
195 + \
196 + load_sr2(); \
197 + switch (sizeof(*(ptr))) { \
198 +@@ -110,22 +119,23 @@ struct exception_data {
199 + })
200 +
201 + #define __get_user_asm(ldx, ptr) \
202 +- __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \
203 +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
204 ++ __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \
205 ++ "9:\n" \
206 ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
207 + : "=r"(__gu_val), "=r"(__gu_err) \
208 +- : "r"(ptr), "1"(__gu_err) \
209 +- : "r1");
210 ++ : "r"(ptr), "1"(__gu_err));
211 +
212 + #if !defined(CONFIG_64BIT)
213 +
214 + #define __get_user_asm64(ptr) \
215 +- __asm__("\n1:\tldw 0(%%sr2,%2),%0" \
216 +- "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \
217 +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\
218 +- ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\
219 ++ __asm__(" copy %%r0,%R0\n" \
220 ++ "1: ldw 0(%%sr2,%2),%0\n" \
221 ++ "2: ldw 4(%%sr2,%2),%R0\n" \
222 ++ "9:\n" \
223 ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
224 ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
225 + : "=r"(__gu_val), "=r"(__gu_err) \
226 +- : "r"(ptr), "1"(__gu_err) \
227 +- : "r1");
228 ++ : "r"(ptr), "1"(__gu_err));
229 +
230 + #endif /* !defined(CONFIG_64BIT) */
231 +
232 +@@ -151,32 +161,31 @@ struct exception_data {
233 + * The "__put_user/kernel_asm()" macros tell gcc they read from memory
234 + * instead of writing. This is because they do not write to any memory
235 + * gcc knows about, so there are no aliasing issues. These macros must
236 +- * also be aware that "fixup_put_user_skip_[12]" are executed in the
237 +- * context of the fault, and any registers used there must be listed
238 +- * as clobbers. In this case only "r1" is used by the current routines.
239 +- * r8/r9 are already listed as err/val.
240 ++ * also be aware that fixups are executed in the context of the fault,
241 ++ * and any registers used there must be listed as clobbers.
242 ++ * r8 is already listed as err.
243 + */
244 +
245 + #define __put_user_asm(stx, x, ptr) \
246 + __asm__ __volatile__ ( \
247 +- "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \
248 +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\
249 ++ "1: " stx " %2,0(%%sr2,%1)\n" \
250 ++ "9:\n" \
251 ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
252 + : "=r"(__pu_err) \
253 +- : "r"(ptr), "r"(x), "0"(__pu_err) \
254 +- : "r1")
255 ++ : "r"(ptr), "r"(x), "0"(__pu_err))
256 +
257 +
258 + #if !defined(CONFIG_64BIT)
259 +
260 + #define __put_user_asm64(__val, ptr) do { \
261 + __asm__ __volatile__ ( \
262 +- "\n1:\tstw %2,0(%%sr2,%1)" \
263 +- "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \
264 +- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\
265 +- ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\
266 ++ "1: stw %2,0(%%sr2,%1)\n" \
267 ++ "2: stw %R2,4(%%sr2,%1)\n" \
268 ++ "9:\n" \
269 ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
270 ++ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
271 + : "=r"(__pu_err) \
272 +- : "r"(ptr), "r"(__val), "0"(__pu_err) \
273 +- : "r1"); \
274 ++ : "r"(ptr), "r"(__val), "0"(__pu_err)); \
275 + } while (0)
276 +
277 + #endif /* !defined(CONFIG_64BIT) */
278 +diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
279 +index 7484b3d11e0d..c6d6272a934f 100644
280 +--- a/arch/parisc/kernel/parisc_ksyms.c
281 ++++ b/arch/parisc/kernel/parisc_ksyms.c
282 +@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64);
283 + EXPORT_SYMBOL(lclear_user);
284 + EXPORT_SYMBOL(lstrnlen_user);
285 +
286 +-/* Global fixups - defined as int to avoid creation of function pointers */
287 +-extern int fixup_get_user_skip_1;
288 +-extern int fixup_get_user_skip_2;
289 +-extern int fixup_put_user_skip_1;
290 +-extern int fixup_put_user_skip_2;
291 +-EXPORT_SYMBOL(fixup_get_user_skip_1);
292 +-EXPORT_SYMBOL(fixup_get_user_skip_2);
293 +-EXPORT_SYMBOL(fixup_put_user_skip_1);
294 +-EXPORT_SYMBOL(fixup_put_user_skip_2);
295 +-
296 + #ifndef CONFIG_64BIT
297 + /* Needed so insmod can set dp value */
298 + extern int $global$;
299 +diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
300 +index 9e2d98ee6f9c..3286cbc7b934 100644
301 +--- a/arch/parisc/kernel/process.c
302 ++++ b/arch/parisc/kernel/process.c
303 +@@ -140,6 +140,8 @@ void machine_power_off(void)
304 + printk(KERN_EMERG "System shut down completed.\n"
305 + "Please power this system off now.");
306 +
307 ++ /* prevent soft lockup/stalled CPU messages for endless loop. */
308 ++ rcu_sysrq_start();
309 + for (;;);
310 + }
311 +
312 +diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
313 +index 8fa92b8d839a..f2dac4d73b1b 100644
314 +--- a/arch/parisc/lib/Makefile
315 ++++ b/arch/parisc/lib/Makefile
316 +@@ -2,7 +2,7 @@
317 + # Makefile for parisc-specific library files
318 + #
319 +
320 +-lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \
321 ++lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
322 + ucmpdi2.o delay.o
323 +
324 + obj-y := iomap.o
325 +diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
326 +deleted file mode 100644
327 +index a5b72f22c7a6..000000000000
328 +--- a/arch/parisc/lib/fixup.S
329 ++++ /dev/null
330 +@@ -1,98 +0,0 @@
331 +-/*
332 +- * Linux/PA-RISC Project (http://www.parisc-linux.org/)
333 +- *
334 +- * Copyright (C) 2004 Randolph Chung <tausq@××××××.org>
335 +- *
336 +- * This program is free software; you can redistribute it and/or modify
337 +- * it under the terms of the GNU General Public License as published by
338 +- * the Free Software Foundation; either version 2, or (at your option)
339 +- * any later version.
340 +- *
341 +- * This program is distributed in the hope that it will be useful,
342 +- * but WITHOUT ANY WARRANTY; without even the implied warranty of
343 +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
344 +- * GNU General Public License for more details.
345 +- *
346 +- * You should have received a copy of the GNU General Public License
347 +- * along with this program; if not, write to the Free Software
348 +- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
349 +- *
350 +- * Fixup routines for kernel exception handling.
351 +- */
352 +-#include <asm/asm-offsets.h>
353 +-#include <asm/assembly.h>
354 +-#include <asm/errno.h>
355 +-#include <linux/linkage.h>
356 +-
357 +-#ifdef CONFIG_SMP
358 +- .macro get_fault_ip t1 t2
359 +- loadgp
360 +- addil LT%__per_cpu_offset,%r27
361 +- LDREG RT%__per_cpu_offset(%r1),\t1
362 +- /* t2 = smp_processor_id() */
363 +- mfctl 30,\t2
364 +- ldw TI_CPU(\t2),\t2
365 +-#ifdef CONFIG_64BIT
366 +- extrd,u \t2,63,32,\t2
367 +-#endif
368 +- /* t2 = &__per_cpu_offset[smp_processor_id()]; */
369 +- LDREGX \t2(\t1),\t2
370 +- addil LT%exception_data,%r27
371 +- LDREG RT%exception_data(%r1),\t1
372 +- /* t1 = this_cpu_ptr(&exception_data) */
373 +- add,l \t1,\t2,\t1
374 +- /* %r27 = t1->fault_gp - restore gp */
375 +- LDREG EXCDATA_GP(\t1), %r27
376 +- /* t1 = t1->fault_ip */
377 +- LDREG EXCDATA_IP(\t1), \t1
378 +- .endm
379 +-#else
380 +- .macro get_fault_ip t1 t2
381 +- loadgp
382 +- /* t1 = this_cpu_ptr(&exception_data) */
383 +- addil LT%exception_data,%r27
384 +- LDREG RT%exception_data(%r1),\t2
385 +- /* %r27 = t2->fault_gp - restore gp */
386 +- LDREG EXCDATA_GP(\t2), %r27
387 +- /* t1 = t2->fault_ip */
388 +- LDREG EXCDATA_IP(\t2), \t1
389 +- .endm
390 +-#endif
391 +-
392 +- .level LEVEL
393 +-
394 +- .text
395 +- .section .fixup, "ax"
396 +-
397 +- /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
398 +-ENTRY_CFI(fixup_get_user_skip_1)
399 +- get_fault_ip %r1,%r8
400 +- ldo 4(%r1), %r1
401 +- ldi -EFAULT, %r8
402 +- bv %r0(%r1)
403 +- copy %r0, %r9
404 +-ENDPROC_CFI(fixup_get_user_skip_1)
405 +-
406 +-ENTRY_CFI(fixup_get_user_skip_2)
407 +- get_fault_ip %r1,%r8
408 +- ldo 8(%r1), %r1
409 +- ldi -EFAULT, %r8
410 +- bv %r0(%r1)
411 +- copy %r0, %r9
412 +-ENDPROC_CFI(fixup_get_user_skip_2)
413 +-
414 +- /* put_user() fixups, store -EFAULT in r8 */
415 +-ENTRY_CFI(fixup_put_user_skip_1)
416 +- get_fault_ip %r1,%r8
417 +- ldo 4(%r1), %r1
418 +- bv %r0(%r1)
419 +- ldi -EFAULT, %r8
420 +-ENDPROC_CFI(fixup_put_user_skip_1)
421 +-
422 +-ENTRY_CFI(fixup_put_user_skip_2)
423 +- get_fault_ip %r1,%r8
424 +- ldo 8(%r1), %r1
425 +- bv %r0(%r1)
426 +- ldi -EFAULT, %r8
427 +-ENDPROC_CFI(fixup_put_user_skip_2)
428 +-
429 +diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
430 +index 56845de6b5df..f01188c044ee 100644
431 +--- a/arch/parisc/lib/lusercopy.S
432 ++++ b/arch/parisc/lib/lusercopy.S
433 +@@ -5,6 +5,8 @@
434 + * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
435 + * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
436 + * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
437 ++ * Copyright (C) 2017 Helge Deller <deller@×××.de>
438 ++ * Copyright (C) 2017 John David Anglin <dave.anglin@××××.net>
439 + *
440 + *
441 + * This program is free software; you can redistribute it and/or modify
442 +@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)
443 +
444 + .procend
445 +
446 ++
447 ++
448 ++/*
449 ++ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
450 ++ *
451 ++ * Inputs:
452 ++ * - sr1 already contains space of source region
453 ++ * - sr2 already contains space of destination region
454 ++ *
455 ++ * Returns:
456 ++ * - number of bytes that could not be copied.
457 ++ * On success, this will be zero.
458 ++ *
459 ++ * This code is based on a C-implementation of a copy routine written by
460 ++ * Randolph Chung, which in turn was derived from the glibc.
461 ++ *
462 ++ * Several strategies are tried to try to get the best performance for various
463 ++ * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
464 ++ * at a time using general registers. Unaligned copies are handled either by
465 ++ * aligning the destination and then using shift-and-write method, or in a few
466 ++ * cases by falling back to a byte-at-a-time copy.
467 ++ *
468 ++ * Testing with various alignments and buffer sizes shows that this code is
469 ++ * often >10x faster than a simple byte-at-a-time copy, even for strangely
470 ++ * aligned operands. It is interesting to note that the glibc version of memcpy
471 ++ * (written in C) is actually quite fast already. This routine is able to beat
472 ++ * it by 30-40% for aligned copies because of the loop unrolling, but in some
473 ++ * cases the glibc version is still slightly faster. This lends more
474 ++ * credibility that gcc can generate very good code as long as we are careful.
475 ++ *
476 ++ * Possible optimizations:
477 ++ * - add cache prefetching
478 ++ * - try not to use the post-increment address modifiers; they may create
479 ++ * additional interlocks. Assumption is that those were only efficient on old
480 ++ * machines (pre PA8000 processors)
481 ++ */
482 ++
483 ++ dst = arg0
484 ++ src = arg1
485 ++ len = arg2
486 ++ end = arg3
487 ++ t1 = r19
488 ++ t2 = r20
489 ++ t3 = r21
490 ++ t4 = r22
491 ++ srcspc = sr1
492 ++ dstspc = sr2
493 ++
494 ++ t0 = r1
495 ++ a1 = t1
496 ++ a2 = t2
497 ++ a3 = t3
498 ++ a0 = t4
499 ++
500 ++ save_src = ret0
501 ++ save_dst = ret1
502 ++ save_len = r31
503 ++
504 ++ENTRY_CFI(pa_memcpy)
505 ++ .proc
506 ++ .callinfo NO_CALLS
507 ++ .entry
508 ++
509 ++ /* Last destination address */
510 ++ add dst,len,end
511 ++
512 ++ /* short copy with less than 16 bytes? */
513 ++ cmpib,>>=,n 15,len,.Lbyte_loop
514 ++
515 ++ /* same alignment? */
516 ++ xor src,dst,t0
517 ++ extru t0,31,2,t1
518 ++ cmpib,<>,n 0,t1,.Lunaligned_copy
519 ++
520 ++#ifdef CONFIG_64BIT
521 ++ /* only do 64-bit copies if we can get aligned. */
522 ++ extru t0,31,3,t1
523 ++ cmpib,<>,n 0,t1,.Lalign_loop32
524 ++
525 ++ /* loop until we are 64-bit aligned */
526 ++.Lalign_loop64:
527 ++ extru dst,31,3,t1
528 ++ cmpib,=,n 0,t1,.Lcopy_loop_16
529 ++20: ldb,ma 1(srcspc,src),t1
530 ++21: stb,ma t1,1(dstspc,dst)
531 ++ b .Lalign_loop64
532 ++ ldo -1(len),len
533 ++
534 ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
535 ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
536 ++
537 ++ ldi 31,t0
538 ++.Lcopy_loop_16:
539 ++ cmpb,COND(>>=),n t0,len,.Lword_loop
540 ++
541 ++10: ldd 0(srcspc,src),t1
542 ++11: ldd 8(srcspc,src),t2
543 ++ ldo 16(src),src
544 ++12: std,ma t1,8(dstspc,dst)
545 ++13: std,ma t2,8(dstspc,dst)
546 ++14: ldd 0(srcspc,src),t1
547 ++15: ldd 8(srcspc,src),t2
548 ++ ldo 16(src),src
549 ++16: std,ma t1,8(dstspc,dst)
550 ++17: std,ma t2,8(dstspc,dst)
551 ++
552 ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
553 ++ ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
554 ++ ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
555 ++ ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
556 ++ ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
557 ++ ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
558 ++ ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
559 ++ ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
560 ++
561 ++ b .Lcopy_loop_16
562 ++ ldo -32(len),len
563 ++
564 ++.Lword_loop:
565 ++ cmpib,COND(>>=),n 3,len,.Lbyte_loop
566 ++20: ldw,ma 4(srcspc,src),t1
567 ++21: stw,ma t1,4(dstspc,dst)
568 ++ b .Lword_loop
569 ++ ldo -4(len),len
570 ++
571 ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
572 ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
573 ++
574 ++#endif /* CONFIG_64BIT */
575 ++
576 ++ /* loop until we are 32-bit aligned */
577 ++.Lalign_loop32:
578 ++ extru dst,31,2,t1
579 ++ cmpib,=,n 0,t1,.Lcopy_loop_4
580 ++20: ldb,ma 1(srcspc,src),t1
581 ++21: stb,ma t1,1(dstspc,dst)
582 ++ b .Lalign_loop32
583 ++ ldo -1(len),len
584 ++
585 ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
586 ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
587 ++
588 ++
589 ++.Lcopy_loop_4:
590 ++ cmpib,COND(>>=),n 15,len,.Lbyte_loop
591 ++
592 ++10: ldw 0(srcspc,src),t1
593 ++11: ldw 4(srcspc,src),t2
594 ++12: stw,ma t1,4(dstspc,dst)
595 ++13: stw,ma t2,4(dstspc,dst)
596 ++14: ldw 8(srcspc,src),t1
597 ++15: ldw 12(srcspc,src),t2
598 ++ ldo 16(src),src
599 ++16: stw,ma t1,4(dstspc,dst)
600 ++17: stw,ma t2,4(dstspc,dst)
601 ++
602 ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
603 ++ ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
604 ++ ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
605 ++ ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
606 ++ ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
607 ++ ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
608 ++ ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
609 ++ ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
610 ++
611 ++ b .Lcopy_loop_4
612 ++ ldo -16(len),len
613 ++
614 ++.Lbyte_loop:
615 ++ cmpclr,COND(<>) len,%r0,%r0
616 ++ b,n .Lcopy_done
617 ++20: ldb 0(srcspc,src),t1
618 ++ ldo 1(src),src
619 ++21: stb,ma t1,1(dstspc,dst)
620 ++ b .Lbyte_loop
621 ++ ldo -1(len),len
622 ++
623 ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
624 ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
625 ++
626 ++.Lcopy_done:
627 ++ bv %r0(%r2)
628 ++ sub end,dst,ret0
629 ++
630 ++
631 ++ /* src and dst are not aligned the same way. */
632 ++ /* need to go the hard way */
633 ++.Lunaligned_copy:
634 ++ /* align until dst is 32bit-word-aligned */
635 ++ extru dst,31,2,t1
636 ++ cmpib,COND(=),n 0,t1,.Lcopy_dstaligned
637 ++20: ldb 0(srcspc,src),t1
638 ++ ldo 1(src),src
639 ++21: stb,ma t1,1(dstspc,dst)
640 ++ b .Lunaligned_copy
641 ++ ldo -1(len),len
642 ++
643 ++ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
644 ++ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
645 ++
646 ++.Lcopy_dstaligned:
647 ++
648 ++ /* store src, dst and len in safe place */
649 ++ copy src,save_src
650 ++ copy dst,save_dst
651 ++ copy len,save_len
652 ++
653 ++ /* len now needs to give the number of words to copy */
654 ++ SHRREG len,2,len
655 ++
656 ++ /*
657 ++ * Copy from a not-aligned src to an aligned dst using shifts.
658 ++ * Handles 4 words per loop.
659 ++ */
660 ++
661 ++ depw,z src,28,2,t0
662 ++ subi 32,t0,t0
663 ++ mtsar t0
664 ++ extru len,31,2,t0
665 ++ cmpib,= 2,t0,.Lcase2
666 ++ /* Make src aligned by rounding it down. */
667 ++ depi 0,31,2,src
668 ++
669 ++ cmpiclr,<> 3,t0,%r0
670 ++ b,n .Lcase3
671 ++ cmpiclr,<> 1,t0,%r0
672 ++ b,n .Lcase1
673 ++.Lcase0:
674 ++ cmpb,= %r0,len,.Lcda_finish
675 ++ nop
676 ++
677 ++1: ldw,ma 4(srcspc,src), a3
678 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
679 ++1: ldw,ma 4(srcspc,src), a0
680 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
681 ++ b,n .Ldo3
682 ++.Lcase1:
683 ++1: ldw,ma 4(srcspc,src), a2
684 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
685 ++1: ldw,ma 4(srcspc,src), a3
686 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
687 ++ ldo -1(len),len
688 ++ cmpb,=,n %r0,len,.Ldo0
689 ++.Ldo4:
690 ++1: ldw,ma 4(srcspc,src), a0
691 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
692 ++ shrpw a2, a3, %sar, t0
693 ++1: stw,ma t0, 4(dstspc,dst)
694 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
695 ++.Ldo3:
696 ++1: ldw,ma 4(srcspc,src), a1
697 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
698 ++ shrpw a3, a0, %sar, t0
699 ++1: stw,ma t0, 4(dstspc,dst)
700 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
701 ++.Ldo2:
702 ++1: ldw,ma 4(srcspc,src), a2
703 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
704 ++ shrpw a0, a1, %sar, t0
705 ++1: stw,ma t0, 4(dstspc,dst)
706 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
707 ++.Ldo1:
708 ++1: ldw,ma 4(srcspc,src), a3
709 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
710 ++ shrpw a1, a2, %sar, t0
711 ++1: stw,ma t0, 4(dstspc,dst)
712 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
713 ++ ldo -4(len),len
714 ++ cmpb,<> %r0,len,.Ldo4
715 ++ nop
716 ++.Ldo0:
717 ++ shrpw a2, a3, %sar, t0
718 ++1: stw,ma t0, 4(dstspc,dst)
719 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
720 ++
721 ++.Lcda_rdfault:
722 ++.Lcda_finish:
723 ++ /* calculate new src, dst and len and jump to byte-copy loop */
724 ++ sub dst,save_dst,t0
725 ++ add save_src,t0,src
726 ++ b .Lbyte_loop
727 ++ sub save_len,t0,len
728 ++
729 ++.Lcase3:
730 ++1: ldw,ma 4(srcspc,src), a0
731 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
732 ++1: ldw,ma 4(srcspc,src), a1
733 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
734 ++ b .Ldo2
735 ++ ldo 1(len),len
736 ++.Lcase2:
737 ++1: ldw,ma 4(srcspc,src), a1
738 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
739 ++1: ldw,ma 4(srcspc,src), a2
740 ++ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
741 ++ b .Ldo1
742 ++ ldo 2(len),len
743 ++
744 ++
745 ++ /* fault exception fixup handlers: */
746 ++#ifdef CONFIG_64BIT
747 ++.Lcopy16_fault:
748 ++10: b .Lcopy_done
749 ++ std,ma t1,8(dstspc,dst)
750 ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
751 ++#endif
752 ++
753 ++.Lcopy8_fault:
754 ++10: b .Lcopy_done
755 ++ stw,ma t1,4(dstspc,dst)
756 ++ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
757 ++
758 ++ .exit
759 ++ENDPROC_CFI(pa_memcpy)
760 ++ .procend
761 ++
762 + .end
763 +diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
764 +index f82ff10ed974..b3d47ec1d80a 100644
765 +--- a/arch/parisc/lib/memcpy.c
766 ++++ b/arch/parisc/lib/memcpy.c
767 +@@ -2,7 +2,7 @@
768 + * Optimized memory copy routines.
769 + *
770 + * Copyright (C) 2004 Randolph Chung <tausq@××××××.org>
771 +- * Copyright (C) 2013 Helge Deller <deller@×××.de>
772 ++ * Copyright (C) 2013-2017 Helge Deller <deller@×××.de>
773 + *
774 + * This program is free software; you can redistribute it and/or modify
775 + * it under the terms of the GNU General Public License as published by
776 +@@ -21,474 +21,21 @@
777 + * Portions derived from the GNU C Library
778 + * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
779 + *
780 +- * Several strategies are tried to try to get the best performance for various
781 +- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
782 +- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
783 +- * general registers. Unaligned copies are handled either by aligning the
784 +- * destination and then using shift-and-write method, or in a few cases by
785 +- * falling back to a byte-at-a-time copy.
786 +- *
787 +- * I chose to implement this in C because it is easier to maintain and debug,
788 +- * and in my experiments it appears that the C code generated by gcc (3.3/3.4
789 +- * at the time of writing) is fairly optimal. Unfortunately some of the
790 +- * semantics of the copy routine (exception handling) is difficult to express
791 +- * in C, so we have to play some tricks to get it to work.
792 +- *
793 +- * All the loads and stores are done via explicit asm() code in order to use
794 +- * the right space registers.
795 +- *
796 +- * Testing with various alignments and buffer sizes shows that this code is
797 +- * often >10x faster than a simple byte-at-a-time copy, even for strangely
798 +- * aligned operands. It is interesting to note that the glibc version
799 +- * of memcpy (written in C) is actually quite fast already. This routine is
800 +- * able to beat it by 30-40% for aligned copies because of the loop unrolling,
801 +- * but in some cases the glibc version is still slightly faster. This lends
802 +- * more credibility that gcc can generate very good code as long as we are
803 +- * careful.
804 +- *
805 +- * TODO:
806 +- * - cache prefetching needs more experimentation to get optimal settings
807 +- * - try not to use the post-increment address modifiers; they create additional
808 +- * interlocks
809 +- * - replace byte-copy loops with stybs sequences
810 + */
811 +
812 +-#ifdef __KERNEL__
813 + #include <linux/module.h>
814 + #include <linux/compiler.h>
815 + #include <linux/uaccess.h>
816 +-#define s_space "%%sr1"
817 +-#define d_space "%%sr2"
818 +-#else
819 +-#include "memcpy.h"
820 +-#define s_space "%%sr0"
821 +-#define d_space "%%sr0"
822 +-#define pa_memcpy new2_copy
823 +-#endif
824 +
825 + DECLARE_PER_CPU(struct exception_data, exception_data);
826 +
827 +-#define preserve_branch(label) do { \
828 +- volatile int dummy = 0; \
829 +- /* The following branch is never taken, it's just here to */ \
830 +- /* prevent gcc from optimizing away our exception code. */ \
831 +- if (unlikely(dummy != dummy)) \
832 +- goto label; \
833 +-} while (0)
834 +-
835 + #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
836 + #define get_kernel_space() (0)
837 +
838 +-#define MERGE(w0, sh_1, w1, sh_2) ({ \
839 +- unsigned int _r; \
840 +- asm volatile ( \
841 +- "mtsar %3\n" \
842 +- "shrpw %1, %2, %%sar, %0\n" \
843 +- : "=r"(_r) \
844 +- : "r"(w0), "r"(w1), "r"(sh_2) \
845 +- ); \
846 +- _r; \
847 +-})
848 +-#define THRESHOLD 16
849 +-
850 +-#ifdef DEBUG_MEMCPY
851 +-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
852 +-#else
853 +-#define DPRINTF(fmt, args...)
854 +-#endif
855 +-
856 +-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
857 +- __asm__ __volatile__ ( \
858 +- "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
859 +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
860 +- : _tt(_t), "+r"(_a) \
861 +- : \
862 +- : "r8")
863 +-
864 +-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
865 +- __asm__ __volatile__ ( \
866 +- "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
867 +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
868 +- : "+r"(_a) \
869 +- : _tt(_t) \
870 +- : "r8")
871 +-
872 +-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
873 +-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
874 +-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
875 +-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
876 +-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
877 +-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
878 +-
879 +-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
880 +- __asm__ __volatile__ ( \
881 +- "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \
882 +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
883 +- : _tt(_t) \
884 +- : "r"(_a) \
885 +- : "r8")
886 +-
887 +-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
888 +- __asm__ __volatile__ ( \
889 +- "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \
890 +- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
891 +- : \
892 +- : _tt(_t), "r"(_a) \
893 +- : "r8")
894 +-
895 +-#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
896 +-#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
897 +-
898 +-#ifdef CONFIG_PREFETCH
899 +-static inline void prefetch_src(const void *addr)
900 +-{
901 +- __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
902 +-}
903 +-
904 +-static inline void prefetch_dst(const void *addr)
905 +-{
906 +- __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
907 +-}
908 +-#else
909 +-#define prefetch_src(addr) do { } while(0)
910 +-#define prefetch_dst(addr) do { } while(0)
911 +-#endif
912 +-
913 +-#define PA_MEMCPY_OK 0
914 +-#define PA_MEMCPY_LOAD_ERROR 1
915 +-#define PA_MEMCPY_STORE_ERROR 2
916 +-
917 +-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
918 +- * per loop. This code is derived from glibc.
919 +- */
920 +-static noinline unsigned long copy_dstaligned(unsigned long dst,
921 +- unsigned long src, unsigned long len)
922 +-{
923 +- /* gcc complains that a2 and a3 may be uninitialized, but actually
924 +- * they cannot be. Initialize a2/a3 to shut gcc up.
925 +- */
926 +- register unsigned int a0, a1, a2 = 0, a3 = 0;
927 +- int sh_1, sh_2;
928 +-
929 +- /* prefetch_src((const void *)src); */
930 +-
931 +- /* Calculate how to shift a word read at the memory operation
932 +- aligned srcp to make it aligned for copy. */
933 +- sh_1 = 8 * (src % sizeof(unsigned int));
934 +- sh_2 = 8 * sizeof(unsigned int) - sh_1;
935 +-
936 +- /* Make src aligned by rounding it down. */
937 +- src &= -sizeof(unsigned int);
938 +-
939 +- switch (len % 4)
940 +- {
941 +- case 2:
942 +- /* a1 = ((unsigned int *) src)[0];
943 +- a2 = ((unsigned int *) src)[1]; */
944 +- ldw(s_space, 0, src, a1, cda_ldw_exc);
945 +- ldw(s_space, 4, src, a2, cda_ldw_exc);
946 +- src -= 1 * sizeof(unsigned int);
947 +- dst -= 3 * sizeof(unsigned int);
948 +- len += 2;
949 +- goto do1;
950 +- case 3:
951 +- /* a0 = ((unsigned int *) src)[0];
952 +- a1 = ((unsigned int *) src)[1]; */
953 +- ldw(s_space, 0, src, a0, cda_ldw_exc);
954 +- ldw(s_space, 4, src, a1, cda_ldw_exc);
955 +- src -= 0 * sizeof(unsigned int);
956 +- dst -= 2 * sizeof(unsigned int);
957 +- len += 1;
958 +- goto do2;
959 +- case 0:
960 +- if (len == 0)
961 +- return PA_MEMCPY_OK;
962 +- /* a3 = ((unsigned int *) src)[0];
963 +- a0 = ((unsigned int *) src)[1]; */
964 +- ldw(s_space, 0, src, a3, cda_ldw_exc);
965 +- ldw(s_space, 4, src, a0, cda_ldw_exc);
966 +- src -=-1 * sizeof(unsigned int);
967 +- dst -= 1 * sizeof(unsigned int);
968 +- len += 0;
969 +- goto do3;
970 +- case 1:
971 +- /* a2 = ((unsigned int *) src)[0];
972 +- a3 = ((unsigned int *) src)[1]; */
973 +- ldw(s_space, 0, src, a2, cda_ldw_exc);
974 +- ldw(s_space, 4, src, a3, cda_ldw_exc);
975 +- src -=-2 * sizeof(unsigned int);
976 +- dst -= 0 * sizeof(unsigned int);
977 +- len -= 1;
978 +- if (len == 0)
979 +- goto do0;
980 +- goto do4; /* No-op. */
981 +- }
982 +-
983 +- do
984 +- {
985 +- /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
986 +-do4:
987 +- /* a0 = ((unsigned int *) src)[0]; */
988 +- ldw(s_space, 0, src, a0, cda_ldw_exc);
989 +- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
990 +- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
991 +-do3:
992 +- /* a1 = ((unsigned int *) src)[1]; */
993 +- ldw(s_space, 4, src, a1, cda_ldw_exc);
994 +- /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
995 +- stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
996 +-do2:
997 +- /* a2 = ((unsigned int *) src)[2]; */
998 +- ldw(s_space, 8, src, a2, cda_ldw_exc);
999 +- /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
1000 +- stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
1001 +-do1:
1002 +- /* a3 = ((unsigned int *) src)[3]; */
1003 +- ldw(s_space, 12, src, a3, cda_ldw_exc);
1004 +- /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
1005 +- stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
1006 +-
1007 +- src += 4 * sizeof(unsigned int);
1008 +- dst += 4 * sizeof(unsigned int);
1009 +- len -= 4;
1010 +- }
1011 +- while (len != 0);
1012 +-
1013 +-do0:
1014 +- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
1015 +- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
1016 +-
1017 +- preserve_branch(handle_load_error);
1018 +- preserve_branch(handle_store_error);
1019 +-
1020 +- return PA_MEMCPY_OK;
1021 +-
1022 +-handle_load_error:
1023 +- __asm__ __volatile__ ("cda_ldw_exc:\n");
1024 +- return PA_MEMCPY_LOAD_ERROR;
1025 +-
1026 +-handle_store_error:
1027 +- __asm__ __volatile__ ("cda_stw_exc:\n");
1028 +- return PA_MEMCPY_STORE_ERROR;
1029 +-}
1030 +-
1031 +-
1032 +-/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
1033 +- * In case of an access fault the faulty address can be read from the per_cpu
1034 +- * exception data struct. */
1035 +-static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
1036 +- unsigned long len)
1037 +-{
1038 +- register unsigned long src, dst, t1, t2, t3;
1039 +- register unsigned char *pcs, *pcd;
1040 +- register unsigned int *pws, *pwd;
1041 +- register double *pds, *pdd;
1042 +- unsigned long ret;
1043 +-
1044 +- src = (unsigned long)srcp;
1045 +- dst = (unsigned long)dstp;
1046 +- pcs = (unsigned char *)srcp;
1047 +- pcd = (unsigned char *)dstp;
1048 +-
1049 +- /* prefetch_src((const void *)srcp); */
1050 +-
1051 +- if (len < THRESHOLD)
1052 +- goto byte_copy;
1053 +-
1054 +- /* Check alignment */
1055 +- t1 = (src ^ dst);
1056 +- if (unlikely(t1 & (sizeof(double)-1)))
1057 +- goto unaligned_copy;
1058 +-
1059 +- /* src and dst have same alignment. */
1060 +-
1061 +- /* Copy bytes till we are double-aligned. */
1062 +- t2 = src & (sizeof(double) - 1);
1063 +- if (unlikely(t2 != 0)) {
1064 +- t2 = sizeof(double) - t2;
1065 +- while (t2 && len) {
1066 +- /* *pcd++ = *pcs++; */
1067 +- ldbma(s_space, pcs, t3, pmc_load_exc);
1068 +- len--;
1069 +- stbma(d_space, t3, pcd, pmc_store_exc);
1070 +- t2--;
1071 +- }
1072 +- }
1073 +-
1074 +- pds = (double *)pcs;
1075 +- pdd = (double *)pcd;
1076 +-
1077 +-#if 0
1078 +- /* Copy 8 doubles at a time */
1079 +- while (len >= 8*sizeof(double)) {
1080 +- register double r1, r2, r3, r4, r5, r6, r7, r8;
1081 +- /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
1082 +- flddma(s_space, pds, r1, pmc_load_exc);
1083 +- flddma(s_space, pds, r2, pmc_load_exc);
1084 +- flddma(s_space, pds, r3, pmc_load_exc);
1085 +- flddma(s_space, pds, r4, pmc_load_exc);
1086 +- fstdma(d_space, r1, pdd, pmc_store_exc);
1087 +- fstdma(d_space, r2, pdd, pmc_store_exc);
1088 +- fstdma(d_space, r3, pdd, pmc_store_exc);
1089 +- fstdma(d_space, r4, pdd, pmc_store_exc);
1090 +-
1091 +-#if 0
1092 +- if (L1_CACHE_BYTES <= 32)
1093 +- prefetch_src((char *)pds + L1_CACHE_BYTES);
1094 +-#endif
1095 +- flddma(s_space, pds, r5, pmc_load_exc);
1096 +- flddma(s_space, pds, r6, pmc_load_exc);
1097 +- flddma(s_space, pds, r7, pmc_load_exc);
1098 +- flddma(s_space, pds, r8, pmc_load_exc);
1099 +- fstdma(d_space, r5, pdd, pmc_store_exc);
1100 +- fstdma(d_space, r6, pdd, pmc_store_exc);
1101 +- fstdma(d_space, r7, pdd, pmc_store_exc);
1102 +- fstdma(d_space, r8, pdd, pmc_store_exc);
1103 +- len -= 8*sizeof(double);
1104 +- }
1105 +-#endif
1106 +-
1107 +- pws = (unsigned int *)pds;
1108 +- pwd = (unsigned int *)pdd;
1109 +-
1110 +-word_copy:
1111 +- while (len >= 8*sizeof(unsigned int)) {
1112 +- register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
1113 +- /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
1114 +- ldwma(s_space, pws, r1, pmc_load_exc);
1115 +- ldwma(s_space, pws, r2, pmc_load_exc);
1116 +- ldwma(s_space, pws, r3, pmc_load_exc);
1117 +- ldwma(s_space, pws, r4, pmc_load_exc);
1118 +- stwma(d_space, r1, pwd, pmc_store_exc);
1119 +- stwma(d_space, r2, pwd, pmc_store_exc);
1120 +- stwma(d_space, r3, pwd, pmc_store_exc);
1121 +- stwma(d_space, r4, pwd, pmc_store_exc);
1122 +-
1123 +- ldwma(s_space, pws, r5, pmc_load_exc);
1124 +- ldwma(s_space, pws, r6, pmc_load_exc);
1125 +- ldwma(s_space, pws, r7, pmc_load_exc);
1126 +- ldwma(s_space, pws, r8, pmc_load_exc);
1127 +- stwma(d_space, r5, pwd, pmc_store_exc);
1128 +- stwma(d_space, r6, pwd, pmc_store_exc);
1129 +- stwma(d_space, r7, pwd, pmc_store_exc);
1130 +- stwma(d_space, r8, pwd, pmc_store_exc);
1131 +- len -= 8*sizeof(unsigned int);
1132 +- }
1133 +-
1134 +- while (len >= 4*sizeof(unsigned int)) {
1135 +- register unsigned int r1,r2,r3,r4;
1136 +- ldwma(s_space, pws, r1, pmc_load_exc);
1137 +- ldwma(s_space, pws, r2, pmc_load_exc);
1138 +- ldwma(s_space, pws, r3, pmc_load_exc);
1139 +- ldwma(s_space, pws, r4, pmc_load_exc);
1140 +- stwma(d_space, r1, pwd, pmc_store_exc);
1141 +- stwma(d_space, r2, pwd, pmc_store_exc);
1142 +- stwma(d_space, r3, pwd, pmc_store_exc);
1143 +- stwma(d_space, r4, pwd, pmc_store_exc);
1144 +- len -= 4*sizeof(unsigned int);
1145 +- }
1146 +-
1147 +- pcs = (unsigned char *)pws;
1148 +- pcd = (unsigned char *)pwd;
1149 +-
1150 +-byte_copy:
1151 +- while (len) {
1152 +- /* *pcd++ = *pcs++; */
1153 +- ldbma(s_space, pcs, t3, pmc_load_exc);
1154 +- stbma(d_space, t3, pcd, pmc_store_exc);
1155 +- len--;
1156 +- }
1157 +-
1158 +- return PA_MEMCPY_OK;
1159 +-
1160 +-unaligned_copy:
1161 +- /* possibly we are aligned on a word, but not on a double... */
1162 +- if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
1163 +- t2 = src & (sizeof(unsigned int) - 1);
1164 +-
1165 +- if (unlikely(t2 != 0)) {
1166 +- t2 = sizeof(unsigned int) - t2;
1167 +- while (t2) {
1168 +- /* *pcd++ = *pcs++; */
1169 +- ldbma(s_space, pcs, t3, pmc_load_exc);
1170 +- stbma(d_space, t3, pcd, pmc_store_exc);
1171 +- len--;
1172 +- t2--;
1173 +- }
1174 +- }
1175 +-
1176 +- pws = (unsigned int *)pcs;
1177 +- pwd = (unsigned int *)pcd;
1178 +- goto word_copy;
1179 +- }
1180 +-
1181 +- /* Align the destination. */
1182 +- if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
1183 +- t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
1184 +- while (t2) {
1185 +- /* *pcd++ = *pcs++; */
1186 +- ldbma(s_space, pcs, t3, pmc_load_exc);
1187 +- stbma(d_space, t3, pcd, pmc_store_exc);
1188 +- len--;
1189 +- t2--;
1190 +- }
1191 +- dst = (unsigned long)pcd;
1192 +- src = (unsigned long)pcs;
1193 +- }
1194 +-
1195 +- ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
1196 +- if (ret)
1197 +- return ret;
1198 +-
1199 +- pcs += (len & -sizeof(unsigned int));
1200 +- pcd += (len & -sizeof(unsigned int));
1201 +- len %= sizeof(unsigned int);
1202 +-
1203 +- preserve_branch(handle_load_error);
1204 +- preserve_branch(handle_store_error);
1205 +-
1206 +- goto byte_copy;
1207 +-
1208 +-handle_load_error:
1209 +- __asm__ __volatile__ ("pmc_load_exc:\n");
1210 +- return PA_MEMCPY_LOAD_ERROR;
1211 +-
1212 +-handle_store_error:
1213 +- __asm__ __volatile__ ("pmc_store_exc:\n");
1214 +- return PA_MEMCPY_STORE_ERROR;
1215 +-}
1216 +-
1217 +-
1218 + /* Returns 0 for success, otherwise, returns number of bytes not transferred. */
1219 +-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
1220 +-{
1221 +- unsigned long ret, fault_addr, reference;
1222 +- struct exception_data *d;
1223 +-
1224 +- ret = pa_memcpy_internal(dstp, srcp, len);
1225 +- if (likely(ret == PA_MEMCPY_OK))
1226 +- return 0;
1227 +-
1228 +- /* if a load or store fault occured we can get the faulty addr */
1229 +- d = this_cpu_ptr(&exception_data);
1230 +- fault_addr = d->fault_addr;
1231 +-
1232 +- /* error in load or store? */
1233 +- if (ret == PA_MEMCPY_LOAD_ERROR)
1234 +- reference = (unsigned long) srcp;
1235 +- else
1236 +- reference = (unsigned long) dstp;
1237 ++extern unsigned long pa_memcpy(void *dst, const void *src,
1238 ++ unsigned long len);
1239 +
1240 +- DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
1241 +- ret, len, fault_addr, reference);
1242 +-
1243 +- if (fault_addr >= reference)
1244 +- return len - (fault_addr - reference);
1245 +- else
1246 +- return len;
1247 +-}
1248 +-
1249 +-#ifdef __KERNEL__
1250 + unsigned long __copy_to_user(void __user *dst, const void *src,
1251 + unsigned long len)
1252 + {
1253 +@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
1254 +
1255 + return __probe_kernel_read(dst, src, size);
1256 + }
1257 +-
1258 +-#endif
1259 +diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
1260 +index 1a0b4f63f0e9..040c48fc5391 100644
1261 +--- a/arch/parisc/mm/fault.c
1262 ++++ b/arch/parisc/mm/fault.c
1263 +@@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs)
1264 + d->fault_space = regs->isr;
1265 + d->fault_addr = regs->ior;
1266 +
1267 ++ /*
1268 ++ * Fix up get_user() and put_user().
1269 ++ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
1270 ++ * bit in the relative address of the fixup routine to indicate
1271 ++ * that %r8 should be loaded with -EFAULT to report a userspace
1272 ++ * access error.
1273 ++ */
1274 ++ if (fix->fixup & 1) {
1275 ++ regs->gr[8] = -EFAULT;
1276 ++
1277 ++ /* zero target register for get_user() */
1278 ++ if (parisc_acctyp(0, regs->iir) == VM_READ) {
1279 ++ int treg = regs->iir & 0x1f;
1280 ++ regs->gr[treg] = 0;
1281 ++ }
1282 ++ }
1283 ++
1284 + regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
1285 + regs->iaoq[0] &= ~3;
1286 + /*
1287 +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
1288 +index c989e67dcc9d..9764463ce833 100644
1289 +--- a/arch/x86/kvm/vmx.c
1290 ++++ b/arch/x86/kvm/vmx.c
1291 +@@ -10027,7 +10027,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
1292 + {
1293 + struct vcpu_vmx *vmx = to_vmx(vcpu);
1294 + u32 exec_control;
1295 +- bool nested_ept_enabled = false;
1296 +
1297 + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
1298 + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
1299 +@@ -10192,7 +10191,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
1300 + vmcs12->guest_intr_status);
1301 + }
1302 +
1303 +- nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
1304 + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
1305 + }
1306 +
1307 +@@ -10344,7 +10342,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
1308 + vmx_set_efer(vcpu, vcpu->arch.efer);
1309 +
1310 + /* Shadow page tables on either EPT or shadow page tables. */
1311 +- if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
1312 ++ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
1313 + entry_failure_code))
1314 + return 1;
1315 +
1316 +diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
1317 +index 779782f58324..9a53a06e5a3e 100644
1318 +--- a/arch/x86/lib/memcpy_64.S
1319 ++++ b/arch/x86/lib/memcpy_64.S
1320 +@@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
1321 + _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
1322 + _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
1323 + _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
1324 +- _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
1325 ++ _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
1326 + _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
1327 + _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
1328 + _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
1329 +diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
1330 +index 887e57182716..aed206475aa7 100644
1331 +--- a/arch/x86/mm/kaslr.c
1332 ++++ b/arch/x86/mm/kaslr.c
1333 +@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
1334 + #if defined(CONFIG_X86_ESPFIX64)
1335 + static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
1336 + #elif defined(CONFIG_EFI)
1337 +-static const unsigned long vaddr_end = EFI_VA_START;
1338 ++static const unsigned long vaddr_end = EFI_VA_END;
1339 + #else
1340 + static const unsigned long vaddr_end = __START_KERNEL_map;
1341 + #endif
1342 +@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void)
1343 + */
1344 + BUILD_BUG_ON(vaddr_start >= vaddr_end);
1345 + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
1346 +- vaddr_end >= EFI_VA_START);
1347 ++ vaddr_end >= EFI_VA_END);
1348 + BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
1349 + IS_ENABLED(CONFIG_EFI)) &&
1350 + vaddr_end >= __START_KERNEL_map);
1351 +diff --git a/block/bio.c b/block/bio.c
1352 +index 2b375020fc49..17ece5b40a2f 100644
1353 +--- a/block/bio.c
1354 ++++ b/block/bio.c
1355 +@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
1356 + bio_list_init(&punt);
1357 + bio_list_init(&nopunt);
1358 +
1359 +- while ((bio = bio_list_pop(current->bio_list)))
1360 ++ while ((bio = bio_list_pop(&current->bio_list[0])))
1361 + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
1362 ++ current->bio_list[0] = nopunt;
1363 +
1364 +- *current->bio_list = nopunt;
1365 ++ bio_list_init(&nopunt);
1366 ++ while ((bio = bio_list_pop(&current->bio_list[1])))
1367 ++ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
1368 ++ current->bio_list[1] = nopunt;
1369 +
1370 + spin_lock(&bs->rescue_lock);
1371 + bio_list_merge(&bs->rescue_list, &punt);
1372 +@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
1373 + * we retry with the original gfp_flags.
1374 + */
1375 +
1376 +- if (current->bio_list && !bio_list_empty(current->bio_list))
1377 ++ if (current->bio_list &&
1378 ++ (!bio_list_empty(&current->bio_list[0]) ||
1379 ++ !bio_list_empty(&current->bio_list[1])))
1380 + gfp_mask &= ~__GFP_DIRECT_RECLAIM;
1381 +
1382 + p = mempool_alloc(bs->bio_pool, gfp_mask);
1383 +diff --git a/block/blk-core.c b/block/blk-core.c
1384 +index 61ba08c58b64..9734b5d0d932 100644
1385 +--- a/block/blk-core.c
1386 ++++ b/block/blk-core.c
1387 +@@ -1977,7 +1977,14 @@ generic_make_request_checks(struct bio *bio)
1388 + */
1389 + blk_qc_t generic_make_request(struct bio *bio)
1390 + {
1391 +- struct bio_list bio_list_on_stack;
1392 ++ /*
1393 ++ * bio_list_on_stack[0] contains bios submitted by the current
1394 ++ * make_request_fn.
1395 ++ * bio_list_on_stack[1] contains bios that were submitted before
1396 ++ * the current make_request_fn, but that haven't been processed
1397 ++ * yet.
1398 ++ */
1399 ++ struct bio_list bio_list_on_stack[2];
1400 + blk_qc_t ret = BLK_QC_T_NONE;
1401 +
1402 + if (!generic_make_request_checks(bio))
1403 +@@ -1994,7 +2001,7 @@ blk_qc_t generic_make_request(struct bio *bio)
1404 + * should be added at the tail
1405 + */
1406 + if (current->bio_list) {
1407 +- bio_list_add(current->bio_list, bio);
1408 ++ bio_list_add(&current->bio_list[0], bio);
1409 + goto out;
1410 + }
1411 +
1412 +@@ -2013,23 +2020,39 @@ blk_qc_t generic_make_request(struct bio *bio)
1413 + * bio_list, and call into ->make_request() again.
1414 + */
1415 + BUG_ON(bio->bi_next);
1416 +- bio_list_init(&bio_list_on_stack);
1417 +- current->bio_list = &bio_list_on_stack;
1418 ++ bio_list_init(&bio_list_on_stack[0]);
1419 ++ current->bio_list = bio_list_on_stack;
1420 + do {
1421 + struct request_queue *q = bdev_get_queue(bio->bi_bdev);
1422 +
1423 + if (likely(blk_queue_enter(q, false) == 0)) {
1424 ++ struct bio_list lower, same;
1425 ++
1426 ++ /* Create a fresh bio_list for all subordinate requests */
1427 ++ bio_list_on_stack[1] = bio_list_on_stack[0];
1428 ++ bio_list_init(&bio_list_on_stack[0]);
1429 + ret = q->make_request_fn(q, bio);
1430 +
1431 + blk_queue_exit(q);
1432 +
1433 +- bio = bio_list_pop(current->bio_list);
1434 ++ /* sort new bios into those for a lower level
1435 ++ * and those for the same level
1436 ++ */
1437 ++ bio_list_init(&lower);
1438 ++ bio_list_init(&same);
1439 ++ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
1440 ++ if (q == bdev_get_queue(bio->bi_bdev))
1441 ++ bio_list_add(&same, bio);
1442 ++ else
1443 ++ bio_list_add(&lower, bio);
1444 ++ /* now assemble so we handle the lowest level first */
1445 ++ bio_list_merge(&bio_list_on_stack[0], &lower);
1446 ++ bio_list_merge(&bio_list_on_stack[0], &same);
1447 ++ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
1448 + } else {
1449 +- struct bio *bio_next = bio_list_pop(current->bio_list);
1450 +-
1451 + bio_io_error(bio);
1452 +- bio = bio_next;
1453 + }
1454 ++ bio = bio_list_pop(&bio_list_on_stack[0]);
1455 + } while (bio);
1456 + current->bio_list = NULL; /* deactivate */
1457 +
1458 +diff --git a/crypto/lrw.c b/crypto/lrw.c
1459 +index ecd8474018e3..3ea095adafd9 100644
1460 +--- a/crypto/lrw.c
1461 ++++ b/crypto/lrw.c
1462 +@@ -286,8 +286,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
1463 +
1464 + subreq->cryptlen = LRW_BUFFER_SIZE;
1465 + if (req->cryptlen > LRW_BUFFER_SIZE) {
1466 +- subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
1467 +- rctx->ext = kmalloc(subreq->cryptlen, gfp);
1468 ++ unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
1469 ++
1470 ++ rctx->ext = kmalloc(n, gfp);
1471 ++ if (rctx->ext)
1472 ++ subreq->cryptlen = n;
1473 + }
1474 +
1475 + rctx->src = req->src;
1476 +diff --git a/crypto/xts.c b/crypto/xts.c
1477 +index baeb34dd8582..c976bfac29da 100644
1478 +--- a/crypto/xts.c
1479 ++++ b/crypto/xts.c
1480 +@@ -230,8 +230,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
1481 +
1482 + subreq->cryptlen = XTS_BUFFER_SIZE;
1483 + if (req->cryptlen > XTS_BUFFER_SIZE) {
1484 +- subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
1485 +- rctx->ext = kmalloc(subreq->cryptlen, gfp);
1486 ++ unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
1487 ++
1488 ++ rctx->ext = kmalloc(n, gfp);
1489 ++ if (rctx->ext)
1490 ++ subreq->cryptlen = n;
1491 + }
1492 +
1493 + rctx->src = req->src;
1494 +diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
1495 +index 9ed087853dee..4c5678cfa9c4 100644
1496 +--- a/drivers/acpi/Makefile
1497 ++++ b/drivers/acpi/Makefile
1498 +@@ -2,7 +2,6 @@
1499 + # Makefile for the Linux ACPI interpreter
1500 + #
1501 +
1502 +-ccflags-y := -Os
1503 + ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT
1504 +
1505 + #
1506 +diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
1507 +index b4c1a6a51da4..03250e1f1103 100644
1508 +--- a/drivers/acpi/acpi_platform.c
1509 ++++ b/drivers/acpi/acpi_platform.c
1510 +@@ -25,9 +25,11 @@
1511 + ACPI_MODULE_NAME("platform");
1512 +
1513 + static const struct acpi_device_id forbidden_id_list[] = {
1514 +- {"PNP0000", 0}, /* PIC */
1515 +- {"PNP0100", 0}, /* Timer */
1516 +- {"PNP0200", 0}, /* AT DMA Controller */
1517 ++ {"PNP0000", 0}, /* PIC */
1518 ++ {"PNP0100", 0}, /* Timer */
1519 ++ {"PNP0200", 0}, /* AT DMA Controller */
1520 ++ {"ACPI0009", 0}, /* IOxAPIC */
1521 ++ {"ACPI000A", 0}, /* IOAPIC */
1522 + {"", 0},
1523 + };
1524 +
1525 +diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
1526 +index 612898b4aaad..3422f203455d 100644
1527 +--- a/drivers/crypto/ccp/ccp-dev-v5.c
1528 ++++ b/drivers/crypto/ccp/ccp-dev-v5.c
1529 +@@ -1014,6 +1014,7 @@ const struct ccp_vdata ccpv5a = {
1530 +
1531 + const struct ccp_vdata ccpv5b = {
1532 + .version = CCP_VERSION(5, 0),
1533 ++ .dma_chan_attr = DMA_PRIVATE,
1534 + .setup = ccp5other_config,
1535 + .perform = &ccp5_actions,
1536 + .bar = 2,
1537 +diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
1538 +index 649e5610a5ce..cd9a7051da3c 100644
1539 +--- a/drivers/crypto/ccp/ccp-dev.h
1540 ++++ b/drivers/crypto/ccp/ccp-dev.h
1541 +@@ -179,6 +179,10 @@
1542 +
1543 + /* ------------------------ General CCP Defines ------------------------ */
1544 +
1545 ++#define CCP_DMA_DFLT 0x0
1546 ++#define CCP_DMA_PRIV 0x1
1547 ++#define CCP_DMA_PUB 0x2
1548 ++
1549 + #define CCP_DMAPOOL_MAX_SIZE 64
1550 + #define CCP_DMAPOOL_ALIGN BIT(5)
1551 +
1552 +@@ -635,6 +639,7 @@ struct ccp_actions {
1553 + /* Structure to hold CCP version-specific values */
1554 + struct ccp_vdata {
1555 + const unsigned int version;
1556 ++ const unsigned int dma_chan_attr;
1557 + void (*setup)(struct ccp_device *);
1558 + const struct ccp_actions *perform;
1559 + const unsigned int bar;
1560 +diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
1561 +index 8d0eeb46d4a2..e00be01fbf5a 100644
1562 +--- a/drivers/crypto/ccp/ccp-dmaengine.c
1563 ++++ b/drivers/crypto/ccp/ccp-dmaengine.c
1564 +@@ -10,6 +10,7 @@
1565 + * published by the Free Software Foundation.
1566 + */
1567 +
1568 ++#include <linux/module.h>
1569 + #include <linux/kernel.h>
1570 + #include <linux/dmaengine.h>
1571 + #include <linux/spinlock.h>
1572 +@@ -25,6 +26,37 @@
1573 + (mask == 0) ? 64 : fls64(mask); \
1574 + })
1575 +
1576 ++/* The CCP as a DMA provider can be configured for public or private
1577 ++ * channels. Default is specified in the vdata for the device (PCI ID).
1578 ++ * This module parameter will override for all channels on all devices:
1579 ++ * dma_chan_attr = 0x2 to force all channels public
1580 ++ * = 0x1 to force all channels private
1581 ++ * = 0x0 to defer to the vdata setting
1582 ++ * = any other value: warning, revert to 0x0
1583 ++ */
1584 ++static unsigned int dma_chan_attr = CCP_DMA_DFLT;
1585 ++module_param(dma_chan_attr, uint, 0444);
1586 ++MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public");
1587 ++
1588 ++unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
1589 ++{
1590 ++ switch (dma_chan_attr) {
1591 ++ case CCP_DMA_DFLT:
1592 ++ return ccp->vdata->dma_chan_attr;
1593 ++
1594 ++ case CCP_DMA_PRIV:
1595 ++ return DMA_PRIVATE;
1596 ++
1597 ++ case CCP_DMA_PUB:
1598 ++ return 0;
1599 ++
1600 ++ default:
1601 ++ dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n",
1602 ++ dma_chan_attr);
1603 ++ return ccp->vdata->dma_chan_attr;
1604 ++ }
1605 ++}
1606 ++
1607 + static void ccp_free_cmd_resources(struct ccp_device *ccp,
1608 + struct list_head *list)
1609 + {
1610 +@@ -675,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
1611 + dma_cap_set(DMA_SG, dma_dev->cap_mask);
1612 + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
1613 +
1614 ++ /* The DMA channels for this device can be set to public or private,
1615 ++ * and overridden by the module parameter dma_chan_attr.
1616 ++ * Default: according to the value in vdata (dma_chan_attr=0)
1617 ++ * dma_chan_attr=0x1: all channels private (override vdata)
1618 ++ * dma_chan_attr=0x2: all channels public (override vdata)
1619 ++ */
1620 ++ if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE)
1621 ++ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
1622 ++
1623 + INIT_LIST_HEAD(&dma_dev->channels);
1624 + for (i = 0; i < ccp->cmd_q_count; i++) {
1625 + chan = ccp->ccp_dma_chan + i;
1626 +diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile
1627 +index a18f156c8b66..64c0b4546fb2 100644
1628 +--- a/drivers/gpu/drm/armada/Makefile
1629 ++++ b/drivers/gpu/drm/armada/Makefile
1630 +@@ -4,3 +4,5 @@ armada-y += armada_510.o
1631 + armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o
1632 +
1633 + obj-$(CONFIG_DRM_ARMADA) := armada.o
1634 ++
1635 ++CFLAGS_armada_trace.o := -I$(src)
1636 +diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
1637 +index 0a67124bb2a4..db0a43a090d0 100644
1638 +--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
1639 ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
1640 +@@ -1303,6 +1303,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
1641 + goto out_pm_put;
1642 + }
1643 +
1644 ++ mutex_lock(&gpu->lock);
1645 ++
1646 + fence = etnaviv_gpu_fence_alloc(gpu);
1647 + if (!fence) {
1648 + event_free(gpu, event);
1649 +@@ -1310,8 +1312,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
1650 + goto out_pm_put;
1651 + }
1652 +
1653 +- mutex_lock(&gpu->lock);
1654 +-
1655 + gpu->event[event].fence = fence;
1656 + submit->fence = fence->seqno;
1657 + gpu->active_fence = submit->fence;
1658 +diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
1659 +index 3f656e3a6e5a..325cb9b55989 100644
1660 +--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
1661 ++++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
1662 +@@ -1334,6 +1334,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
1663 + vgpu->handle = (unsigned long)info;
1664 + info->vgpu = vgpu;
1665 + info->kvm = kvm;
1666 ++ kvm_get_kvm(info->kvm);
1667 +
1668 + kvmgt_protect_table_init(info);
1669 + gvt_cache_init(vgpu);
1670 +@@ -1353,6 +1354,7 @@ static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
1671 + }
1672 +
1673 + kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
1674 ++ kvm_put_kvm(info->kvm);
1675 + kvmgt_protect_table_destroy(info);
1676 + gvt_cache_destroy(info->vgpu);
1677 + vfree(info);
1678 +diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
1679 +index b4bde1452f2a..6924a8e79da9 100644
1680 +--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
1681 ++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
1682 +@@ -735,10 +735,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
1683 + GEM_BUG_ON(pte_end > GEN8_PTES);
1684 +
1685 + bitmap_clear(pt->used_ptes, pte, num_entries);
1686 +-
1687 +- if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
1688 +- free_pt(to_i915(vm->dev), pt);
1689 +- return true;
1690 ++ if (USES_FULL_PPGTT(vm->i915)) {
1691 ++ if (bitmap_empty(pt->used_ptes, GEN8_PTES))
1692 ++ return true;
1693 + }
1694 +
1695 + pt_vaddr = kmap_px(pt);
1696 +@@ -775,13 +774,12 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
1697 + pde_vaddr = kmap_px(pd);
1698 + pde_vaddr[pde] = scratch_pde;
1699 + kunmap_px(ppgtt, pde_vaddr);
1700 ++ free_pt(to_i915(vm->dev), pt);
1701 + }
1702 + }
1703 +
1704 +- if (bitmap_empty(pd->used_pdes, I915_PDES)) {
1705 +- free_pd(to_i915(vm->dev), pd);
1706 ++ if (bitmap_empty(pd->used_pdes, I915_PDES))
1707 + return true;
1708 +- }
1709 +
1710 + return false;
1711 + }
1712 +@@ -795,7 +793,6 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
1713 + uint64_t length)
1714 + {
1715 + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1716 +- struct drm_i915_private *dev_priv = to_i915(vm->dev);
1717 + struct i915_page_directory *pd;
1718 + uint64_t pdpe;
1719 + gen8_ppgtt_pdpe_t *pdpe_vaddr;
1720 +@@ -813,16 +810,14 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
1721 + pdpe_vaddr[pdpe] = scratch_pdpe;
1722 + kunmap_px(ppgtt, pdpe_vaddr);
1723 + }
1724 ++ free_pd(to_i915(vm->dev), pd);
1725 + }
1726 + }
1727 +
1728 + mark_tlbs_dirty(ppgtt);
1729 +
1730 +- if (USES_FULL_48BIT_PPGTT(dev_priv) &&
1731 +- bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) {
1732 +- free_pdp(dev_priv, pdp);
1733 ++ if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
1734 + return true;
1735 +- }
1736 +
1737 + return false;
1738 + }
1739 +@@ -836,6 +831,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
1740 + uint64_t start,
1741 + uint64_t length)
1742 + {
1743 ++ struct drm_i915_private *dev_priv = to_i915(vm->dev);
1744 + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1745 + struct i915_page_directory_pointer *pdp;
1746 + uint64_t pml4e;
1747 +@@ -854,6 +850,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
1748 + pml4e_vaddr = kmap_px(pml4);
1749 + pml4e_vaddr[pml4e] = scratch_pml4e;
1750 + kunmap_px(ppgtt, pml4e_vaddr);
1751 ++ free_pdp(dev_priv, pdp);
1752 + }
1753 + }
1754 + }
1755 +diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
1756 +index beabc17e7c8a..2af4522d60e6 100644
1757 +--- a/drivers/gpu/drm/i915/intel_lrc.c
1758 ++++ b/drivers/gpu/drm/i915/intel_lrc.c
1759 +@@ -362,7 +362,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
1760 + static u64 execlists_update_context(struct drm_i915_gem_request *rq)
1761 + {
1762 + struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
1763 +- struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
1764 ++ struct i915_hw_ppgtt *ppgtt =
1765 ++ rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
1766 + u32 *reg_state = ce->lrc_reg_state;
1767 +
1768 + reg_state[CTX_RING_TAIL+1] = rq->tail;
1769 +diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
1770 +index 0cf03ccbf0a7..445a907552c1 100644
1771 +--- a/drivers/gpu/drm/radeon/radeon_ttm.c
1772 ++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
1773 +@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
1774 + rbo->placement.num_busy_placement = 0;
1775 + for (i = 0; i < rbo->placement.num_placement; i++) {
1776 + if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
1777 +- if (rbo->placements[0].fpfn < fpfn)
1778 +- rbo->placements[0].fpfn = fpfn;
1779 ++ if (rbo->placements[i].fpfn < fpfn)
1780 ++ rbo->placements[i].fpfn = fpfn;
1781 + } else {
1782 + rbo->placement.busy_placement =
1783 + &rbo->placements[i];
1784 +diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
1785 +index 7aadce1f7e7a..c7e6c9839c9a 100644
1786 +--- a/drivers/gpu/drm/vc4/vc4_crtc.c
1787 ++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
1788 +@@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
1789 + drm_atomic_helper_crtc_destroy_state(crtc, state);
1790 + }
1791 +
1792 ++static void
1793 ++vc4_crtc_reset(struct drm_crtc *crtc)
1794 ++{
1795 ++ if (crtc->state)
1796 ++ __drm_atomic_helper_crtc_destroy_state(crtc->state);
1797 ++
1798 ++ crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
1799 ++ if (crtc->state)
1800 ++ crtc->state->crtc = crtc;
1801 ++}
1802 ++
1803 + static const struct drm_crtc_funcs vc4_crtc_funcs = {
1804 + .set_config = drm_atomic_helper_set_config,
1805 + .destroy = vc4_crtc_destroy,
1806 +@@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
1807 + .set_property = NULL,
1808 + .cursor_set = NULL, /* handled by drm_mode_cursor_universal */
1809 + .cursor_move = NULL, /* handled by drm_mode_cursor_universal */
1810 +- .reset = drm_atomic_helper_crtc_reset,
1811 ++ .reset = vc4_crtc_reset,
1812 + .atomic_duplicate_state = vc4_crtc_duplicate_state,
1813 + .atomic_destroy_state = vc4_crtc_destroy_state,
1814 + .gamma_set = vc4_crtc_gamma_set,
1815 +diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
1816 +index 8aeca038cc73..5f282bb0ea10 100644
1817 +--- a/drivers/hid/wacom_sys.c
1818 ++++ b/drivers/hid/wacom_sys.c
1819 +@@ -2081,6 +2081,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
1820 +
1821 + wacom_update_name(wacom, wireless ? " (WL)" : "");
1822 +
1823 ++ /* a pen-only Bamboo supports neither touch nor pad */
1824 ++ if ((features->type == BAMBOO_PEN) &&
1825 ++ ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
1826 ++ (features->device_type & WACOM_DEVICETYPE_PAD))) {
1827 ++ error = -ENODEV;
1828 ++ goto fail;
1829 ++ }
1830 ++
1831 + error = wacom_add_shared_data(hdev);
1832 + if (error)
1833 + goto fail;
1834 +@@ -2128,14 +2136,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
1835 + goto fail_quirks;
1836 + }
1837 +
1838 +- /* pen only Bamboo neither support touch nor pad */
1839 +- if ((features->type == BAMBOO_PEN) &&
1840 +- ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
1841 +- (features->device_type & WACOM_DEVICETYPE_PAD))) {
1842 +- error = -ENODEV;
1843 +- goto fail_quirks;
1844 +- }
1845 +-
1846 + if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR)
1847 + error = hid_hw_open(hdev);
1848 +
1849 +diff --git a/drivers/md/dm.c b/drivers/md/dm.c
1850 +index 0ff5469c03d2..b78bc2916664 100644
1851 +--- a/drivers/md/dm.c
1852 ++++ b/drivers/md/dm.c
1853 +@@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
1854 + struct dm_offload *o = container_of(cb, struct dm_offload, cb);
1855 + struct bio_list list;
1856 + struct bio *bio;
1857 ++ int i;
1858 +
1859 + INIT_LIST_HEAD(&o->cb.list);
1860 +
1861 + if (unlikely(!current->bio_list))
1862 + return;
1863 +
1864 +- list = *current->bio_list;
1865 +- bio_list_init(current->bio_list);
1866 +-
1867 +- while ((bio = bio_list_pop(&list))) {
1868 +- struct bio_set *bs = bio->bi_pool;
1869 +- if (unlikely(!bs) || bs == fs_bio_set) {
1870 +- bio_list_add(current->bio_list, bio);
1871 +- continue;
1872 ++ for (i = 0; i < 2; i++) {
1873 ++ list = current->bio_list[i];
1874 ++ bio_list_init(&current->bio_list[i]);
1875 ++
1876 ++ while ((bio = bio_list_pop(&list))) {
1877 ++ struct bio_set *bs = bio->bi_pool;
1878 ++ if (unlikely(!bs) || bs == fs_bio_set) {
1879 ++ bio_list_add(&current->bio_list[i], bio);
1880 ++ continue;
1881 ++ }
1882 ++
1883 ++ spin_lock(&bs->rescue_lock);
1884 ++ bio_list_add(&bs->rescue_list, bio);
1885 ++ queue_work(bs->rescue_workqueue, &bs->rescue_work);
1886 ++ spin_unlock(&bs->rescue_lock);
1887 + }
1888 +-
1889 +- spin_lock(&bs->rescue_lock);
1890 +- bio_list_add(&bs->rescue_list, bio);
1891 +- queue_work(bs->rescue_workqueue, &bs->rescue_work);
1892 +- spin_unlock(&bs->rescue_lock);
1893 + }
1894 + }
1895 +
1896 +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
1897 +index 87f14080c2cd..41693890e2b8 100644
1898 +--- a/drivers/md/raid10.c
1899 ++++ b/drivers/md/raid10.c
1900 +@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
1901 + !conf->barrier ||
1902 + (atomic_read(&conf->nr_pending) &&
1903 + current->bio_list &&
1904 +- !bio_list_empty(current->bio_list)),
1905 ++ (!bio_list_empty(&current->bio_list[0]) ||
1906 ++ !bio_list_empty(&current->bio_list[1]))),
1907 + conf->resync_lock);
1908 + conf->nr_waiting--;
1909 + if (!conf->nr_waiting)
1910 +diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
1911 +index 7fd964256faa..d5430ed02a67 100644
1912 +--- a/drivers/mmc/host/sdhci-of-at91.c
1913 ++++ b/drivers/mmc/host/sdhci-of-at91.c
1914 +@@ -29,6 +29,8 @@
1915 +
1916 + #include "sdhci-pltfm.h"
1917 +
1918 ++#define SDMMC_MC1R 0x204
1919 ++#define SDMMC_MC1R_DDR BIT(3)
1920 + #define SDMMC_CACR 0x230
1921 + #define SDMMC_CACR_CAPWREN BIT(0)
1922 + #define SDMMC_CACR_KEY (0x46 << 8)
1923 +@@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
1924 + sdhci_set_power_noreg(host, mode, vdd);
1925 + }
1926 +
1927 ++void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
1928 ++{
1929 ++ if (timing == MMC_TIMING_MMC_DDR52)
1930 ++ sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
1931 ++ sdhci_set_uhs_signaling(host, timing);
1932 ++}
1933 ++
1934 + static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
1935 + .set_clock = sdhci_at91_set_clock,
1936 + .set_bus_width = sdhci_set_bus_width,
1937 + .reset = sdhci_reset,
1938 +- .set_uhs_signaling = sdhci_set_uhs_signaling,
1939 ++ .set_uhs_signaling = sdhci_at91_set_uhs_signaling,
1940 + .set_power = sdhci_at91_set_power,
1941 + };
1942 +
1943 +diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
1944 +index d0819d18ad08..d2a4adc50a84 100644
1945 +--- a/drivers/mmc/host/sdhci.c
1946 ++++ b/drivers/mmc/host/sdhci.c
1947 +@@ -1830,6 +1830,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
1948 + struct sdhci_host *host = mmc_priv(mmc);
1949 + unsigned long flags;
1950 +
1951 ++ if (enable)
1952 ++ pm_runtime_get_noresume(host->mmc->parent);
1953 ++
1954 + spin_lock_irqsave(&host->lock, flags);
1955 + if (enable)
1956 + host->flags |= SDHCI_SDIO_IRQ_ENABLED;
1957 +@@ -1838,6 +1841,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
1958 +
1959 + sdhci_enable_sdio_irq_nolock(host, enable);
1960 + spin_unlock_irqrestore(&host->lock, flags);
1961 ++
1962 ++ if (!enable)
1963 ++ pm_runtime_put_noidle(host->mmc->parent);
1964 + }
1965 +
1966 + static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
1967 +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
1968 +index 8a3c3e32a704..3818ff609d55 100644
1969 +--- a/drivers/nvme/host/core.c
1970 ++++ b/drivers/nvme/host/core.c
1971 +@@ -2034,9 +2034,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
1972 + * Revalidating a dead namespace sets capacity to 0. This will
1973 + * end buffered writers dirtying pages that can't be synced.
1974 + */
1975 +- if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
1976 +- revalidate_disk(ns->disk);
1977 +-
1978 ++ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
1979 ++ continue;
1980 ++ revalidate_disk(ns->disk);
1981 + blk_set_queue_dying(ns->queue);
1982 + blk_mq_abort_requeue_list(ns->queue);
1983 + blk_mq_start_stopped_hw_queues(ns->queue, true);
1984 +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
1985 +index 3faefabf339c..410c3d15b0cb 100644
1986 +--- a/drivers/nvme/host/pci.c
1987 ++++ b/drivers/nvme/host/pci.c
1988 +@@ -1990,8 +1990,10 @@ static void nvme_remove(struct pci_dev *pdev)
1989 +
1990 + pci_set_drvdata(pdev, NULL);
1991 +
1992 +- if (!pci_device_is_present(pdev))
1993 ++ if (!pci_device_is_present(pdev)) {
1994 + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
1995 ++ nvme_dev_disable(dev, false);
1996 ++ }
1997 +
1998 + flush_work(&dev->reset_work);
1999 + nvme_uninit_ctrl(&dev->ctrl);
2000 +diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
2001 +index af722eb0ca75..e354010fb006 100644
2002 +--- a/drivers/pci/host/pci-thunder-pem.c
2003 ++++ b/drivers/pci/host/pci-thunder-pem.c
2004 +@@ -331,7 +331,7 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg)
2005 + if (!res_pem)
2006 + return -ENOMEM;
2007 +
2008 +- ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem);
2009 ++ ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem);
2010 + if (ret) {
2011 + dev_err(dev, "can't get rc base address\n");
2012 + return ret;
2013 +diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c
2014 +index bd4c9ec25edc..384c27e664fe 100644
2015 +--- a/drivers/pci/host/pcie-iproc-bcma.c
2016 ++++ b/drivers/pci/host/pcie-iproc-bcma.c
2017 +@@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
2018 + {
2019 + struct device *dev = &bdev->dev;
2020 + struct iproc_pcie *pcie;
2021 +- LIST_HEAD(res);
2022 +- struct resource res_mem;
2023 ++ LIST_HEAD(resources);
2024 + int ret;
2025 +
2026 + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
2027 +@@ -63,22 +62,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
2028 +
2029 + pcie->base_addr = bdev->addr;
2030 +
2031 +- res_mem.start = bdev->addr_s[0];
2032 +- res_mem.end = bdev->addr_s[0] + SZ_128M - 1;
2033 +- res_mem.name = "PCIe MEM space";
2034 +- res_mem.flags = IORESOURCE_MEM;
2035 +- pci_add_resource(&res, &res_mem);
2036 ++ pcie->mem.start = bdev->addr_s[0];
2037 ++ pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
2038 ++ pcie->mem.name = "PCIe MEM space";
2039 ++ pcie->mem.flags = IORESOURCE_MEM;
2040 ++ pci_add_resource(&resources, &pcie->mem);
2041 +
2042 + pcie->map_irq = iproc_pcie_bcma_map_irq;
2043 +
2044 +- ret = iproc_pcie_setup(pcie, &res);
2045 +- if (ret)
2046 ++ ret = iproc_pcie_setup(pcie, &resources);
2047 ++ if (ret) {
2048 + dev_err(dev, "PCIe controller setup failed\n");
2049 +-
2050 +- pci_free_resource_list(&res);
2051 ++ pci_free_resource_list(&resources);
2052 ++ return ret;
2053 ++ }
2054 +
2055 + bcma_set_drvdata(bdev, pcie);
2056 +- return ret;
2057 ++ return 0;
2058 + }
2059 +
2060 + static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
2061 +diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
2062 +index 22d814a78a78..f95564ac37df 100644
2063 +--- a/drivers/pci/host/pcie-iproc-platform.c
2064 ++++ b/drivers/pci/host/pcie-iproc-platform.c
2065 +@@ -52,7 +52,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
2066 + struct device_node *np = dev->of_node;
2067 + struct resource reg;
2068 + resource_size_t iobase = 0;
2069 +- LIST_HEAD(res);
2070 ++ LIST_HEAD(resources);
2071 + int ret;
2072 +
2073 + of_id = of_match_device(iproc_pcie_of_match_table, dev);
2074 +@@ -101,10 +101,10 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
2075 + pcie->phy = NULL;
2076 + }
2077 +
2078 +- ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase);
2079 ++ ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources,
2080 ++ &iobase);
2081 + if (ret) {
2082 +- dev_err(dev,
2083 +- "unable to get PCI host bridge resources\n");
2084 ++ dev_err(dev, "unable to get PCI host bridge resources\n");
2085 + return ret;
2086 + }
2087 +
2088 +@@ -117,14 +117,15 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
2089 + pcie->map_irq = of_irq_parse_and_map_pci;
2090 + }
2091 +
2092 +- ret = iproc_pcie_setup(pcie, &res);
2093 +- if (ret)
2094 ++ ret = iproc_pcie_setup(pcie, &resources);
2095 ++ if (ret) {
2096 + dev_err(dev, "PCIe controller setup failed\n");
2097 +-
2098 +- pci_free_resource_list(&res);
2099 ++ pci_free_resource_list(&resources);
2100 ++ return ret;
2101 ++ }
2102 +
2103 + platform_set_drvdata(pdev, pcie);
2104 +- return ret;
2105 ++ return 0;
2106 + }
2107 +
2108 + static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
2109 +diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
2110 +index 04fed8e907f1..0bbe2ea44f3e 100644
2111 +--- a/drivers/pci/host/pcie-iproc.h
2112 ++++ b/drivers/pci/host/pcie-iproc.h
2113 +@@ -90,6 +90,7 @@ struct iproc_pcie {
2114 + #ifdef CONFIG_ARM
2115 + struct pci_sys_data sysdata;
2116 + #endif
2117 ++ struct resource mem;
2118 + struct pci_bus *root_bus;
2119 + struct phy *phy;
2120 + int (*map_irq)(const struct pci_dev *, u8, u8);
2121 +diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
2122 +index d704752b6332..6021cb9ea910 100644
2123 +--- a/drivers/scsi/device_handler/scsi_dh_alua.c
2124 ++++ b/drivers/scsi/device_handler/scsi_dh_alua.c
2125 +@@ -113,7 +113,7 @@ struct alua_queue_data {
2126 + #define ALUA_POLICY_SWITCH_ALL 1
2127 +
2128 + static void alua_rtpg_work(struct work_struct *work);
2129 +-static void alua_rtpg_queue(struct alua_port_group *pg,
2130 ++static bool alua_rtpg_queue(struct alua_port_group *pg,
2131 + struct scsi_device *sdev,
2132 + struct alua_queue_data *qdata, bool force);
2133 + static void alua_check(struct scsi_device *sdev, bool force);
2134 +@@ -866,7 +866,13 @@ static void alua_rtpg_work(struct work_struct *work)
2135 + kref_put(&pg->kref, release_port_group);
2136 + }
2137 +
2138 +-static void alua_rtpg_queue(struct alua_port_group *pg,
2139 ++/**
2140 ++ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
2141 ++ *
2142 ++ * Returns true if and only if alua_rtpg_work() will be called asynchronously.
2143 ++ * That function is responsible for calling @qdata->fn().
2144 ++ */
2145 ++static bool alua_rtpg_queue(struct alua_port_group *pg,
2146 + struct scsi_device *sdev,
2147 + struct alua_queue_data *qdata, bool force)
2148 + {
2149 +@@ -874,8 +880,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
2150 + unsigned long flags;
2151 + struct workqueue_struct *alua_wq = kaluad_wq;
2152 +
2153 +- if (!pg)
2154 +- return;
2155 ++ if (!pg || scsi_device_get(sdev))
2156 ++ return false;
2157 +
2158 + spin_lock_irqsave(&pg->lock, flags);
2159 + if (qdata) {
2160 +@@ -888,14 +894,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
2161 + pg->flags |= ALUA_PG_RUN_RTPG;
2162 + kref_get(&pg->kref);
2163 + pg->rtpg_sdev = sdev;
2164 +- scsi_device_get(sdev);
2165 + start_queue = 1;
2166 + } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
2167 + pg->flags |= ALUA_PG_RUN_RTPG;
2168 + /* Do not queue if the worker is already running */
2169 + if (!(pg->flags & ALUA_PG_RUNNING)) {
2170 + kref_get(&pg->kref);
2171 +- sdev = NULL;
2172 + start_queue = 1;
2173 + }
2174 + }
2175 +@@ -904,13 +908,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
2176 + alua_wq = kaluad_sync_wq;
2177 + spin_unlock_irqrestore(&pg->lock, flags);
2178 +
2179 +- if (start_queue &&
2180 +- !queue_delayed_work(alua_wq, &pg->rtpg_work,
2181 +- msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
2182 +- if (sdev)
2183 +- scsi_device_put(sdev);
2184 +- kref_put(&pg->kref, release_port_group);
2185 ++ if (start_queue) {
2186 ++ if (queue_delayed_work(alua_wq, &pg->rtpg_work,
2187 ++ msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
2188 ++ sdev = NULL;
2189 ++ else
2190 ++ kref_put(&pg->kref, release_port_group);
2191 + }
2192 ++ if (sdev)
2193 ++ scsi_device_put(sdev);
2194 ++
2195 ++ return true;
2196 + }
2197 +
2198 + /*
2199 +@@ -1011,11 +1019,13 @@ static int alua_activate(struct scsi_device *sdev,
2200 + mutex_unlock(&h->init_mutex);
2201 + goto out;
2202 + }
2203 +- fn = NULL;
2204 + rcu_read_unlock();
2205 + mutex_unlock(&h->init_mutex);
2206 +
2207 +- alua_rtpg_queue(pg, sdev, qdata, true);
2208 ++ if (alua_rtpg_queue(pg, sdev, qdata, true))
2209 ++ fn = NULL;
2210 ++ else
2211 ++ err = SCSI_DH_DEV_OFFLINED;
2212 + kref_put(&pg->kref, release_port_group);
2213 + out:
2214 + if (fn)
2215 +diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
2216 +index 763f012fdeca..87f5e694dbed 100644
2217 +--- a/drivers/scsi/libsas/sas_ata.c
2218 ++++ b/drivers/scsi/libsas/sas_ata.c
2219 +@@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
2220 + task->num_scatter = qc->n_elem;
2221 + } else {
2222 + for_each_sg(qc->sg, sg, qc->n_elem, si)
2223 +- xfer += sg->length;
2224 ++ xfer += sg_dma_len(sg);
2225 +
2226 + task->total_xfer_len = xfer;
2227 + task->num_scatter = si;
2228 +diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
2229 +index 121de0aaa6ad..f753df25ba34 100644
2230 +--- a/drivers/scsi/sg.c
2231 ++++ b/drivers/scsi/sg.c
2232 +@@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
2233 + result = get_user(val, ip);
2234 + if (result)
2235 + return result;
2236 ++ if (val > SG_MAX_CDB_SIZE)
2237 ++ return -ENOMEM;
2238 + sfp->next_cmd_len = (val > 0) ? val : 0;
2239 + return 0;
2240 + case SG_GET_VERSION_NUM:
2241 +diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
2242 +index fabbe76203bb..4d079cdaa7a3 100644
2243 +--- a/drivers/tty/serial/atmel_serial.c
2244 ++++ b/drivers/tty/serial/atmel_serial.c
2245 +@@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port)
2246 + atmel_uart_writel(port, ATMEL_PDC_TCR, 0);
2247 + atmel_port->pdc_tx.ofs = 0;
2248 + }
2249 ++ /*
2250 ++ * in uart_flush_buffer(), the xmit circular buffer has just
2251 ++ * been cleared, so we have to reset tx_len accordingly.
2252 ++ */
2253 ++ atmel_port->tx_len = 0;
2254 + }
2255 +
2256 + /*
2257 +@@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count)
2258 + pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN;
2259 + atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
2260 +
2261 ++ /* Make sure that tx path is actually able to send characters */
2262 ++ atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
2263 ++
2264 + uart_console_write(port, s, count, atmel_console_putchar);
2265 +
2266 + /*
2267 +diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
2268 +index 8c1c9112b3fd..181972b03845 100644
2269 +--- a/drivers/tty/serial/mxs-auart.c
2270 ++++ b/drivers/tty/serial/mxs-auart.c
2271 +@@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u,
2272 + AUART_LINECTRL_BAUD_DIV_MAX);
2273 + baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN;
2274 + baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max);
2275 +- div = u->uartclk * 32 / baud;
2276 ++ div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud);
2277 + }
2278 +
2279 + ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F);
2280 +diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
2281 +index 479e223f9cff..f029aad67183 100644
2282 +--- a/drivers/usb/core/hcd.c
2283 ++++ b/drivers/usb/core/hcd.c
2284 +@@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
2285 + */
2286 + tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength);
2287 + tbuf = kzalloc(tbuf_size, GFP_KERNEL);
2288 +- if (!tbuf)
2289 +- return -ENOMEM;
2290 ++ if (!tbuf) {
2291 ++ status = -ENOMEM;
2292 ++ goto err_alloc;
2293 ++ }
2294 +
2295 + bufp = tbuf;
2296 +
2297 +@@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
2298 + }
2299 +
2300 + kfree(tbuf);
2301 ++ err_alloc:
2302 +
2303 + /* any errors get returned through the urb completion */
2304 + spin_lock_irq(&hcd_root_hub_lock);
2305 +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
2306 +index e32029a31ca4..4c101f4161f8 100644
2307 +--- a/drivers/usb/host/xhci-ring.c
2308 ++++ b/drivers/usb/host/xhci-ring.c
2309 +@@ -2000,6 +2000,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
2310 + case TRB_NORMAL:
2311 + td->urb->actual_length = requested - remaining;
2312 + goto finish_td;
2313 ++ case TRB_STATUS:
2314 ++ td->urb->actual_length = requested;
2315 ++ goto finish_td;
2316 + default:
2317 + xhci_warn(xhci, "WARN: unexpected TRB Type %d\n",
2318 + trb_type);
2319 +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
2320 +index 0a436c4a28ad..2c48e2528600 100644
2321 +--- a/fs/nfs/nfs4proc.c
2322 ++++ b/fs/nfs/nfs4proc.c
2323 +@@ -2550,17 +2550,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
2324 + }
2325 +
2326 + nfs4_stateid_copy(&stateid, &delegation->stateid);
2327 +- if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
2328 ++ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
2329 ++ !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
2330 ++ &delegation->flags)) {
2331 + rcu_read_unlock();
2332 + nfs_finish_clear_delegation_stateid(state, &stateid);
2333 + return;
2334 + }
2335 +
2336 +- if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
2337 +- rcu_read_unlock();
2338 +- return;
2339 +- }
2340 +-
2341 + cred = get_rpccred(delegation->cred);
2342 + rcu_read_unlock();
2343 + status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
2344 +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
2345 +index 010aff5c5a79..536009e50387 100644
2346 +--- a/fs/nfsd/nfsproc.c
2347 ++++ b/fs/nfsd/nfsproc.c
2348 +@@ -790,6 +790,7 @@ nfserrno (int errno)
2349 + { nfserr_serverfault, -ESERVERFAULT },
2350 + { nfserr_serverfault, -ENFILE },
2351 + { nfserr_io, -EUCLEAN },
2352 ++ { nfserr_perm, -ENOKEY },
2353 + };
2354 + int i;
2355 +
2356 +diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
2357 +index bfc00de5c6f1..3365ecb9074d 100644
2358 +--- a/fs/xfs/libxfs/xfs_bmap.c
2359 ++++ b/fs/xfs/libxfs/xfs_bmap.c
2360 +@@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree(
2361 + args.type = XFS_ALLOCTYPE_START_BNO;
2362 + args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
2363 + } else if (dfops->dop_low) {
2364 +-try_another_ag:
2365 + args.type = XFS_ALLOCTYPE_START_BNO;
2366 ++try_another_ag:
2367 + args.fsbno = *firstblock;
2368 + } else {
2369 + args.type = XFS_ALLOCTYPE_NEAR_BNO;
2370 +@@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree(
2371 + if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
2372 + args.fsbno == NULLFSBLOCK &&
2373 + args.type == XFS_ALLOCTYPE_NEAR_BNO) {
2374 +- dfops->dop_low = true;
2375 ++ args.type = XFS_ALLOCTYPE_FIRST_AG;
2376 + goto try_another_ag;
2377 + }
2378 ++ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
2379 ++ xfs_iroot_realloc(ip, -1, whichfork);
2380 ++ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
2381 ++ return -ENOSPC;
2382 ++ }
2383 + /*
2384 + * Allocation can't fail, the space was reserved.
2385 + */
2386 +- ASSERT(args.fsbno != NULLFSBLOCK);
2387 + ASSERT(*firstblock == NULLFSBLOCK ||
2388 +- args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
2389 +- (dfops->dop_low &&
2390 +- args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
2391 ++ args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
2392 + *firstblock = cur->bc_private.b.firstblock = args.fsbno;
2393 + cur->bc_private.b.allocated++;
2394 + ip->i_d.di_nblocks++;
2395 +@@ -1278,7 +1280,6 @@ xfs_bmap_read_extents(
2396 + /* REFERENCED */
2397 + xfs_extnum_t room; /* number of entries there's room for */
2398 +
2399 +- bno = NULLFSBLOCK;
2400 + mp = ip->i_mount;
2401 + ifp = XFS_IFORK_PTR(ip, whichfork);
2402 + exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
2403 +@@ -1291,9 +1292,7 @@ xfs_bmap_read_extents(
2404 + ASSERT(level > 0);
2405 + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
2406 + bno = be64_to_cpu(*pp);
2407 +- ASSERT(bno != NULLFSBLOCK);
2408 +- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
2409 +- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
2410 ++
2411 + /*
2412 + * Go down the tree until leaf level is reached, following the first
2413 + * pointer (leftmost) at each level.
2414 +@@ -1864,6 +1863,7 @@ xfs_bmap_add_extent_delay_real(
2415 + */
2416 + trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2417 + xfs_bmbt_set_startblock(ep, new->br_startblock);
2418 ++ xfs_bmbt_set_state(ep, new->br_state);
2419 + trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2420 +
2421 + (*nextents)++;
2422 +@@ -2202,6 +2202,7 @@ STATIC int /* error */
2423 + xfs_bmap_add_extent_unwritten_real(
2424 + struct xfs_trans *tp,
2425 + xfs_inode_t *ip, /* incore inode pointer */
2426 ++ int whichfork,
2427 + xfs_extnum_t *idx, /* extent number to update/insert */
2428 + xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2429 + xfs_bmbt_irec_t *new, /* new data to add to file extents */
2430 +@@ -2221,12 +2222,14 @@ xfs_bmap_add_extent_unwritten_real(
2431 + /* left is 0, right is 1, prev is 2 */
2432 + int rval=0; /* return value (logging flags) */
2433 + int state = 0;/* state bits, accessed thru macros */
2434 +- struct xfs_mount *mp = tp->t_mountp;
2435 ++ struct xfs_mount *mp = ip->i_mount;
2436 +
2437 + *logflagsp = 0;
2438 +
2439 + cur = *curp;
2440 +- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2441 ++ ifp = XFS_IFORK_PTR(ip, whichfork);
2442 ++ if (whichfork == XFS_COW_FORK)
2443 ++ state |= BMAP_COWFORK;
2444 +
2445 + ASSERT(*idx >= 0);
2446 + ASSERT(*idx <= xfs_iext_count(ifp));
2447 +@@ -2285,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real(
2448 + * Don't set contiguous if the combined extent would be too large.
2449 + * Also check for all-three-contiguous being too large.
2450 + */
2451 +- if (*idx < xfs_iext_count(&ip->i_df) - 1) {
2452 ++ if (*idx < xfs_iext_count(ifp) - 1) {
2453 + state |= BMAP_RIGHT_VALID;
2454 + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2455 + if (isnullstartblock(RIGHT.br_startblock))
2456 +@@ -2325,7 +2328,8 @@ xfs_bmap_add_extent_unwritten_real(
2457 + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2458 +
2459 + xfs_iext_remove(ip, *idx + 1, 2, state);
2460 +- ip->i_d.di_nextents -= 2;
2461 ++ XFS_IFORK_NEXT_SET(ip, whichfork,
2462 ++ XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2463 + if (cur == NULL)
2464 + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2465 + else {
2466 +@@ -2368,7 +2372,8 @@ xfs_bmap_add_extent_unwritten_real(
2467 + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2468 +
2469 + xfs_iext_remove(ip, *idx + 1, 1, state);
2470 +- ip->i_d.di_nextents--;
2471 ++ XFS_IFORK_NEXT_SET(ip, whichfork,
2472 ++ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2473 + if (cur == NULL)
2474 + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2475 + else {
2476 +@@ -2403,7 +2408,8 @@ xfs_bmap_add_extent_unwritten_real(
2477 + xfs_bmbt_set_state(ep, newext);
2478 + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2479 + xfs_iext_remove(ip, *idx + 1, 1, state);
2480 +- ip->i_d.di_nextents--;
2481 ++ XFS_IFORK_NEXT_SET(ip, whichfork,
2482 ++ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2483 + if (cur == NULL)
2484 + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2485 + else {
2486 +@@ -2515,7 +2521,8 @@ xfs_bmap_add_extent_unwritten_real(
2487 + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2488 +
2489 + xfs_iext_insert(ip, *idx, 1, new, state);
2490 +- ip->i_d.di_nextents++;
2491 ++ XFS_IFORK_NEXT_SET(ip, whichfork,
2492 ++ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2493 + if (cur == NULL)
2494 + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2495 + else {
2496 +@@ -2593,7 +2600,8 @@ xfs_bmap_add_extent_unwritten_real(
2497 + ++*idx;
2498 + xfs_iext_insert(ip, *idx, 1, new, state);
2499 +
2500 +- ip->i_d.di_nextents++;
2501 ++ XFS_IFORK_NEXT_SET(ip, whichfork,
2502 ++ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2503 + if (cur == NULL)
2504 + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2505 + else {
2506 +@@ -2641,7 +2649,8 @@ xfs_bmap_add_extent_unwritten_real(
2507 + ++*idx;
2508 + xfs_iext_insert(ip, *idx, 2, &r[0], state);
2509 +
2510 +- ip->i_d.di_nextents += 2;
2511 ++ XFS_IFORK_NEXT_SET(ip, whichfork,
2512 ++ XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
2513 + if (cur == NULL)
2514 + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2515 + else {
2516 +@@ -2695,17 +2704,17 @@ xfs_bmap_add_extent_unwritten_real(
2517 + }
2518 +
2519 + /* update reverse mappings */
2520 +- error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
2521 ++ error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
2522 + if (error)
2523 + goto done;
2524 +
2525 + /* convert to a btree if necessary */
2526 +- if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2527 ++ if (xfs_bmap_needs_btree(ip, whichfork)) {
2528 + int tmp_logflags; /* partial log flag return val */
2529 +
2530 + ASSERT(cur == NULL);
2531 + error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2532 +- 0, &tmp_logflags, XFS_DATA_FORK);
2533 ++ 0, &tmp_logflags, whichfork);
2534 + *logflagsp |= tmp_logflags;
2535 + if (error)
2536 + goto done;
2537 +@@ -2717,7 +2726,7 @@ xfs_bmap_add_extent_unwritten_real(
2538 + *curp = cur;
2539 + }
2540 +
2541 +- xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
2542 ++ xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2543 + done:
2544 + *logflagsp |= rval;
2545 + return error;
2546 +@@ -2809,7 +2818,8 @@ xfs_bmap_add_extent_hole_delay(
2547 + oldlen = startblockval(left.br_startblock) +
2548 + startblockval(new->br_startblock) +
2549 + startblockval(right.br_startblock);
2550 +- newlen = xfs_bmap_worst_indlen(ip, temp);
2551 ++ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2552 ++ oldlen);
2553 + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
2554 + nullstartblock((int)newlen));
2555 + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2556 +@@ -2830,7 +2840,8 @@ xfs_bmap_add_extent_hole_delay(
2557 + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2558 + oldlen = startblockval(left.br_startblock) +
2559 + startblockval(new->br_startblock);
2560 +- newlen = xfs_bmap_worst_indlen(ip, temp);
2561 ++ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2562 ++ oldlen);
2563 + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
2564 + nullstartblock((int)newlen));
2565 + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2566 +@@ -2846,7 +2857,8 @@ xfs_bmap_add_extent_hole_delay(
2567 + temp = new->br_blockcount + right.br_blockcount;
2568 + oldlen = startblockval(new->br_startblock) +
2569 + startblockval(right.br_startblock);
2570 +- newlen = xfs_bmap_worst_indlen(ip, temp);
2571 ++ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2572 ++ oldlen);
2573 + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2574 + new->br_startoff,
2575 + nullstartblock((int)newlen), temp, right.br_state);
2576 +@@ -3822,17 +3834,13 @@ xfs_bmap_btalloc(
2577 + * the first block that was allocated.
2578 + */
2579 + ASSERT(*ap->firstblock == NULLFSBLOCK ||
2580 +- XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
2581 +- XFS_FSB_TO_AGNO(mp, args.fsbno) ||
2582 +- (ap->dfops->dop_low &&
2583 +- XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
2584 +- XFS_FSB_TO_AGNO(mp, args.fsbno)));
2585 ++ XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
2586 ++ XFS_FSB_TO_AGNO(mp, args.fsbno));
2587 +
2588 + ap->blkno = args.fsbno;
2589 + if (*ap->firstblock == NULLFSBLOCK)
2590 + *ap->firstblock = args.fsbno;
2591 +- ASSERT(nullfb || fb_agno == args.agno ||
2592 +- (ap->dfops->dop_low && fb_agno < args.agno));
2593 ++ ASSERT(nullfb || fb_agno <= args.agno);
2594 + ap->length = args.len;
2595 + if (!(ap->flags & XFS_BMAPI_COWFORK))
2596 + ap->ip->i_d.di_nblocks += args.len;
2597 +@@ -4156,6 +4164,19 @@ xfs_bmapi_read(
2598 + return 0;
2599 + }
2600 +
2601 ++/*
2602 ++ * Add a delayed allocation extent to an inode. Blocks are reserved from the
2603 ++ * global pool and the extent inserted into the inode in-core extent tree.
2604 ++ *
2605 ++ * On entry, got refers to the first extent beyond the offset of the extent to
2606 ++ * allocate or eof is specified if no such extent exists. On return, got refers
2607 ++ * to the extent record that was inserted to the inode fork.
2608 ++ *
2609 ++ * Note that the allocated extent may have been merged with contiguous extents
2610 ++ * during insertion into the inode fork. Thus, got does not reflect the current
2611 ++ * state of the inode fork on return. If necessary, the caller can use lastx to
2612 ++ * look up the updated record in the inode fork.
2613 ++ */
2614 + int
2615 + xfs_bmapi_reserve_delalloc(
2616 + struct xfs_inode *ip,
2617 +@@ -4242,13 +4263,8 @@ xfs_bmapi_reserve_delalloc(
2618 + got->br_startblock = nullstartblock(indlen);
2619 + got->br_blockcount = alen;
2620 + got->br_state = XFS_EXT_NORM;
2621 +- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
2622 +
2623 +- /*
2624 +- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
2625 +- * might have merged it into one of the neighbouring ones.
2626 +- */
2627 +- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
2628 ++ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
2629 +
2630 + /*
2631 + * Tag the inode if blocks were preallocated. Note that COW fork
2632 +@@ -4260,10 +4276,6 @@ xfs_bmapi_reserve_delalloc(
2633 + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
2634 + xfs_inode_set_cowblocks_tag(ip);
2635 +
2636 +- ASSERT(got->br_startoff <= aoff);
2637 +- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
2638 +- ASSERT(isnullstartblock(got->br_startblock));
2639 +- ASSERT(got->br_state == XFS_EXT_NORM);
2640 + return 0;
2641 +
2642 + out_unreserve_blocks:
2643 +@@ -4368,10 +4380,16 @@ xfs_bmapi_allocate(
2644 + bma->got.br_state = XFS_EXT_NORM;
2645 +
2646 + /*
2647 +- * A wasdelay extent has been initialized, so shouldn't be flagged
2648 +- * as unwritten.
2649 ++ * In the data fork, a wasdelay extent has been initialized, so
2650 ++ * shouldn't be flagged as unwritten.
2651 ++ *
2652 ++ * For the cow fork, however, we convert delalloc reservations
2653 ++ * (extents allocated for speculative preallocation) to
2654 ++ * allocated unwritten extents, and only convert the unwritten
2655 ++ * extents to real extents when we're about to write the data.
2656 + */
2657 +- if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
2658 ++ if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
2659 ++ (bma->flags & XFS_BMAPI_PREALLOC) &&
2660 + xfs_sb_version_hasextflgbit(&mp->m_sb))
2661 + bma->got.br_state = XFS_EXT_UNWRITTEN;
2662 +
2663 +@@ -4422,8 +4440,6 @@ xfs_bmapi_convert_unwritten(
2664 + (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
2665 + return 0;
2666 +
2667 +- ASSERT(whichfork != XFS_COW_FORK);
2668 +-
2669 + /*
2670 + * Modify (by adding) the state flag, if writing.
2671 + */
2672 +@@ -4448,8 +4464,8 @@ xfs_bmapi_convert_unwritten(
2673 + return error;
2674 + }
2675 +
2676 +- error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
2677 +- &bma->cur, mval, bma->firstblock, bma->dfops,
2678 ++ error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
2679 ++ &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
2680 + &tmp_logflags);
2681 + /*
2682 + * Log the inode core unconditionally in the unwritten extent conversion
2683 +@@ -4458,8 +4474,12 @@ xfs_bmapi_convert_unwritten(
2684 + * in the transaction for the sake of fsync(), even if nothing has
2685 + * changed, because fsync() will not force the log for this transaction
2686 + * unless it sees the inode pinned.
2687 ++ *
2688 ++ * Note: If we're only converting cow fork extents, there aren't
2689 ++ * any on-disk updates to make, so we don't need to log anything.
2690 + */
2691 +- bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
2692 ++ if (whichfork != XFS_COW_FORK)
2693 ++ bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
2694 + if (error)
2695 + return error;
2696 +
2697 +@@ -4533,15 +4553,15 @@ xfs_bmapi_write(
2698 + ASSERT(*nmap >= 1);
2699 + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
2700 + ASSERT(!(flags & XFS_BMAPI_IGSTATE));
2701 +- ASSERT(tp != NULL);
2702 ++ ASSERT(tp != NULL ||
2703 ++ (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
2704 ++ (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
2705 + ASSERT(len > 0);
2706 + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
2707 + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2708 + ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
2709 + ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
2710 + ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
2711 +- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
2712 +- ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
2713 +
2714 + /* zeroing is for currently only for data extents, not metadata */
2715 + ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
2716 +@@ -4746,13 +4766,9 @@ xfs_bmapi_write(
2717 + if (bma.cur) {
2718 + if (!error) {
2719 + ASSERT(*firstblock == NULLFSBLOCK ||
2720 +- XFS_FSB_TO_AGNO(mp, *firstblock) ==
2721 ++ XFS_FSB_TO_AGNO(mp, *firstblock) <=
2722 + XFS_FSB_TO_AGNO(mp,
2723 +- bma.cur->bc_private.b.firstblock) ||
2724 +- (dfops->dop_low &&
2725 +- XFS_FSB_TO_AGNO(mp, *firstblock) <
2726 +- XFS_FSB_TO_AGNO(mp,
2727 +- bma.cur->bc_private.b.firstblock)));
2728 ++ bma.cur->bc_private.b.firstblock));
2729 + *firstblock = bma.cur->bc_private.b.firstblock;
2730 + }
2731 + xfs_btree_del_cursor(bma.cur,
2732 +@@ -4787,34 +4803,59 @@ xfs_bmap_split_indlen(
2733 + xfs_filblks_t len2 = *indlen2;
2734 + xfs_filblks_t nres = len1 + len2; /* new total res. */
2735 + xfs_filblks_t stolen = 0;
2736 ++ xfs_filblks_t resfactor;
2737 +
2738 + /*
2739 + * Steal as many blocks as we can to try and satisfy the worst case
2740 + * indlen for both new extents.
2741 + */
2742 +- while (nres > ores && avail) {
2743 +- nres--;
2744 +- avail--;
2745 +- stolen++;
2746 +- }
2747 ++ if (ores < nres && avail)
2748 ++ stolen = XFS_FILBLKS_MIN(nres - ores, avail);
2749 ++ ores += stolen;
2750 ++
2751 ++ /* nothing else to do if we've satisfied the new reservation */
2752 ++ if (ores >= nres)
2753 ++ return stolen;
2754 ++
2755 ++ /*
2756 ++ * We can't meet the total required reservation for the two extents.
2757 ++ * Calculate the percent of the overall shortage between both extents
2758 ++ * and apply this percentage to each of the requested indlen values.
2759 ++ * This distributes the shortage fairly and reduces the chances that one
2760 ++ * of the two extents is left with nothing when extents are repeatedly
2761 ++ * split.
2762 ++ */
2763 ++ resfactor = (ores * 100);
2764 ++ do_div(resfactor, nres);
2765 ++ len1 *= resfactor;
2766 ++ do_div(len1, 100);
2767 ++ len2 *= resfactor;
2768 ++ do_div(len2, 100);
2769 ++ ASSERT(len1 + len2 <= ores);
2770 ++ ASSERT(len1 < *indlen1 && len2 < *indlen2);
2771 +
2772 + /*
2773 +- * The only blocks available are those reserved for the original
2774 +- * extent and what we can steal from the extent being removed.
2775 +- * If this still isn't enough to satisfy the combined
2776 +- * requirements for the two new extents, skim blocks off of each
2777 +- * of the new reservations until they match what is available.
2778 ++ * Hand out the remainder to each extent. If one of the two reservations
2779 ++ * is zero, we want to make sure that one gets a block first. The loop
2780 ++ * below starts with len1, so hand len2 a block right off the bat if it
2781 ++ * is zero.
2782 + */
2783 +- while (nres > ores) {
2784 +- if (len1) {
2785 +- len1--;
2786 +- nres--;
2787 ++ ores -= (len1 + len2);
2788 ++ ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
2789 ++ if (ores && !len2 && *indlen2) {
2790 ++ len2++;
2791 ++ ores--;
2792 ++ }
2793 ++ while (ores) {
2794 ++ if (len1 < *indlen1) {
2795 ++ len1++;
2796 ++ ores--;
2797 + }
2798 +- if (nres == ores)
2799 ++ if (!ores)
2800 + break;
2801 +- if (len2) {
2802 +- len2--;
2803 +- nres--;
2804 ++ if (len2 < *indlen2) {
2805 ++ len2++;
2806 ++ ores--;
2807 + }
2808 + }
2809 +
2810 +@@ -5556,8 +5597,8 @@ __xfs_bunmapi(
2811 + }
2812 + del.br_state = XFS_EXT_UNWRITTEN;
2813 + error = xfs_bmap_add_extent_unwritten_real(tp, ip,
2814 +- &lastx, &cur, &del, firstblock, dfops,
2815 +- &logflags);
2816 ++ whichfork, &lastx, &cur, &del,
2817 ++ firstblock, dfops, &logflags);
2818 + if (error)
2819 + goto error0;
2820 + goto nodelete;
2821 +@@ -5610,8 +5651,9 @@ __xfs_bunmapi(
2822 + prev.br_state = XFS_EXT_UNWRITTEN;
2823 + lastx--;
2824 + error = xfs_bmap_add_extent_unwritten_real(tp,
2825 +- ip, &lastx, &cur, &prev,
2826 +- firstblock, dfops, &logflags);
2827 ++ ip, whichfork, &lastx, &cur,
2828 ++ &prev, firstblock, dfops,
2829 ++ &logflags);
2830 + if (error)
2831 + goto error0;
2832 + goto nodelete;
2833 +@@ -5619,8 +5661,9 @@ __xfs_bunmapi(
2834 + ASSERT(del.br_state == XFS_EXT_NORM);
2835 + del.br_state = XFS_EXT_UNWRITTEN;
2836 + error = xfs_bmap_add_extent_unwritten_real(tp,
2837 +- ip, &lastx, &cur, &del,
2838 +- firstblock, dfops, &logflags);
2839 ++ ip, whichfork, &lastx, &cur,
2840 ++ &del, firstblock, dfops,
2841 ++ &logflags);
2842 + if (error)
2843 + goto error0;
2844 + goto nodelete;
2845 +diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
2846 +index d9be241fc86f..999cc5878890 100644
2847 +--- a/fs/xfs/libxfs/xfs_bmap_btree.c
2848 ++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
2849 +@@ -453,8 +453,8 @@ xfs_bmbt_alloc_block(
2850 +
2851 + if (args.fsbno == NULLFSBLOCK) {
2852 + args.fsbno = be64_to_cpu(start->l);
2853 +-try_another_ag:
2854 + args.type = XFS_ALLOCTYPE_START_BNO;
2855 ++try_another_ag:
2856 + /*
2857 + * Make sure there is sufficient room left in the AG to
2858 + * complete a full tree split for an extent insert. If
2859 +@@ -494,8 +494,8 @@ xfs_bmbt_alloc_block(
2860 + if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
2861 + args.fsbno == NULLFSBLOCK &&
2862 + args.type == XFS_ALLOCTYPE_NEAR_BNO) {
2863 +- cur->bc_private.b.dfops->dop_low = true;
2864 + args.fsbno = cur->bc_private.b.firstblock;
2865 ++ args.type = XFS_ALLOCTYPE_FIRST_AG;
2866 + goto try_another_ag;
2867 + }
2868 +
2869 +@@ -512,7 +512,7 @@ xfs_bmbt_alloc_block(
2870 + goto error0;
2871 + cur->bc_private.b.dfops->dop_low = true;
2872 + }
2873 +- if (args.fsbno == NULLFSBLOCK) {
2874 ++ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
2875 + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2876 + *stat = 0;
2877 + return 0;
2878 +diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
2879 +index 21e6a6ab6b9a..2849d3fa3d0b 100644
2880 +--- a/fs/xfs/libxfs/xfs_btree.c
2881 ++++ b/fs/xfs/libxfs/xfs_btree.c
2882 +@@ -810,7 +810,8 @@ xfs_btree_read_bufl(
2883 + xfs_daddr_t d; /* real disk block address */
2884 + int error;
2885 +
2886 +- ASSERT(fsbno != NULLFSBLOCK);
2887 ++ if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
2888 ++ return -EFSCORRUPTED;
2889 + d = XFS_FSB_TO_DADDR(mp, fsbno);
2890 + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
2891 + mp->m_bsize, lock, &bp, ops);
2892 +diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
2893 +index b69b947c4c1b..33a8f8694d30 100644
2894 +--- a/fs/xfs/libxfs/xfs_btree.h
2895 ++++ b/fs/xfs/libxfs/xfs_btree.h
2896 +@@ -456,7 +456,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
2897 + #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
2898 +
2899 + #define XFS_FSB_SANITY_CHECK(mp,fsb) \
2900 +- (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
2901 ++ (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
2902 + XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
2903 +
2904 + /*
2905 +diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
2906 +index f2dc1a950c85..1bdf2888295b 100644
2907 +--- a/fs/xfs/libxfs/xfs_da_btree.c
2908 ++++ b/fs/xfs/libxfs/xfs_da_btree.c
2909 +@@ -2633,7 +2633,7 @@ xfs_da_read_buf(
2910 + /*
2911 + * Readahead the dir/attr block.
2912 + */
2913 +-xfs_daddr_t
2914 ++int
2915 + xfs_da_reada_buf(
2916 + struct xfs_inode *dp,
2917 + xfs_dablk_t bno,
2918 +@@ -2664,7 +2664,5 @@ xfs_da_reada_buf(
2919 + if (mapp != &map)
2920 + kmem_free(mapp);
2921 +
2922 +- if (error)
2923 +- return -1;
2924 +- return mappedbno;
2925 ++ return error;
2926 + }
2927 +diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
2928 +index 98c75cbe6ac2..4e29cb6a3627 100644
2929 +--- a/fs/xfs/libxfs/xfs_da_btree.h
2930 ++++ b/fs/xfs/libxfs/xfs_da_btree.h
2931 +@@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
2932 + xfs_dablk_t bno, xfs_daddr_t mappedbno,
2933 + struct xfs_buf **bpp, int whichfork,
2934 + const struct xfs_buf_ops *ops);
2935 +-xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
2936 ++int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
2937 + xfs_daddr_t mapped_bno, int whichfork,
2938 + const struct xfs_buf_ops *ops);
2939 + int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
2940 +diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
2941 +index 75a557432d0f..bbd1238852b3 100644
2942 +--- a/fs/xfs/libxfs/xfs_dir2_node.c
2943 ++++ b/fs/xfs/libxfs/xfs_dir2_node.c
2944 +@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
2945 + .verify_write = xfs_dir3_free_write_verify,
2946 + };
2947 +
2948 ++/* Everything ok in the free block header? */
2949 ++static bool
2950 ++xfs_dir3_free_header_check(
2951 ++ struct xfs_inode *dp,
2952 ++ xfs_dablk_t fbno,
2953 ++ struct xfs_buf *bp)
2954 ++{
2955 ++ struct xfs_mount *mp = dp->i_mount;
2956 ++ unsigned int firstdb;
2957 ++ int maxbests;
2958 ++
2959 ++ maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
2960 ++ firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
2961 ++ xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2962 ++ maxbests;
2963 ++ if (xfs_sb_version_hascrc(&mp->m_sb)) {
2964 ++ struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
2965 ++
2966 ++ if (be32_to_cpu(hdr3->firstdb) != firstdb)
2967 ++ return false;
2968 ++ if (be32_to_cpu(hdr3->nvalid) > maxbests)
2969 ++ return false;
2970 ++ if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
2971 ++ return false;
2972 ++ } else {
2973 ++ struct xfs_dir2_free_hdr *hdr = bp->b_addr;
2974 ++
2975 ++ if (be32_to_cpu(hdr->firstdb) != firstdb)
2976 ++ return false;
2977 ++ if (be32_to_cpu(hdr->nvalid) > maxbests)
2978 ++ return false;
2979 ++ if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
2980 ++ return false;
2981 ++ }
2982 ++ return true;
2983 ++}
2984 +
2985 + static int
2986 + __xfs_dir3_free_read(
2987 +@@ -168,11 +204,22 @@ __xfs_dir3_free_read(
2988 +
2989 + err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
2990 + XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
2991 ++ if (err || !*bpp)
2992 ++ return err;
2993 ++
2994 ++ /* Check things that we can't do in the verifier. */
2995 ++ if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
2996 ++ xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
2997 ++ xfs_verifier_error(*bpp);
2998 ++ xfs_trans_brelse(tp, *bpp);
2999 ++ return -EFSCORRUPTED;
3000 ++ }
3001 +
3002 + /* try read returns without an error or *bpp if it lands in a hole */
3003 +- if (!err && tp && *bpp)
3004 ++ if (tp)
3005 + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
3006 +- return err;
3007 ++
3008 ++ return 0;
3009 + }
3010 +
3011 + int
3012 +diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
3013 +index f272abff11e1..d41ade5d293e 100644
3014 +--- a/fs/xfs/libxfs/xfs_ialloc.c
3015 ++++ b/fs/xfs/libxfs/xfs_ialloc.c
3016 +@@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment(
3017 + struct xfs_mount *mp)
3018 + {
3019 + if (xfs_sb_version_hasalign(&mp->m_sb) &&
3020 +- mp->m_sb.sb_inoalignmt >=
3021 +- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
3022 ++ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
3023 + return mp->m_sb.sb_inoalignmt;
3024 + return 1;
3025 + }
3026 +diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
3027 +index 222e103356c6..25c1e078aef6 100644
3028 +--- a/fs/xfs/libxfs/xfs_inode_fork.c
3029 ++++ b/fs/xfs/libxfs/xfs_inode_fork.c
3030 +@@ -26,6 +26,7 @@
3031 + #include "xfs_inode.h"
3032 + #include "xfs_trans.h"
3033 + #include "xfs_inode_item.h"
3034 ++#include "xfs_btree.h"
3035 + #include "xfs_bmap_btree.h"
3036 + #include "xfs_bmap.h"
3037 + #include "xfs_error.h"
3038 +@@ -429,11 +430,13 @@ xfs_iformat_btree(
3039 + /* REFERENCED */
3040 + int nrecs;
3041 + int size;
3042 ++ int level;
3043 +
3044 + ifp = XFS_IFORK_PTR(ip, whichfork);
3045 + dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
3046 + size = XFS_BMAP_BROOT_SPACE(mp, dfp);
3047 + nrecs = be16_to_cpu(dfp->bb_numrecs);
3048 ++ level = be16_to_cpu(dfp->bb_level);
3049 +
3050 + /*
3051 + * blow out if -- fork has less extents than can fit in
3052 +@@ -446,7 +449,8 @@ xfs_iformat_btree(
3053 + XFS_IFORK_MAXEXT(ip, whichfork) ||
3054 + XFS_BMDR_SPACE_CALC(nrecs) >
3055 + XFS_DFORK_SIZE(dip, mp, whichfork) ||
3056 +- XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
3057 ++ XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
3058 ++ level == 0 || level > XFS_BTREE_MAXLEVELS) {
3059 + xfs_warn(mp, "corrupt inode %Lu (btree).",
3060 + (unsigned long long) ip->i_ino);
3061 + XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
3062 +@@ -497,15 +501,14 @@ xfs_iread_extents(
3063 + * We know that the size is valid (it's checked in iformat_btree)
3064 + */
3065 + ifp->if_bytes = ifp->if_real_bytes = 0;
3066 +- ifp->if_flags |= XFS_IFEXTENTS;
3067 + xfs_iext_add(ifp, 0, nextents);
3068 + error = xfs_bmap_read_extents(tp, ip, whichfork);
3069 + if (error) {
3070 + xfs_iext_destroy(ifp);
3071 +- ifp->if_flags &= ~XFS_IFEXTENTS;
3072 + return error;
3073 + }
3074 + xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
3075 ++ ifp->if_flags |= XFS_IFEXTENTS;
3076 + return 0;
3077 + }
3078 + /*
3079 +diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
3080 +index 631e7c0e0a29..937d406d3c11 100644
3081 +--- a/fs/xfs/xfs_aops.c
3082 ++++ b/fs/xfs/xfs_aops.c
3083 +@@ -274,54 +274,49 @@ xfs_end_io(
3084 + struct xfs_ioend *ioend =
3085 + container_of(work, struct xfs_ioend, io_work);
3086 + struct xfs_inode *ip = XFS_I(ioend->io_inode);
3087 ++ xfs_off_t offset = ioend->io_offset;
3088 ++ size_t size = ioend->io_size;
3089 + int error = ioend->io_bio->bi_error;
3090 +
3091 + /*
3092 +- * Set an error if the mount has shut down and proceed with end I/O
3093 +- * processing so it can perform whatever cleanups are necessary.
3094 ++ * Just clean up the in-memory structures if the fs has been shut down.
3095 + */
3096 +- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
3097 ++ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3098 + error = -EIO;
3099 ++ goto done;
3100 ++ }
3101 +
3102 + /*
3103 +- * For a CoW extent, we need to move the mapping from the CoW fork
3104 +- * to the data fork. If instead an error happened, just dump the
3105 +- * new blocks.
3106 ++ * Clean up any COW blocks on an I/O error.
3107 + */
3108 +- if (ioend->io_type == XFS_IO_COW) {
3109 +- if (error)
3110 +- goto done;
3111 +- if (ioend->io_bio->bi_error) {
3112 +- error = xfs_reflink_cancel_cow_range(ip,
3113 +- ioend->io_offset, ioend->io_size);
3114 +- goto done;
3115 ++ if (unlikely(error)) {
3116 ++ switch (ioend->io_type) {
3117 ++ case XFS_IO_COW:
3118 ++ xfs_reflink_cancel_cow_range(ip, offset, size, true);
3119 ++ break;
3120 + }
3121 +- error = xfs_reflink_end_cow(ip, ioend->io_offset,
3122 +- ioend->io_size);
3123 +- if (error)
3124 +- goto done;
3125 ++
3126 ++ goto done;
3127 + }
3128 +
3129 + /*
3130 +- * For unwritten extents we need to issue transactions to convert a
3131 +- * range to normal written extens after the data I/O has finished.
3132 +- * Detecting and handling completion IO errors is done individually
3133 +- * for each case as different cleanup operations need to be performed
3134 +- * on error.
3135 ++ * Success: commit the COW or unwritten blocks if needed.
3136 + */
3137 +- if (ioend->io_type == XFS_IO_UNWRITTEN) {
3138 +- if (error)
3139 +- goto done;
3140 +- error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
3141 +- ioend->io_size);
3142 +- } else if (ioend->io_append_trans) {
3143 +- error = xfs_setfilesize_ioend(ioend, error);
3144 +- } else {
3145 +- ASSERT(!xfs_ioend_is_append(ioend) ||
3146 +- ioend->io_type == XFS_IO_COW);
3147 ++ switch (ioend->io_type) {
3148 ++ case XFS_IO_COW:
3149 ++ error = xfs_reflink_end_cow(ip, offset, size);
3150 ++ break;
3151 ++ case XFS_IO_UNWRITTEN:
3152 ++ error = xfs_iomap_write_unwritten(ip, offset, size);
3153 ++ break;
3154 ++ default:
3155 ++ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
3156 ++ break;
3157 + }
3158 +
3159 + done:
3160 ++ if (ioend->io_append_trans)
3161 ++ error = xfs_setfilesize_ioend(ioend, error);
3162 + xfs_destroy_ioend(ioend, error);
3163 + }
3164 +
3165 +@@ -481,6 +476,12 @@ xfs_submit_ioend(
3166 + struct xfs_ioend *ioend,
3167 + int status)
3168 + {
3169 ++ /* Convert CoW extents to regular */
3170 ++ if (!status && ioend->io_type == XFS_IO_COW) {
3171 ++ status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
3172 ++ ioend->io_offset, ioend->io_size);
3173 ++ }
3174 ++
3175 + /* Reserve log space if we might write beyond the on-disk inode size. */
3176 + if (!status &&
3177 + ioend->io_type != XFS_IO_UNWRITTEN &&
3178 +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
3179 +index c1417919ab0a..c516d7158a21 100644
3180 +--- a/fs/xfs/xfs_bmap_util.c
3181 ++++ b/fs/xfs/xfs_bmap_util.c
3182 +@@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
3183 + */
3184 + int
3185 + xfs_free_eofblocks(
3186 +- xfs_mount_t *mp,
3187 +- xfs_inode_t *ip,
3188 +- bool need_iolock)
3189 ++ struct xfs_inode *ip)
3190 + {
3191 +- xfs_trans_t *tp;
3192 +- int error;
3193 +- xfs_fileoff_t end_fsb;
3194 +- xfs_fileoff_t last_fsb;
3195 +- xfs_filblks_t map_len;
3196 +- int nimaps;
3197 +- xfs_bmbt_irec_t imap;
3198 ++ struct xfs_trans *tp;
3199 ++ int error;
3200 ++ xfs_fileoff_t end_fsb;
3201 ++ xfs_fileoff_t last_fsb;
3202 ++ xfs_filblks_t map_len;
3203 ++ int nimaps;
3204 ++ struct xfs_bmbt_irec imap;
3205 ++ struct xfs_mount *mp = ip->i_mount;
3206 ++
3207 ++ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
3208 +
3209 + /*
3210 + * Figure out if there are any blocks beyond the end
3211 +@@ -944,6 +945,10 @@ xfs_free_eofblocks(
3212 + error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
3213 + xfs_iunlock(ip, XFS_ILOCK_SHARED);
3214 +
3215 ++ /*
3216 ++ * If there are blocks after the end of file, truncate the file to its
3217 ++ * current size to free them up.
3218 ++ */
3219 + if (!error && (nimaps != 0) &&
3220 + (imap.br_startblock != HOLESTARTBLOCK ||
3221 + ip->i_delayed_blks)) {
3222 +@@ -954,22 +959,13 @@ xfs_free_eofblocks(
3223 + if (error)
3224 + return error;
3225 +
3226 +- /*
3227 +- * There are blocks after the end of file.
3228 +- * Free them up now by truncating the file to
3229 +- * its current size.
3230 +- */
3231 +- if (need_iolock) {
3232 +- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
3233 +- return -EAGAIN;
3234 +- }
3235 ++ /* wait on dio to ensure i_size has settled */
3236 ++ inode_dio_wait(VFS_I(ip));
3237 +
3238 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
3239 + &tp);
3240 + if (error) {
3241 + ASSERT(XFS_FORCED_SHUTDOWN(mp));
3242 +- if (need_iolock)
3243 +- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3244 + return error;
3245 + }
3246 +
3247 +@@ -997,8 +993,6 @@ xfs_free_eofblocks(
3248 + }
3249 +
3250 + xfs_iunlock(ip, XFS_ILOCK_EXCL);
3251 +- if (need_iolock)
3252 +- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3253 + }
3254 + return error;
3255 + }
3256 +@@ -1393,10 +1387,16 @@ xfs_shift_file_space(
3257 + xfs_fileoff_t stop_fsb;
3258 + xfs_fileoff_t next_fsb;
3259 + xfs_fileoff_t shift_fsb;
3260 ++ uint resblks;
3261 +
3262 + ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
3263 +
3264 + if (direction == SHIFT_LEFT) {
3265 ++ /*
3266 ++ * Reserve blocks to cover potential extent merges after left
3267 ++ * shift operations.
3268 ++ */
3269 ++ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
3270 + next_fsb = XFS_B_TO_FSB(mp, offset + len);
3271 + stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
3272 + } else {
3273 +@@ -1404,6 +1404,7 @@ xfs_shift_file_space(
3274 + * If right shift, delegate the work of initialization of
3275 + * next_fsb to xfs_bmap_shift_extent as it has ilock held.
3276 + */
3277 ++ resblks = 0;
3278 + next_fsb = NULLFSBLOCK;
3279 + stop_fsb = XFS_B_TO_FSB(mp, offset);
3280 + }
3281 +@@ -1415,7 +1416,7 @@ xfs_shift_file_space(
3282 + * into the accessible region of the file.
3283 + */
3284 + if (xfs_can_free_eofblocks(ip, true)) {
3285 +- error = xfs_free_eofblocks(mp, ip, false);
3286 ++ error = xfs_free_eofblocks(ip);
3287 + if (error)
3288 + return error;
3289 + }
3290 +@@ -1445,21 +1446,14 @@ xfs_shift_file_space(
3291 + }
3292 +
3293 + while (!error && !done) {
3294 +- /*
3295 +- * We would need to reserve permanent block for transaction.
3296 +- * This will come into picture when after shifting extent into
3297 +- * hole we found that adjacent extents can be merged which
3298 +- * may lead to freeing of a block during record update.
3299 +- */
3300 +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
3301 +- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
3302 ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
3303 ++ &tp);
3304 + if (error)
3305 + break;
3306 +
3307 + xfs_ilock(ip, XFS_ILOCK_EXCL);
3308 + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
3309 +- ip->i_gdquot, ip->i_pdquot,
3310 +- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
3311 ++ ip->i_gdquot, ip->i_pdquot, resblks, 0,
3312 + XFS_QMOPT_RES_REGBLKS);
3313 + if (error)
3314 + goto out_trans_cancel;
3315 +diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
3316 +index 68a621a8e0c0..f1005393785c 100644
3317 +--- a/fs/xfs/xfs_bmap_util.h
3318 ++++ b/fs/xfs/xfs_bmap_util.h
3319 +@@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
3320 +
3321 + /* EOF block manipulation functions */
3322 + bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
3323 +-int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
3324 +- bool need_iolock);
3325 ++int xfs_free_eofblocks(struct xfs_inode *ip);
3326 +
3327 + int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
3328 + struct xfs_swapext *sx);
3329 +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
3330 +index 2975cb2319f4..0306168af332 100644
3331 +--- a/fs/xfs/xfs_buf_item.c
3332 ++++ b/fs/xfs/xfs_buf_item.c
3333 +@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks(
3334 + */
3335 + bp->b_last_error = 0;
3336 + bp->b_retries = 0;
3337 ++ bp->b_first_retry_time = 0;
3338 +
3339 + xfs_buf_do_callbacks(bp);
3340 + bp->b_fspriv = NULL;
3341 +diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
3342 +index 162dc186cf04..29c2f997aedf 100644
3343 +--- a/fs/xfs/xfs_extent_busy.c
3344 ++++ b/fs/xfs/xfs_extent_busy.c
3345 +@@ -45,18 +45,7 @@ xfs_extent_busy_insert(
3346 + struct rb_node **rbp;
3347 + struct rb_node *parent = NULL;
3348 +
3349 +- new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
3350 +- if (!new) {
3351 +- /*
3352 +- * No Memory! Since it is now not possible to track the free
3353 +- * block, make this a synchronous transaction to insure that
3354 +- * the block is not reused before this transaction commits.
3355 +- */
3356 +- trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
3357 +- xfs_trans_set_sync(tp);
3358 +- return;
3359 +- }
3360 +-
3361 ++ new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
3362 + new->agno = agno;
3363 + new->bno = bno;
3364 + new->length = len;
3365 +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
3366 +index bbb9eb6811b2..2a695a8f4fe7 100644
3367 +--- a/fs/xfs/xfs_file.c
3368 ++++ b/fs/xfs/xfs_file.c
3369 +@@ -527,6 +527,15 @@ xfs_file_dio_aio_write(
3370 + if ((iocb->ki_pos & mp->m_blockmask) ||
3371 + ((iocb->ki_pos + count) & mp->m_blockmask)) {
3372 + unaligned_io = 1;
3373 ++
3374 ++ /*
3375 ++ * We can't properly handle unaligned direct I/O to reflink
3376 ++ * files yet, as we can't unshare a partial block.
3377 ++ */
3378 ++ if (xfs_is_reflink_inode(ip)) {
3379 ++ trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
3380 ++ return -EREMCHG;
3381 ++ }
3382 + iolock = XFS_IOLOCK_EXCL;
3383 + } else {
3384 + iolock = XFS_IOLOCK_SHARED;
3385 +@@ -614,8 +623,10 @@ xfs_file_buffered_aio_write(
3386 + struct xfs_inode *ip = XFS_I(inode);
3387 + ssize_t ret;
3388 + int enospc = 0;
3389 +- int iolock = XFS_IOLOCK_EXCL;
3390 ++ int iolock;
3391 +
3392 ++write_retry:
3393 ++ iolock = XFS_IOLOCK_EXCL;
3394 + xfs_ilock(ip, iolock);
3395 +
3396 + ret = xfs_file_aio_write_checks(iocb, from, &iolock);
3397 +@@ -625,7 +636,6 @@ xfs_file_buffered_aio_write(
3398 + /* We can write back this queue in page reclaim */
3399 + current->backing_dev_info = inode_to_bdi(inode);
3400 +
3401 +-write_retry:
3402 + trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
3403 + ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
3404 + if (likely(ret >= 0))
3405 +@@ -641,18 +651,21 @@ xfs_file_buffered_aio_write(
3406 + * running at the same time.
3407 + */
3408 + if (ret == -EDQUOT && !enospc) {
3409 ++ xfs_iunlock(ip, iolock);
3410 + enospc = xfs_inode_free_quota_eofblocks(ip);
3411 + if (enospc)
3412 + goto write_retry;
3413 + enospc = xfs_inode_free_quota_cowblocks(ip);
3414 + if (enospc)
3415 + goto write_retry;
3416 ++ iolock = 0;
3417 + } else if (ret == -ENOSPC && !enospc) {
3418 + struct xfs_eofblocks eofb = {0};
3419 +
3420 + enospc = 1;
3421 + xfs_flush_inodes(ip->i_mount);
3422 +- eofb.eof_scan_owner = ip->i_ino; /* for locking */
3423 ++
3424 ++ xfs_iunlock(ip, iolock);
3425 + eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
3426 + xfs_icache_free_eofblocks(ip->i_mount, &eofb);
3427 + goto write_retry;
3428 +@@ -660,7 +673,8 @@ xfs_file_buffered_aio_write(
3429 +
3430 + current->backing_dev_info = NULL;
3431 + out:
3432 +- xfs_iunlock(ip, iolock);
3433 ++ if (iolock)
3434 ++ xfs_iunlock(ip, iolock);
3435 + return ret;
3436 + }
3437 +
3438 +@@ -908,9 +922,9 @@ xfs_dir_open(
3439 + */
3440 + mode = xfs_ilock_data_map_shared(ip);
3441 + if (ip->i_d.di_nextents > 0)
3442 +- xfs_dir3_data_readahead(ip, 0, -1);
3443 ++ error = xfs_dir3_data_readahead(ip, 0, -1);
3444 + xfs_iunlock(ip, mode);
3445 +- return 0;
3446 ++ return error;
3447 + }
3448 +
3449 + STATIC int
3450 +diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
3451 +index 70ca4f608321..3531f8f72fa5 100644
3452 +--- a/fs/xfs/xfs_icache.c
3453 ++++ b/fs/xfs/xfs_icache.c
3454 +@@ -1322,13 +1322,10 @@ xfs_inode_free_eofblocks(
3455 + int flags,
3456 + void *args)
3457 + {
3458 +- int ret;
3459 ++ int ret = 0;
3460 + struct xfs_eofblocks *eofb = args;
3461 +- bool need_iolock = true;
3462 + int match;
3463 +
3464 +- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
3465 +-
3466 + if (!xfs_can_free_eofblocks(ip, false)) {
3467 + /* inode could be preallocated or append-only */
3468 + trace_xfs_inode_free_eofblocks_invalid(ip);
3469 +@@ -1356,21 +1353,19 @@ xfs_inode_free_eofblocks(
3470 + if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
3471 + XFS_ISIZE(ip) < eofb->eof_min_file_size)
3472 + return 0;
3473 +-
3474 +- /*
3475 +- * A scan owner implies we already hold the iolock. Skip it in
3476 +- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
3477 +- * the possibility of EAGAIN being returned.
3478 +- */
3479 +- if (eofb->eof_scan_owner == ip->i_ino)
3480 +- need_iolock = false;
3481 + }
3482 +
3483 +- ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
3484 +-
3485 +- /* don't revisit the inode if we're not waiting */
3486 +- if (ret == -EAGAIN && !(flags & SYNC_WAIT))
3487 +- ret = 0;
3488 ++ /*
3489 ++ * If the caller is waiting, return -EAGAIN to keep the background
3490 ++ * scanner moving and revisit the inode in a subsequent pass.
3491 ++ */
3492 ++ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
3493 ++ if (flags & SYNC_WAIT)
3494 ++ ret = -EAGAIN;
3495 ++ return ret;
3496 ++ }
3497 ++ ret = xfs_free_eofblocks(ip);
3498 ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3499 +
3500 + return ret;
3501 + }
3502 +@@ -1417,15 +1412,10 @@ __xfs_inode_free_quota_eofblocks(
3503 + struct xfs_eofblocks eofb = {0};
3504 + struct xfs_dquot *dq;
3505 +
3506 +- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
3507 +-
3508 + /*
3509 +- * Set the scan owner to avoid a potential livelock. Otherwise, the scan
3510 +- * can repeatedly trylock on the inode we're currently processing. We
3511 +- * run a sync scan to increase effectiveness and use the union filter to
3512 ++ * Run a sync scan to increase effectiveness and use the union filter to
3513 + * cover all applicable quotas in a single scan.
3514 + */
3515 +- eofb.eof_scan_owner = ip->i_ino;
3516 + eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
3517 +
3518 + if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
3519 +@@ -1577,12 +1567,9 @@ xfs_inode_free_cowblocks(
3520 + {
3521 + int ret;
3522 + struct xfs_eofblocks *eofb = args;
3523 +- bool need_iolock = true;
3524 + int match;
3525 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
3526 +
3527 +- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
3528 +-
3529 + /*
3530 + * Just clear the tag if we have an empty cow fork or none at all. It's
3531 + * possible the inode was fully unshared since it was originally tagged.
3532 +@@ -1615,28 +1602,16 @@ xfs_inode_free_cowblocks(
3533 + if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
3534 + XFS_ISIZE(ip) < eofb->eof_min_file_size)
3535 + return 0;
3536 +-
3537 +- /*
3538 +- * A scan owner implies we already hold the iolock. Skip it in
3539 +- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
3540 +- * the possibility of EAGAIN being returned.
3541 +- */
3542 +- if (eofb->eof_scan_owner == ip->i_ino)
3543 +- need_iolock = false;
3544 + }
3545 +
3546 + /* Free the CoW blocks */
3547 +- if (need_iolock) {
3548 +- xfs_ilock(ip, XFS_IOLOCK_EXCL);
3549 +- xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
3550 +- }
3551 ++ xfs_ilock(ip, XFS_IOLOCK_EXCL);
3552 ++ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
3553 +
3554 +- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
3555 ++ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
3556 +
3557 +- if (need_iolock) {
3558 +- xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
3559 +- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3560 +- }
3561 ++ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
3562 ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3563 +
3564 + return ret;
3565 + }
3566 +diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
3567 +index a1e02f4708ab..8a7c849b4dea 100644
3568 +--- a/fs/xfs/xfs_icache.h
3569 ++++ b/fs/xfs/xfs_icache.h
3570 +@@ -27,7 +27,6 @@ struct xfs_eofblocks {
3571 + kgid_t eof_gid;
3572 + prid_t eof_prid;
3573 + __u64 eof_min_file_size;
3574 +- xfs_ino_t eof_scan_owner;
3575 + };
3576 +
3577 + #define SYNC_WAIT 0x0001 /* wait for i/o to complete */
3578 +@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user(
3579 + dst->eof_flags = src->eof_flags;
3580 + dst->eof_prid = src->eof_prid;
3581 + dst->eof_min_file_size = src->eof_min_file_size;
3582 +- dst->eof_scan_owner = NULLFSINO;
3583 +
3584 + dst->eof_uid = INVALID_UID;
3585 + if (src->eof_flags & XFS_EOF_FLAGS_UID) {
3586 +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
3587 +index de32f0fe47c8..7eaf1ef74e3c 100644
3588 +--- a/fs/xfs/xfs_inode.c
3589 ++++ b/fs/xfs/xfs_inode.c
3590 +@@ -1615,7 +1615,7 @@ xfs_itruncate_extents(
3591 +
3592 + /* Remove all pending CoW reservations. */
3593 + error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
3594 +- last_block);
3595 ++ last_block, true);
3596 + if (error)
3597 + goto out;
3598 +
3599 +@@ -1692,32 +1692,34 @@ xfs_release(
3600 + if (xfs_can_free_eofblocks(ip, false)) {
3601 +
3602 + /*
3603 ++ * Check if the inode is being opened, written and closed
3604 ++ * frequently and we have delayed allocation blocks outstanding
3605 ++ * (e.g. streaming writes from the NFS server), truncating the
3606 ++ * blocks past EOF will cause fragmentation to occur.
3607 ++ *
3608 ++ * In this case don't do the truncation, but we have to be
3609 ++ * careful how we detect this case. Blocks beyond EOF show up as
3610 ++ * i_delayed_blks even when the inode is clean, so we need to
3611 ++ * truncate them away first before checking for a dirty release.
3612 ++ * Hence on the first dirty close we will still remove the
3613 ++ * speculative allocation, but after that we will leave it in
3614 ++ * place.
3615 ++ */
3616 ++ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
3617 ++ return 0;
3618 ++ /*
3619 + * If we can't get the iolock just skip truncating the blocks
3620 + * past EOF because we could deadlock with the mmap_sem
3621 +- * otherwise. We'll get another chance to drop them once the
3622 ++ * otherwise. We'll get another chance to drop them once the
3623 + * last reference to the inode is dropped, so we'll never leak
3624 + * blocks permanently.
3625 +- *
3626 +- * Further, check if the inode is being opened, written and
3627 +- * closed frequently and we have delayed allocation blocks
3628 +- * outstanding (e.g. streaming writes from the NFS server),
3629 +- * truncating the blocks past EOF will cause fragmentation to
3630 +- * occur.
3631 +- *
3632 +- * In this case don't do the truncation, either, but we have to
3633 +- * be careful how we detect this case. Blocks beyond EOF show
3634 +- * up as i_delayed_blks even when the inode is clean, so we
3635 +- * need to truncate them away first before checking for a dirty
3636 +- * release. Hence on the first dirty close we will still remove
3637 +- * the speculative allocation, but after that we will leave it
3638 +- * in place.
3639 + */
3640 +- if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
3641 +- return 0;
3642 +-
3643 +- error = xfs_free_eofblocks(mp, ip, true);
3644 +- if (error && error != -EAGAIN)
3645 +- return error;
3646 ++ if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
3647 ++ error = xfs_free_eofblocks(ip);
3648 ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3649 ++ if (error)
3650 ++ return error;
3651 ++ }
3652 +
3653 + /* delalloc blocks after truncation means it really is dirty */
3654 + if (ip->i_delayed_blks)
3655 +@@ -1904,8 +1906,11 @@ xfs_inactive(
3656 + * cache. Post-eof blocks must be freed, lest we end up with
3657 + * broken free space accounting.
3658 + */
3659 +- if (xfs_can_free_eofblocks(ip, true))
3660 +- xfs_free_eofblocks(mp, ip, false);
3661 ++ if (xfs_can_free_eofblocks(ip, true)) {
3662 ++ xfs_ilock(ip, XFS_IOLOCK_EXCL);
3663 ++ xfs_free_eofblocks(ip);
3664 ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3665 ++ }
3666 +
3667 + return;
3668 + }
3669 +diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
3670 +index fdecf79d2fa4..2326a6913fde 100644
3671 +--- a/fs/xfs/xfs_iomap.c
3672 ++++ b/fs/xfs/xfs_iomap.c
3673 +@@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay(
3674 + goto out_unlock;
3675 + }
3676 +
3677 ++ /*
3678 ++ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
3679 ++ * them out if the write happens to fail.
3680 ++ */
3681 ++ iomap->flags = IOMAP_F_NEW;
3682 + trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
3683 + done:
3684 + if (isnullstartblock(got.br_startblock))
3685 +@@ -685,7 +690,7 @@ xfs_iomap_write_allocate(
3686 + int nres;
3687 +
3688 + if (whichfork == XFS_COW_FORK)
3689 +- flags |= XFS_BMAPI_COWFORK;
3690 ++ flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
3691 +
3692 + /*
3693 + * Make sure that the dquots are there.
3694 +@@ -1026,17 +1031,7 @@ xfs_file_iomap_begin(
3695 + if (error)
3696 + goto out_unlock;
3697 +
3698 +- /*
3699 +- * We're here because we're trying to do a directio write to a
3700 +- * region that isn't aligned to a filesystem block. If the
3701 +- * extent is shared, fall back to buffered mode to handle the
3702 +- * RMW.
3703 +- */
3704 +- if (!(flags & IOMAP_REPORT) && shared) {
3705 +- trace_xfs_reflink_bounce_dio_write(ip, &imap);
3706 +- error = -EREMCHG;
3707 +- goto out_unlock;
3708 +- }
3709 ++ ASSERT((flags & IOMAP_REPORT) || !shared);
3710 + }
3711 +
3712 + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
3713 +@@ -1095,7 +1090,8 @@ xfs_file_iomap_end_delalloc(
3714 + struct xfs_inode *ip,
3715 + loff_t offset,
3716 + loff_t length,
3717 +- ssize_t written)
3718 ++ ssize_t written,
3719 ++ struct iomap *iomap)
3720 + {
3721 + struct xfs_mount *mp = ip->i_mount;
3722 + xfs_fileoff_t start_fsb;
3723 +@@ -1114,14 +1110,14 @@ xfs_file_iomap_end_delalloc(
3724 + end_fsb = XFS_B_TO_FSB(mp, offset + length);
3725 +
3726 + /*
3727 +- * Trim back delalloc blocks if we didn't manage to write the whole
3728 +- * range reserved.
3729 ++ * Trim delalloc blocks if they were allocated by this write and we
3730 ++ * didn't manage to write the whole range.
3731 + *
3732 + * We don't need to care about racing delalloc as we hold i_mutex
3733 + * across the reserve/allocate/unreserve calls. If there are delalloc
3734 + * blocks in the range, they are ours.
3735 + */
3736 +- if (start_fsb < end_fsb) {
3737 ++ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
3738 + truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
3739 + XFS_FSB_TO_B(mp, end_fsb) - 1);
3740 +
3741 +@@ -1151,7 +1147,7 @@ xfs_file_iomap_end(
3742 + {
3743 + if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
3744 + return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
3745 +- length, written);
3746 ++ length, written, iomap);
3747 + return 0;
3748 + }
3749 +
3750 +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
3751 +index 9b9540db17a6..52d27cc4370a 100644
3752 +--- a/fs/xfs/xfs_mount.c
3753 ++++ b/fs/xfs/xfs_mount.c
3754 +@@ -187,7 +187,7 @@ xfs_initialize_perag(
3755 + xfs_agnumber_t *maxagi)
3756 + {
3757 + xfs_agnumber_t index;
3758 +- xfs_agnumber_t first_initialised = 0;
3759 ++ xfs_agnumber_t first_initialised = NULLAGNUMBER;
3760 + xfs_perag_t *pag;
3761 + int error = -ENOMEM;
3762 +
3763 +@@ -202,22 +202,20 @@ xfs_initialize_perag(
3764 + xfs_perag_put(pag);
3765 + continue;
3766 + }
3767 +- if (!first_initialised)
3768 +- first_initialised = index;
3769 +
3770 + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
3771 + if (!pag)
3772 +- goto out_unwind;
3773 ++ goto out_unwind_new_pags;
3774 + pag->pag_agno = index;
3775 + pag->pag_mount = mp;
3776 + spin_lock_init(&pag->pag_ici_lock);
3777 + mutex_init(&pag->pag_ici_reclaim_lock);
3778 + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
3779 + if (xfs_buf_hash_init(pag))
3780 +- goto out_unwind;
3781 ++ goto out_free_pag;
3782 +
3783 + if (radix_tree_preload(GFP_NOFS))
3784 +- goto out_unwind;
3785 ++ goto out_hash_destroy;
3786 +
3787 + spin_lock(&mp->m_perag_lock);
3788 + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
3789 +@@ -225,10 +223,13 @@ xfs_initialize_perag(
3790 + spin_unlock(&mp->m_perag_lock);
3791 + radix_tree_preload_end();
3792 + error = -EEXIST;
3793 +- goto out_unwind;
3794 ++ goto out_hash_destroy;
3795 + }
3796 + spin_unlock(&mp->m_perag_lock);
3797 + radix_tree_preload_end();
3798 ++ /* first new pag is fully initialized */
3799 ++ if (first_initialised == NULLAGNUMBER)
3800 ++ first_initialised = index;
3801 + }
3802 +
3803 + index = xfs_set_inode_alloc(mp, agcount);
3804 +@@ -239,11 +240,16 @@ xfs_initialize_perag(
3805 + mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
3806 + return 0;
3807 +
3808 +-out_unwind:
3809 ++out_hash_destroy:
3810 + xfs_buf_hash_destroy(pag);
3811 ++out_free_pag:
3812 + kmem_free(pag);
3813 +- for (; index > first_initialised; index--) {
3814 ++out_unwind_new_pags:
3815 ++ /* unwind any prior newly initialized pags */
3816 ++ for (index = first_initialised; index < agcount; index++) {
3817 + pag = radix_tree_delete(&mp->m_perag_tree, index);
3818 ++ if (!pag)
3819 ++ break;
3820 + xfs_buf_hash_destroy(pag);
3821 + kmem_free(pag);
3822 + }
3823 +@@ -505,8 +511,7 @@ STATIC void
3824 + xfs_set_inoalignment(xfs_mount_t *mp)
3825 + {
3826 + if (xfs_sb_version_hasalign(&mp->m_sb) &&
3827 +- mp->m_sb.sb_inoalignmt >=
3828 +- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
3829 ++ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
3830 + mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
3831 + else
3832 + mp->m_inoalign_mask = 0;
3833 +diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
3834 +index 07593a362cd0..a72cd2e3c048 100644
3835 +--- a/fs/xfs/xfs_reflink.c
3836 ++++ b/fs/xfs/xfs_reflink.c
3837 +@@ -82,11 +82,22 @@
3838 + * mappings are a reservation against the free space in the filesystem;
3839 + * adjacent mappings can also be combined into fewer larger mappings.
3840 + *
3841 ++ * As an optimization, the CoW extent size hint (cowextsz) creates
3842 ++ * outsized aligned delalloc reservations in the hope of landing out of
3843 ++ * order nearby CoW writes in a single extent on disk, thereby reducing
3844 ++ * fragmentation and improving future performance.
3845 ++ *
3846 ++ * D: --RRRRRRSSSRRRRRRRR--- (data fork)
3847 ++ * C: ------DDDDDDD--------- (CoW fork)
3848 ++ *
3849 + * When dirty pages are being written out (typically in writepage), the
3850 +- * delalloc reservations are converted into real mappings by allocating
3851 +- * blocks and replacing the delalloc mapping with real ones. A delalloc
3852 +- * mapping can be replaced by several real ones if the free space is
3853 +- * fragmented.
3854 ++ * delalloc reservations are converted into unwritten mappings by
3855 ++ * allocating blocks and replacing the delalloc mapping with real ones.
3856 ++ * A delalloc mapping can be replaced by several unwritten ones if the
3857 ++ * free space is fragmented.
3858 ++ *
3859 ++ * D: --RRRRRRSSSRRRRRRRR---
3860 ++ * C: ------UUUUUUU---------
3861 + *
3862 + * We want to adapt the delalloc mechanism for copy-on-write, since the
3863 + * write paths are similar. The first two steps (creating the reservation
3864 +@@ -101,13 +112,29 @@
3865 + * Block-aligned directio writes will use the same mechanism as buffered
3866 + * writes.
3867 + *
3868 ++ * Just prior to submitting the actual disk write requests, we convert
3869 ++ * the extents representing the range of the file actually being written
3870 ++ * (as opposed to extra pieces created for the cowextsize hint) to real
3871 ++ * extents. This will become important in the next step:
3872 ++ *
3873 ++ * D: --RRRRRRSSSRRRRRRRR---
3874 ++ * C: ------UUrrUUU---------
3875 ++ *
3876 + * CoW remapping must be done after the data block write completes,
3877 + * because we don't want to destroy the old data fork map until we're sure
3878 + * the new block has been written. Since the new mappings are kept in a
3879 + * separate fork, we can simply iterate these mappings to find the ones
3880 + * that cover the file blocks that we just CoW'd. For each extent, simply
3881 + * unmap the corresponding range in the data fork, map the new range into
3882 +- * the data fork, and remove the extent from the CoW fork.
3883 ++ * the data fork, and remove the extent from the CoW fork. Because of
3884 ++ * the presence of the cowextsize hint, however, we must be careful
3885 ++ * only to remap the blocks that we've actually written out -- we must
3886 ++ * never remap delalloc reservations nor CoW staging blocks that have
3887 ++ * yet to be written. This corresponds exactly to the real extents in
3888 ++ * the CoW fork:
3889 ++ *
3890 ++ * D: --RRRRRRrrSRRRRRRRR---
3891 ++ * C: ------UU--UUU---------
3892 + *
3893 + * Since the remapping operation can be applied to an arbitrary file
3894 + * range, we record the need for the remap step as a flag in the ioend
3895 +@@ -296,6 +323,65 @@ xfs_reflink_reserve_cow(
3896 + return 0;
3897 + }
3898 +
3899 ++/* Convert part of an unwritten CoW extent to a real one. */
3900 ++STATIC int
3901 ++xfs_reflink_convert_cow_extent(
3902 ++ struct xfs_inode *ip,
3903 ++ struct xfs_bmbt_irec *imap,
3904 ++ xfs_fileoff_t offset_fsb,
3905 ++ xfs_filblks_t count_fsb,
3906 ++ struct xfs_defer_ops *dfops)
3907 ++{
3908 ++ struct xfs_bmbt_irec irec = *imap;
3909 ++ xfs_fsblock_t first_block;
3910 ++ int nimaps = 1;
3911 ++
3912 ++ if (imap->br_state == XFS_EXT_NORM)
3913 ++ return 0;
3914 ++
3915 ++ xfs_trim_extent(&irec, offset_fsb, count_fsb);
3916 ++ trace_xfs_reflink_convert_cow(ip, &irec);
3917 ++ if (irec.br_blockcount == 0)
3918 ++ return 0;
3919 ++ return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount,
3920 ++ XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
3921 ++ 0, &irec, &nimaps, dfops);
3922 ++}
3923 ++
3924 ++/* Convert all of the unwritten CoW extents in a file's range to real ones. */
3925 ++int
3926 ++xfs_reflink_convert_cow(
3927 ++ struct xfs_inode *ip,
3928 ++ xfs_off_t offset,
3929 ++ xfs_off_t count)
3930 ++{
3931 ++ struct xfs_bmbt_irec got;
3932 ++ struct xfs_defer_ops dfops;
3933 ++ struct xfs_mount *mp = ip->i_mount;
3934 ++ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
3935 ++ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
3936 ++ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
3937 ++ xfs_extnum_t idx;
3938 ++ bool found;
3939 ++ int error = 0;
3940 ++
3941 ++ xfs_ilock(ip, XFS_ILOCK_EXCL);
3942 ++
3943 ++ /* Convert all the extents to real from unwritten. */
3944 ++ for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
3945 ++ found && got.br_startoff < end_fsb;
3946 ++ found = xfs_iext_get_extent(ifp, ++idx, &got)) {
3947 ++ error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
3948 ++ end_fsb - offset_fsb, &dfops);
3949 ++ if (error)
3950 ++ break;
3951 ++ }
3952 ++
3953 ++ /* Finish up. */
3954 ++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
3955 ++ return error;
3956 ++}
3957 ++
3958 + /* Allocate all CoW reservations covering a range of blocks in a file. */
3959 + static int
3960 + __xfs_reflink_allocate_cow(
3961 +@@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow(
3962 + goto out_unlock;
3963 + ASSERT(nimaps == 1);
3964 +
3965 ++ /* Make sure there's a CoW reservation for it. */
3966 + error = xfs_reflink_reserve_cow(ip, &imap, &shared);
3967 + if (error)
3968 + goto out_trans_cancel;
3969 +@@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow(
3970 + goto out_trans_cancel;
3971 + }
3972 +
3973 ++ /* Allocate the entire reservation as unwritten blocks. */
3974 + xfs_trans_ijoin(tp, ip, 0);
3975 + error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
3976 +- XFS_BMAPI_COWFORK, &first_block,
3977 ++ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
3978 + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
3979 + &imap, &nimaps, &dfops);
3980 + if (error)
3981 + goto out_trans_cancel;
3982 +
3983 ++ /* Finish up. */
3984 + error = xfs_defer_finish(&tp, &dfops, NULL);
3985 + if (error)
3986 + goto out_trans_cancel;
3987 +@@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range(
3988 + if (error) {
3989 + trace_xfs_reflink_allocate_cow_range_error(ip, error,
3990 + _RET_IP_);
3991 +- break;
3992 ++ return error;
3993 + }
3994 + }
3995 +
3996 +- return error;
3997 ++ /* Convert the CoW extents to regular. */
3998 ++ return xfs_reflink_convert_cow(ip, offset, count);
3999 + }
4000 +
4001 + /*
4002 +@@ -459,14 +549,18 @@ xfs_reflink_trim_irec_to_next_cow(
4003 + }
4004 +
4005 + /*
4006 +- * Cancel all pending CoW reservations for some block range of an inode.
4007 ++ * Cancel CoW reservations for some block range of an inode.
4008 ++ *
4009 ++ * If cancel_real is true this function cancels all COW fork extents for the
4010 ++ * inode; if cancel_real is false, real extents are not cleared.
4011 + */
4012 + int
4013 + xfs_reflink_cancel_cow_blocks(
4014 + struct xfs_inode *ip,
4015 + struct xfs_trans **tpp,
4016 + xfs_fileoff_t offset_fsb,
4017 +- xfs_fileoff_t end_fsb)
4018 ++ xfs_fileoff_t end_fsb,
4019 ++ bool cancel_real)
4020 + {
4021 + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4022 + struct xfs_bmbt_irec got, del;
4023 +@@ -490,7 +584,7 @@ xfs_reflink_cancel_cow_blocks(
4024 + &idx, &got, &del);
4025 + if (error)
4026 + break;
4027 +- } else {
4028 ++ } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
4029 + xfs_trans_ijoin(*tpp, ip, 0);
4030 + xfs_defer_init(&dfops, &firstfsb);
4031 +
4032 +@@ -532,13 +626,17 @@ xfs_reflink_cancel_cow_blocks(
4033 + }
4034 +
4035 + /*
4036 +- * Cancel all pending CoW reservations for some byte range of an inode.
4037 ++ * Cancel CoW reservations for some byte range of an inode.
4038 ++ *
4039 ++ * If cancel_real is true this function cancels all COW fork extents for the
4040 ++ * inode; if cancel_real is false, real extents are not cleared.
4041 + */
4042 + int
4043 + xfs_reflink_cancel_cow_range(
4044 + struct xfs_inode *ip,
4045 + xfs_off_t offset,
4046 +- xfs_off_t count)
4047 ++ xfs_off_t count,
4048 ++ bool cancel_real)
4049 + {
4050 + struct xfs_trans *tp;
4051 + xfs_fileoff_t offset_fsb;
4052 +@@ -564,7 +662,8 @@ xfs_reflink_cancel_cow_range(
4053 + xfs_trans_ijoin(tp, ip, 0);
4054 +
4055 + /* Scrape out the old CoW reservations */
4056 +- error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
4057 ++ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
4058 ++ cancel_real);
4059 + if (error)
4060 + goto out_cancel;
4061 +
4062 +@@ -641,6 +740,16 @@ xfs_reflink_end_cow(
4063 +
4064 + ASSERT(!isnullstartblock(got.br_startblock));
4065 +
4066 ++ /*
4067 ++ * Don't remap unwritten extents; these are
4068 ++ * speculatively preallocated CoW extents that have been
4069 ++ * allocated but have not yet been involved in a write.
4070 ++ */
4071 ++ if (got.br_state == XFS_EXT_UNWRITTEN) {
4072 ++ idx--;
4073 ++ goto next_extent;
4074 ++ }
4075 ++
4076 + /* Unmap the old blocks in the data fork. */
4077 + xfs_defer_init(&dfops, &firstfsb);
4078 + rlen = del.br_blockcount;
4079 +@@ -855,13 +964,14 @@ STATIC int
4080 + xfs_reflink_update_dest(
4081 + struct xfs_inode *dest,
4082 + xfs_off_t newlen,
4083 +- xfs_extlen_t cowextsize)
4084 ++ xfs_extlen_t cowextsize,
4085 ++ bool is_dedupe)
4086 + {
4087 + struct xfs_mount *mp = dest->i_mount;
4088 + struct xfs_trans *tp;
4089 + int error;
4090 +
4091 +- if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
4092 ++ if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
4093 + return 0;
4094 +
4095 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
4096 +@@ -882,6 +992,10 @@ xfs_reflink_update_dest(
4097 + dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
4098 + }
4099 +
4100 ++ if (!is_dedupe) {
4101 ++ xfs_trans_ichgtime(tp, dest,
4102 ++ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
4103 ++ }
4104 + xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
4105 +
4106 + error = xfs_trans_commit(tp);
4107 +@@ -1195,7 +1309,8 @@ xfs_reflink_remap_range(
4108 + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
4109 + cowextsize = src->i_d.di_cowextsize;
4110 +
4111 +- ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
4112 ++ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
4113 ++ is_dedupe);
4114 +
4115 + out_unlock:
4116 + xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
4117 +@@ -1345,7 +1460,7 @@ xfs_reflink_clear_inode_flag(
4118 + * We didn't find any shared blocks so turn off the reflink flag.
4119 + * First, get rid of any leftover CoW mappings.
4120 + */
4121 +- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
4122 ++ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
4123 + if (error)
4124 + return error;
4125 +
4126 +diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
4127 +index aa6a4d64bd35..b715bacb2ea2 100644
4128 +--- a/fs/xfs/xfs_reflink.h
4129 ++++ b/fs/xfs/xfs_reflink.h
4130 +@@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
4131 + struct xfs_bmbt_irec *imap, bool *shared);
4132 + extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
4133 + xfs_off_t offset, xfs_off_t count);
4134 ++extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
4135 ++ xfs_off_t count);
4136 + extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
4137 + struct xfs_bmbt_irec *imap);
4138 + extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
4139 +@@ -37,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
4140 +
4141 + extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
4142 + struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
4143 +- xfs_fileoff_t end_fsb);
4144 ++ xfs_fileoff_t end_fsb, bool cancel_real);
4145 + extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
4146 +- xfs_off_t count);
4147 ++ xfs_off_t count, bool cancel_real);
4148 + extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
4149 + xfs_off_t count);
4150 + extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
4151 +diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
4152 +index eecbaac08eba..d80187b0e726 100644
4153 +--- a/fs/xfs/xfs_super.c
4154 ++++ b/fs/xfs/xfs_super.c
4155 +@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
4156 + XFS_STATS_INC(ip->i_mount, vn_remove);
4157 +
4158 + if (xfs_is_reflink_inode(ip)) {
4159 +- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
4160 ++ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
4161 + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
4162 + xfs_warn(ip->i_mount,
4163 + "Error %d while evicting CoW blocks for inode %llu.",
4164 +diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
4165 +index 69c5bcd9a51b..375c5e030e5b 100644
4166 +--- a/fs/xfs/xfs_trace.h
4167 ++++ b/fs/xfs/xfs_trace.h
4168 +@@ -3089,6 +3089,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
4169 + __field(xfs_fileoff_t, lblk)
4170 + __field(xfs_extlen_t, len)
4171 + __field(xfs_fsblock_t, pblk)
4172 ++ __field(int, state)
4173 + ),
4174 + TP_fast_assign(
4175 + __entry->dev = VFS_I(ip)->i_sb->s_dev;
4176 +@@ -3096,13 +3097,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
4177 + __entry->lblk = irec->br_startoff;
4178 + __entry->len = irec->br_blockcount;
4179 + __entry->pblk = irec->br_startblock;
4180 ++ __entry->state = irec->br_state;
4181 + ),
4182 +- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
4183 ++ TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
4184 + MAJOR(__entry->dev), MINOR(__entry->dev),
4185 + __entry->ino,
4186 + __entry->lblk,
4187 + __entry->len,
4188 +- __entry->pblk)
4189 ++ __entry->pblk,
4190 ++ __entry->state)
4191 + );
4192 + #define DEFINE_INODE_IREC_EVENT(name) \
4193 + DEFINE_EVENT(xfs_inode_irec_class, name, \
4194 +@@ -3242,11 +3245,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
4195 + DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
4196 + DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
4197 + DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
4198 ++DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
4199 +
4200 + DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
4201 + DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
4202 +
4203 +-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
4204 ++DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
4205 + DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
4206 + DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
4207 +
4208 +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
4209 +index 1c5190dab2c1..e3d146dadceb 100644
4210 +--- a/include/linux/kvm_host.h
4211 ++++ b/include/linux/kvm_host.h
4212 +@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
4213 + int len, void *val);
4214 + int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
4215 + int len, struct kvm_io_device *dev);
4216 +-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4217 +- struct kvm_io_device *dev);
4218 ++void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4219 ++ struct kvm_io_device *dev);
4220 + struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4221 + gpa_t addr);
4222 +
4223 +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
4224 +index 254698856b8f..8b35bdbdc214 100644
4225 +--- a/include/linux/memcontrol.h
4226 ++++ b/include/linux/memcontrol.h
4227 +@@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
4228 + return false;
4229 + }
4230 +
4231 ++static inline void mem_cgroup_update_page_stat(struct page *page,
4232 ++ enum mem_cgroup_stat_index idx,
4233 ++ int nr)
4234 ++{
4235 ++}
4236 ++
4237 + static inline void mem_cgroup_inc_page_stat(struct page *page,
4238 + enum mem_cgroup_stat_index idx)
4239 + {
4240 +diff --git a/kernel/padata.c b/kernel/padata.c
4241 +index 05316c9f32da..3202aa17492c 100644
4242 +--- a/kernel/padata.c
4243 ++++ b/kernel/padata.c
4244 +@@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
4245 +
4246 + reorder = &next_queue->reorder;
4247 +
4248 ++ spin_lock(&reorder->lock);
4249 + if (!list_empty(&reorder->list)) {
4250 + padata = list_entry(reorder->list.next,
4251 + struct padata_priv, list);
4252 +
4253 +- spin_lock(&reorder->lock);
4254 + list_del_init(&padata->list);
4255 + atomic_dec(&pd->reorder_objects);
4256 +- spin_unlock(&reorder->lock);
4257 +
4258 + pd->processed++;
4259 +
4260 ++ spin_unlock(&reorder->lock);
4261 + goto out;
4262 + }
4263 ++ spin_unlock(&reorder->lock);
4264 +
4265 + if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
4266 + padata = ERR_PTR(-ENODATA);
4267 +diff --git a/lib/syscall.c b/lib/syscall.c
4268 +index 63239e097b13..a72cd0996230 100644
4269 +--- a/lib/syscall.c
4270 ++++ b/lib/syscall.c
4271 +@@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
4272 +
4273 + if (!try_get_task_stack(target)) {
4274 + /* Task has no stack, so the task isn't in a syscall. */
4275 ++ *sp = *pc = 0;
4276 + *callno = -1;
4277 + return 0;
4278 + }
4279 +diff --git a/mm/hugetlb.c b/mm/hugetlb.c
4280 +index c7025c132670..968b547f3b90 100644
4281 +--- a/mm/hugetlb.c
4282 ++++ b/mm/hugetlb.c
4283 +@@ -4474,6 +4474,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
4284 + {
4285 + struct page *page = NULL;
4286 + spinlock_t *ptl;
4287 ++ pte_t pte;
4288 + retry:
4289 + ptl = pmd_lockptr(mm, pmd);
4290 + spin_lock(ptl);
4291 +@@ -4483,12 +4484,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
4292 + */
4293 + if (!pmd_huge(*pmd))
4294 + goto out;
4295 +- if (pmd_present(*pmd)) {
4296 ++ pte = huge_ptep_get((pte_t *)pmd);
4297 ++ if (pte_present(pte)) {
4298 + page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
4299 + if (flags & FOLL_GET)
4300 + get_page(page);
4301 + } else {
4302 +- if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
4303 ++ if (is_hugetlb_entry_migration(pte)) {
4304 + spin_unlock(ptl);
4305 + __migration_entry_wait(mm, (pte_t *)pmd, ptl);
4306 + goto retry;
4307 +diff --git a/mm/rmap.c b/mm/rmap.c
4308 +index 91619fd70939..a40d990eede0 100644
4309 +--- a/mm/rmap.c
4310 ++++ b/mm/rmap.c
4311 +@@ -1294,7 +1294,7 @@ void page_add_file_rmap(struct page *page, bool compound)
4312 + goto out;
4313 + }
4314 + __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
4315 +- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
4316 ++ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
4317 + out:
4318 + unlock_page_memcg(page);
4319 + }
4320 +@@ -1334,7 +1334,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
4321 + * pte lock(a spinlock) is held, which implies preemption disabled.
4322 + */
4323 + __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
4324 +- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
4325 ++ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);
4326 +
4327 + if (unlikely(PageMlocked(page)))
4328 + clear_page_mlock(page);
4329 +diff --git a/mm/workingset.c b/mm/workingset.c
4330 +index a67f5796b995..dda16cf9599f 100644
4331 +--- a/mm/workingset.c
4332 ++++ b/mm/workingset.c
4333 +@@ -533,7 +533,7 @@ static int __init workingset_init(void)
4334 + pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
4335 + timestamp_bits, max_order, bucket_order);
4336 +
4337 +- ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
4338 ++ ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
4339 + if (ret)
4340 + goto err;
4341 + ret = register_shrinker(&workingset_shadow_shrinker);
4342 +diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
4343 +index 770c52701efa..140b067d5d57 100644
4344 +--- a/net/ceph/messenger.c
4345 ++++ b/net/ceph/messenger.c
4346 +@@ -7,6 +7,7 @@
4347 + #include <linux/kthread.h>
4348 + #include <linux/net.h>
4349 + #include <linux/nsproxy.h>
4350 ++#include <linux/sched.h>
4351 + #include <linux/slab.h>
4352 + #include <linux/socket.h>
4353 + #include <linux/string.h>
4354 +@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
4355 + {
4356 + struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
4357 + struct socket *sock;
4358 ++ unsigned int noio_flag;
4359 + int ret;
4360 +
4361 + BUG_ON(con->sock);
4362 ++
4363 ++ /* sock_create_kern() allocates with GFP_KERNEL */
4364 ++ noio_flag = memalloc_noio_save();
4365 + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
4366 + SOCK_STREAM, IPPROTO_TCP, &sock);
4367 ++ memalloc_noio_restore(noio_flag);
4368 + if (ret)
4369 + return ret;
4370 + sock->sk->sk_allocation = GFP_NOFS;
4371 +diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
4372 +index 3f4efcb85df5..3490d21ab9e7 100644
4373 +--- a/sound/core/seq/seq_fifo.c
4374 ++++ b/sound/core/seq/seq_fifo.c
4375 +@@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize)
4376 + /* NOTE: overflow flag is not cleared */
4377 + spin_unlock_irqrestore(&f->lock, flags);
4378 +
4379 ++ /* close the old pool and wait until all users are gone */
4380 ++ snd_seq_pool_mark_closing(oldpool);
4381 ++ snd_use_lock_sync(&f->use_lock);
4382 ++
4383 + /* release cells in old pool */
4384 + for (cell = oldhead; cell; cell = next) {
4385 + next = cell->next;
4386 +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
4387 +index c813ad857650..152c7ed65254 100644
4388 +--- a/sound/pci/hda/patch_realtek.c
4389 ++++ b/sound/pci/hda/patch_realtek.c
4390 +@@ -4846,6 +4846,7 @@ enum {
4391 + ALC292_FIXUP_DISABLE_AAMIX,
4392 + ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
4393 + ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
4394 ++ ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
4395 + ALC275_FIXUP_DELL_XPS,
4396 + ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
4397 + ALC293_FIXUP_LENOVO_SPK_NOISE,
4398 +@@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = {
4399 + .chained = true,
4400 + .chain_id = ALC269_FIXUP_HEADSET_MODE
4401 + },
4402 ++ [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = {
4403 ++ .type = HDA_FIXUP_PINS,
4404 ++ .v.pins = (const struct hda_pintbl[]) {
4405 ++ { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
4406 ++ { }
4407 ++ },
4408 ++ .chained = true,
4409 ++ .chain_id = ALC269_FIXUP_HEADSET_MODE
4410 ++ },
4411 + [ALC275_FIXUP_DELL_XPS] = {
4412 + .type = HDA_FIXUP_VERBS,
4413 + .v.verbs = (const struct hda_verb[]) {
4414 +@@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = {
4415 + .type = HDA_FIXUP_FUNC,
4416 + .v.func = alc298_fixup_speaker_volume,
4417 + .chained = true,
4418 +- .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
4419 ++ .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
4420 + },
4421 + [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
4422 + .type = HDA_FIXUP_PINS,
4423 +diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
4424 +index 89ac5f5a93eb..7ae46c2647d4 100644
4425 +--- a/sound/soc/atmel/atmel-classd.c
4426 ++++ b/sound/soc/atmel/atmel-classd.c
4427 +@@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai,
4428 + }
4429 +
4430 + #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8)
4431 +-#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8)
4432 ++#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8)
4433 +
4434 + static struct {
4435 + int rate;
4436 +diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c
4437 +index 324461e985b3..fe2cf1ed8237 100644
4438 +--- a/sound/soc/codecs/rt5665.c
4439 ++++ b/sound/soc/codecs/rt5665.c
4440 +@@ -1241,7 +1241,7 @@ static irqreturn_t rt5665_irq(int irq, void *data)
4441 + static void rt5665_jd_check_handler(struct work_struct *work)
4442 + {
4443 + struct rt5665_priv *rt5665 = container_of(work, struct rt5665_priv,
4444 +- calibrate_work.work);
4445 ++ jd_check_work.work);
4446 +
4447 + if (snd_soc_read(rt5665->codec, RT5665_AJD1_CTRL) & 0x0010) {
4448 + /* jack out */
4449 +diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
4450 +index bd313c907b20..172d7db1653c 100644
4451 +--- a/sound/soc/intel/skylake/skl-topology.c
4452 ++++ b/sound/soc/intel/skylake/skl-topology.c
4453 +@@ -486,7 +486,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w)
4454 + if (bc->set_params != SKL_PARAM_INIT)
4455 + continue;
4456 +
4457 +- mconfig->formats_config.caps = (u32 *)&bc->params;
4458 ++ mconfig->formats_config.caps = (u32 *)bc->params;
4459 + mconfig->formats_config.caps_size = bc->size;
4460 +
4461 + break;
4462 +diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
4463 +index a29786dd9522..4d28a9ddbee0 100644
4464 +--- a/virt/kvm/eventfd.c
4465 ++++ b/virt/kvm/eventfd.c
4466 +@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
4467 + continue;
4468 +
4469 + kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
4470 +- kvm->buses[bus_idx]->ioeventfd_count--;
4471 ++ if (kvm->buses[bus_idx])
4472 ++ kvm->buses[bus_idx]->ioeventfd_count--;
4473 + ioeventfd_release(p);
4474 + ret = 0;
4475 + break;
4476 +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
4477 +index 482612b4e496..da5db473afb0 100644
4478 +--- a/virt/kvm/kvm_main.c
4479 ++++ b/virt/kvm/kvm_main.c
4480 +@@ -723,8 +723,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
4481 + list_del(&kvm->vm_list);
4482 + spin_unlock(&kvm_lock);
4483 + kvm_free_irq_routing(kvm);
4484 +- for (i = 0; i < KVM_NR_BUSES; i++)
4485 +- kvm_io_bus_destroy(kvm->buses[i]);
4486 ++ for (i = 0; i < KVM_NR_BUSES; i++) {
4487 ++ if (kvm->buses[i])
4488 ++ kvm_io_bus_destroy(kvm->buses[i]);
4489 ++ kvm->buses[i] = NULL;
4490 ++ }
4491 + kvm_coalesced_mmio_free(kvm);
4492 + #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
4493 + mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
4494 +@@ -3473,6 +3476,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
4495 + };
4496 +
4497 + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
4498 ++ if (!bus)
4499 ++ return -ENOMEM;
4500 + r = __kvm_io_bus_write(vcpu, bus, &range, val);
4501 + return r < 0 ? r : 0;
4502 + }
4503 +@@ -3490,6 +3495,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
4504 + };
4505 +
4506 + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
4507 ++ if (!bus)
4508 ++ return -ENOMEM;
4509 +
4510 + /* First try the device referenced by cookie. */
4511 + if ((cookie >= 0) && (cookie < bus->dev_count) &&
4512 +@@ -3540,6 +3547,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
4513 + };
4514 +
4515 + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
4516 ++ if (!bus)
4517 ++ return -ENOMEM;
4518 + r = __kvm_io_bus_read(vcpu, bus, &range, val);
4519 + return r < 0 ? r : 0;
4520 + }
4521 +@@ -3552,6 +3561,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
4522 + struct kvm_io_bus *new_bus, *bus;
4523 +
4524 + bus = kvm->buses[bus_idx];
4525 ++ if (!bus)
4526 ++ return -ENOMEM;
4527 ++
4528 + /* exclude ioeventfd which is limited by maximum fd */
4529 + if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
4530 + return -ENOSPC;
4531 +@@ -3571,37 +3583,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
4532 + }
4533 +
4534 + /* Caller must hold slots_lock. */
4535 +-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4536 +- struct kvm_io_device *dev)
4537 ++void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4538 ++ struct kvm_io_device *dev)
4539 + {
4540 +- int i, r;
4541 ++ int i;
4542 + struct kvm_io_bus *new_bus, *bus;
4543 +
4544 + bus = kvm->buses[bus_idx];
4545 +- r = -ENOENT;
4546 ++ if (!bus)
4547 ++ return;
4548 ++
4549 + for (i = 0; i < bus->dev_count; i++)
4550 + if (bus->range[i].dev == dev) {
4551 +- r = 0;
4552 + break;
4553 + }
4554 +
4555 +- if (r)
4556 +- return r;
4557 ++ if (i == bus->dev_count)
4558 ++ return;
4559 +
4560 + new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
4561 + sizeof(struct kvm_io_range)), GFP_KERNEL);
4562 +- if (!new_bus)
4563 +- return -ENOMEM;
4564 ++ if (!new_bus) {
4565 ++ pr_err("kvm: failed to shrink bus, removing it completely\n");
4566 ++ goto broken;
4567 ++ }
4568 +
4569 + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
4570 + new_bus->dev_count--;
4571 + memcpy(new_bus->range + i, bus->range + i + 1,
4572 + (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
4573 +
4574 ++broken:
4575 + rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
4576 + synchronize_srcu_expedited(&kvm->srcu);
4577 + kfree(bus);
4578 +- return r;
4579 ++ return;
4580 + }
4581 +
4582 + struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4583 +@@ -3614,6 +3630,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4584 + srcu_idx = srcu_read_lock(&kvm->srcu);
4585 +
4586 + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
4587 ++ if (!bus)
4588 ++ goto out_unlock;
4589 +
4590 + dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
4591 + if (dev_idx < 0)