commit: 967c9f3fb4a6516ac6fdd36c4c8cf03b92724745
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Wed Aug 15 16:44:45 2018 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Wed Aug 15 16:44:45 2018 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=967c9f3f

Linux patch 4.4.148

0000_README | 4 +
1147_linux-4.4.148.patch | 1873 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1877 insertions(+)

diff --git a/0000_README b/0000_README
index 84033e9..99860c8 100644
--- a/0000_README
+++ b/0000_README
@@ -631,6 +631,10 @@ Patch: 1146_linux-4.4.147.patch
From: http://www.kernel.org
Desc: Linux 4.4.147

+Patch: 1147_linux-4.4.148.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.148
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1147_linux-4.4.148.patch b/1147_linux-4.4.148.patch
new file mode 100644
index 0000000..ea24e41
--- /dev/null
+++ b/1147_linux-4.4.148.patch
@@ -0,0 +1,1873 @@
+diff --git a/Makefile b/Makefile
+index ee92a12e3a4b..9b795164122e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 147
++SUBLEVEL = 148
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
+index 167f77b3bd43..6963dff815dc 100644
+--- a/arch/arm/boot/dts/imx6sx.dtsi
++++ b/arch/arm/boot/dts/imx6sx.dtsi
+@@ -1250,7 +1250,7 @@
+ /* non-prefetchable memory */
+ 0x82000000 0 0x08000000 0x08000000 0 0x00f00000>;
+ num-lanes = <1>;
+- interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
++ interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6SX_CLK_PCIE_REF_125M>,
+ <&clks IMX6SX_CLK_PCIE_AXI>,
+ <&clks IMX6SX_CLK_LVDS1_OUT>,
+diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
+index 729f89163bc3..210b3d675261 100644
+--- a/arch/parisc/Kconfig
++++ b/arch/parisc/Kconfig
+@@ -177,7 +177,7 @@ config PREFETCH
+
+ config MLONGCALLS
+ bool "Enable the -mlong-calls compiler option for big kernels"
+- def_bool y if (!MODULES)
++ default y
+ depends on PA8X00
+ help
+ If you configure the kernel to include many drivers built-in instead
+diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
+new file mode 100644
+index 000000000000..dbaaca84f27f
+--- /dev/null
++++ b/arch/parisc/include/asm/barrier.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __ASM_BARRIER_H
++#define __ASM_BARRIER_H
++
++#ifndef __ASSEMBLY__
++
++/* The synchronize caches instruction executes as a nop on systems in
++ which all memory references are performed in order. */
++#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory")
++
++#if defined(CONFIG_SMP)
++#define mb() do { synchronize_caches(); } while (0)
++#define rmb() mb()
++#define wmb() mb()
++#define dma_rmb() mb()
++#define dma_wmb() mb()
++#else
++#define mb() barrier()
++#define rmb() barrier()
++#define wmb() barrier()
++#define dma_rmb() barrier()
++#define dma_wmb() barrier()
++#endif
++
++#define __smp_mb() mb()
++#define __smp_rmb() mb()
++#define __smp_wmb() mb()
++
++#include <asm-generic/barrier.h>
++
++#endif /* !__ASSEMBLY__ */
++#endif /* __ASM_BARRIER_H */
+diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
+index 5dc831955de5..13cb2461fef5 100644
+--- a/arch/parisc/kernel/entry.S
++++ b/arch/parisc/kernel/entry.S
+@@ -481,6 +481,8 @@
+ /* Release pa_tlb_lock lock without reloading lock address. */
+ .macro tlb_unlock0 spc,tmp
+ #ifdef CONFIG_SMP
++ or,COND(=) %r0,\spc,%r0
++ sync
+ or,COND(=) %r0,\spc,%r0
+ stw \spc,0(\tmp)
+ #endif
+diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
+index 16073f472118..b3434a7fd3c9 100644
+--- a/arch/parisc/kernel/pacache.S
++++ b/arch/parisc/kernel/pacache.S
+@@ -354,6 +354,7 @@ ENDPROC(flush_data_cache_local)
+ .macro tlb_unlock la,flags,tmp
+ #ifdef CONFIG_SMP
+ ldi 1,\tmp
++ sync
+ stw \tmp,0(\la)
+ mtsm \flags
+ #endif
+diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
+index 9f22195b90ed..f68eedc72484 100644
+--- a/arch/parisc/kernel/syscall.S
++++ b/arch/parisc/kernel/syscall.S
+@@ -631,6 +631,7 @@ cas_action:
+ sub,<> %r28, %r25, %r0
+ 2: stw,ma %r24, 0(%r26)
+ /* Free lock */
++ sync
+ stw,ma %r20, 0(%sr2,%r20)
+ #if ENABLE_LWS_DEBUG
+ /* Clear thread register indicator */
+@@ -645,6 +646,7 @@ cas_action:
+ 3:
+ /* Error occurred on load or store */
+ /* Free lock */
++ sync
+ stw %r20, 0(%sr2,%r20)
+ #if ENABLE_LWS_DEBUG
+ stw %r0, 4(%sr2,%r20)
+@@ -846,6 +848,7 @@ cas2_action:
+
+ cas2_end:
+ /* Free lock */
++ sync
+ stw,ma %r20, 0(%sr2,%r20)
+ /* Enable interrupts */
+ ssm PSW_SM_I, %r0
+@@ -856,6 +859,7 @@ cas2_end:
+ 22:
+ /* Error occurred on load or store */
+ /* Free lock */
++ sync
+ stw %r20, 0(%sr2,%r20)
+ ssm PSW_SM_I, %r0
+ ldo 1(%r0),%r28
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index f4b175db70f4..dd2269dcbc47 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -193,12 +193,12 @@
+ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
++
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+
+-#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+-
+ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
+
+@@ -214,7 +214,7 @@
+ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+-
++#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+@@ -310,6 +310,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
+ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+
+@@ -331,5 +332,6 @@
+ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
++#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
+index 0056bc945cd1..cb7f04981c6b 100644
+--- a/arch/x86/include/asm/irqflags.h
++++ b/arch/x86/include/asm/irqflags.h
+@@ -8,6 +8,8 @@
+ * Interrupt control:
+ */
+
++/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */
++extern inline unsigned long native_save_fl(void);
+ extern inline unsigned long native_save_fl(void)
+ {
+ unsigned long flags;
+diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
+index 3a52ee0e726d..bfceb5cc6347 100644
+--- a/arch/x86/include/asm/page_32_types.h
++++ b/arch/x86/include/asm/page_32_types.h
+@@ -27,8 +27,13 @@
+ #define N_EXCEPTION_STACKS 1
+
+ #ifdef CONFIG_X86_PAE
+-/* 44=32+12, the limit we can fit into an unsigned long pfn */
+-#define __PHYSICAL_MASK_SHIFT 44
++/*
++ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
++ * but we need the full mask to make sure inverted PROT_NONE
++ * entries have all the host bits set in a guest.
++ * The real limit is still 44 bits.
++ */
++#define __PHYSICAL_MASK_SHIFT 52
+ #define __VIRTUAL_MASK_SHIFT 32
+
+ #else /* !CONFIG_X86_PAE */
+diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
+index fd74a11959de..89c50332a71e 100644
+--- a/arch/x86/include/asm/pgtable-2level.h
++++ b/arch/x86/include/asm/pgtable-2level.h
+@@ -77,4 +77,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
++/* No inverted PFNs on 2 level page tables */
++
++static inline u64 protnone_mask(u64 val)
++{
++ return 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++ return val;
++}
++
++static inline bool __pte_needs_invert(u64 val)
++{
++ return false;
++}
++
+ #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
+diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
+index cdaa58c9b39e..5c686382d84b 100644
+--- a/arch/x86/include/asm/pgtable-3level.h
++++ b/arch/x86/include/asm/pgtable-3level.h
+@@ -177,11 +177,44 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
+ #endif
+
+ /* Encode and de-code a swap entry */
++#define SWP_TYPE_BITS 5
++
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
++
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
+ #define __swp_type(x) (((x).val) & 0x1f)
+ #define __swp_offset(x) ((x).val >> 5)
+ #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
+-#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
+-#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
++
++/*
++ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
++ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
++ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
++ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
++ * __swp_entry_to_pte() through the following helper macro based on 64bit
++ * __swp_entry().
++ */
++#define __swp_pteval_entry(type, offset) ((pteval_t) { \
++ (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
++
++#define __swp_entry_to_pte(x) ((pte_t){ .pte = \
++ __swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
++/*
++ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
++ * swp_entry_t, but also has to convert it from 64bit to the 32bit
++ * intermediate representation, using the following macros based on 64bit
++ * __swp_type() and __swp_offset().
++ */
++#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
++#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
++
++#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
++ __pteval_swp_offset(pte)))
++
++#include <asm/pgtable-invert.h>
+
+ #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
+diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
+new file mode 100644
+index 000000000000..44b1203ece12
+--- /dev/null
++++ b/arch/x86/include/asm/pgtable-invert.h
+@@ -0,0 +1,32 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_PGTABLE_INVERT_H
++#define _ASM_PGTABLE_INVERT_H 1
++
++#ifndef __ASSEMBLY__
++
++static inline bool __pte_needs_invert(u64 val)
++{
++ return !(val & _PAGE_PRESENT);
++}
++
++/* Get a mask to xor with the page table entry to get the correct pfn. */
++static inline u64 protnone_mask(u64 val)
++{
++ return __pte_needs_invert(val) ? ~0ull : 0;
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
++{
++ /*
++ * When a PTE transitions from NONE to !NONE or vice-versa
++ * invert the PFN part to stop speculation.
++ * pte_pfn undoes this when needed.
++ */
++ if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
++ val = (val & ~mask) | (~val & mask);
++ return val;
++}
++
++#endif /* __ASSEMBLY__ */
++
++#endif
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 84c62d950023..4de6c282c02a 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -148,19 +148,29 @@ static inline int pte_special(pte_t pte)
+ return pte_flags(pte) & _PAGE_SPECIAL;
+ }
+
++/* Entries that were set to PROT_NONE are inverted */
++
++static inline u64 protnone_mask(u64 val);
++
+ static inline unsigned long pte_pfn(pte_t pte)
+ {
+- return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
++ phys_addr_t pfn = pte_val(pte);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+- return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
++ phys_addr_t pfn = pmd_val(pmd);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+ }
+
+ static inline unsigned long pud_pfn(pud_t pud)
+ {
+- return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
++ phys_addr_t pfn = pud_val(pud);
++ pfn ^= protnone_mask(pfn);
++ return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+ }
+
+ #define pte_page(pte) pfn_to_page(pte_pfn(pte))
+@@ -305,11 +315,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
+ return pmd_set_flags(pmd, _PAGE_RW);
+ }
+
+-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+-{
+- return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
+-}
+-
+ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+ static inline int pte_soft_dirty(pte_t pte)
+ {
+@@ -359,19 +364,58 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
+
+ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PTE_PFN_MASK;
++ return __pte(pfn | massage_pgprot(pgprot));
+ }
+
+ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+ {
+- return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
+- massage_pgprot(pgprot));
++ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PHYSICAL_PMD_PAGE_MASK;
++ return __pmd(pfn | massage_pgprot(pgprot));
++}
++
++static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
++{
++ phys_addr_t pfn = page_nr << PAGE_SHIFT;
++ pfn ^= protnone_mask(pgprot_val(pgprot));
++ pfn &= PHYSICAL_PUD_PAGE_MASK;
++ return __pud(pfn | massage_pgprot(pgprot));
++}
++
++static inline pmd_t pmd_mknotpresent(pmd_t pmd)
++{
++ return pfn_pmd(pmd_pfn(pmd),
++ __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+ }
+
++static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
++{
++ pudval_t v = native_pud_val(pud);
++
++ return __pud(v | set);
++}
++
++static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
++{
++ pudval_t v = native_pud_val(pud);
++
++ return __pud(v & ~clear);
++}
++
++static inline pud_t pud_mkhuge(pud_t pud)
++{
++ return pud_set_flags(pud, _PAGE_PSE);
++}
++
++static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
++
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+- pteval_t val = pte_val(pte);
++ pteval_t val = pte_val(pte), oldval = val;
+
+ /*
+ * Chop off the NX bit (if present), and add the NX portion of
+@@ -379,17 +423,17 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ */
+ val &= _PAGE_CHG_MASK;
+ val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
+-
++ val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
+ return __pte(val);
+ }
+
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ {
+- pmdval_t val = pmd_val(pmd);
++ pmdval_t val = pmd_val(pmd), oldval = val;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+-
++ val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
+ return __pmd(val);
+ }
+
+@@ -926,6 +970,14 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+ }
+ #endif
+
++#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
++extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
++
++static inline bool arch_has_pfn_modify_check(void)
++{
++ return boot_cpu_has_bug(X86_BUG_L1TF);
++}
++
+ #include <asm-generic/pgtable.h>
+ #endif /* __ASSEMBLY__ */
+
+diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
+index c810226e741a..221a32ed1372 100644
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -163,18 +163,52 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
+ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
+ #define pte_unmap(pte) ((void)(pte))/* NOP */
+
+-/* Encode and de-code a swap entry */
+-#define SWP_TYPE_BITS 5
+-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
++/*
++ * Encode and de-code a swap entry
++ *
++ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
++ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
++ * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
++ *
++ * G (8) is aliased and used as a PROT_NONE indicator for
++ * !present ptes. We need to start storing swap entries above
++ * there. We also need to avoid using A and D because of an
++ * erratum where they can be incorrectly set by hardware on
++ * non-present PTEs.
++ *
++ * SD (1) in swp entry is used to store soft dirty bit, which helps us
++ * remember soft dirty over page migration
++ *
++ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
++ * but also L and G.
++ *
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
++ */
++#define SWP_TYPE_BITS 5
++
++#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
++
++/* We always extract/encode the offset by shifting it all the way up, and then down again */
++#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
+
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+
+-#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
+- & ((1U << SWP_TYPE_BITS) - 1))
+-#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
+-#define __swp_entry(type, offset) ((swp_entry_t) { \
+- ((type) << (_PAGE_BIT_PRESENT + 1)) \
+- | ((offset) << SWP_OFFSET_SHIFT) })
++/* Extract the high bits for type */
++#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
++
++/* Shift up (to get rid of type), then down to get value */
++#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
++
++/*
++ * Shift the offset up "too far" by TYPE bits, then down again
++ * The offset is inverted by a binary not operation to make the high
++ * physical bits set.
++ */
++#define __swp_entry(type, offset) ((swp_entry_t) { \
++ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
++ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
++
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
+@@ -201,6 +235,8 @@ extern void cleanup_highmap(void);
+ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+ extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
++#include <asm/pgtable-invert.h>
++
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_X86_PGTABLE_64_H */
+diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
+index 8dba273da25a..7572ce32055e 100644
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -70,15 +70,15 @@
+ /*
+ * Tracking soft dirty bit when a page goes to a swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+- * with swap entry format. On x86 bits 6 and 7 are *not* involved
+- * into swap entry computation, but bit 6 is used for nonlinear
+- * file mapping, so we borrow bit 7 for soft dirty tracking.
++ * with swap entry format. On x86 bits 1-4 are *not* involved
++ * into swap entry computation, but bit 7 is used for thp migration,
++ * so we borrow bit 1 for soft dirty tracking.
+ *
+ * Please note that this bit must be treated as swap dirty page
+- * mark if and only if the PTE has present bit clear!
++ * mark if and only if the PTE/PMD has present bit clear!
+ */
+ #ifdef CONFIG_MEM_SOFT_DIRTY
+-#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
++#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW
+ #else
+ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
+ #endif
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 8e415cf65457..a3a53955f01c 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -172,6 +172,11 @@ extern const struct seq_operations cpuinfo_op;
+
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+
++static inline unsigned long l1tf_pfn_limit(void)
++{
++ return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
++}
++
+ extern void early_cpu_init(void);
+ extern void identify_boot_cpu(void);
+ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 12a8867071f3..34e4aaaf03d2 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -26,9 +26,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
+ #include <asm/intel-family.h>
++#include <asm/e820.h>
+
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
++static void __init l1tf_select_mitigation(void);
+
+ /*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+@@ -80,6 +82,8 @@ void __init check_bugs(void)
+ */
+ ssb_select_mitigation();
+
++ l1tf_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+@@ -309,23 +313,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ return cmd;
+ }
+
+-/* Check for Skylake-like CPUs (for RSB handling) */
+-static bool __init is_skylake_era(void)
+-{
+- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+- boot_cpu_data.x86 == 6) {
+- switch (boot_cpu_data.x86_model) {
+- case INTEL_FAM6_SKYLAKE_MOBILE:
+- case INTEL_FAM6_SKYLAKE_DESKTOP:
+- case INTEL_FAM6_SKYLAKE_X:
+- case INTEL_FAM6_KABYLAKE_MOBILE:
+- case INTEL_FAM6_KABYLAKE_DESKTOP:
+- return true;
+- }
+- }
+- return false;
+-}
+-
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -386,22 +373,15 @@ retpoline_auto:
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+- * If neither SMEP nor PTI are available, there is a risk of
+- * hitting userspace addresses in the RSB after a context switch
+- * from a shallow call stack to a deeper one. To prevent this fill
+- * the entire RSB, even when using IBRS.
++ * If spectre v2 protection has been enabled, unconditionally fill
++ * RSB during a context switch; this protects against two independent
++ * issues:
+ *
+- * Skylake era CPUs have a separate issue with *underflow* of the
+- * RSB, when they will predict 'ret' targets from the generic BTB.
+- * The proper mitigation for this is IBRS. If IBRS is not supported
+- * or deactivated in favour of retpolines the RSB fill on context
+- * switch is required.
++ * - RSB underflow (and switch to BTB) on Skylake+
++ * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
+ */
+- if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
+- !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+- setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+- pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
+- }
++ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
++ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+@@ -652,6 +632,35 @@ void x86_spec_ctrl_setup_ap(void)
+ x86_amd_ssb_disable();
+ }
+
++#undef pr_fmt
++#define pr_fmt(fmt) "L1TF: " fmt
++static void __init l1tf_select_mitigation(void)
++{
++ u64 half_pa;
++
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return;
++
++#if CONFIG_PGTABLE_LEVELS == 2
++ pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
++ return;
++#endif
++
++ /*
++ * This is extremely unlikely to happen because almost all
++ * systems have far more MAX_PA/2 than RAM can be fit into
++ * DIMM slots.
++ */
++ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
++ if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) {
++ pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
++ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
++}
++#undef pr_fmt
++
+ #ifdef CONFIG_SYSFS
+
+ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+@@ -679,6 +688,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
++ case X86_BUG_L1TF:
++ if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
++ return sprintf(buf, "Mitigation: Page Table Inversion\n");
++ break;
++
+ default:
+ break;
+ }
+@@ -705,4 +719,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
+ {
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
+ }
++
++ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
++}
+ #endif
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 3d21b28f9826..4d3fa79c0f09 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -880,6 +880,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+ {}
+ };
+
++static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
++ /* in addition to cpu_no_speculation */
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
++ {}
++};
++
+ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ {
+ u64 ia32_cap = 0;
+@@ -905,6 +920,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ return;
+
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++
++ if (x86_match_cpu(cpu_no_l1tf))
++ return;
++
++ setup_force_cpu_bug(X86_BUG_L1TF);
+ }
+
+ /*
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index 1f5c47a49e35..c6f466d6cc57 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -393,7 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src)
+ newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
+ if ((s64) (s32) newdisp != newdisp) {
+ pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
+- pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
+ return 0;
+ }
+ disp = (u8 *) dest + insn_offset_displacement(&insn);
+@@ -609,8 +608,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+ * Raise a BUG or we'll continue in an endless reentering loop
+ * and eventually a stack overflow.
+ */
+- printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+- p->addr);
++ pr_err("Unrecoverable kprobe detected.\n");
+ dump_kprobe(p);
+ BUG();
+ default:
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index f534a0e3af53..632195b41688 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -97,10 +97,12 @@ unsigned paravirt_patch_call(void *insnbuf,
+ struct branch *b = insnbuf;
+ unsigned long delta = (unsigned long)target - (addr+5);
+
+- if (tgt_clobbers & ~site_clobbers)
+- return len; /* target would clobber too much for this site */
+- if (len < 5)
++ if (len < 5) {
++#ifdef CONFIG_RETPOLINE
++ WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
++#endif
+ return len; /* call too long for patch site */
++ }
+
+ b->opcode = 0xe8; /* call */
+ b->delta = delta;
+@@ -115,8 +117,12 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+ struct branch *b = insnbuf;
+ unsigned long delta = (unsigned long)target - (addr+5);
+
+- if (len < 5)
++ if (len < 5) {
++#ifdef CONFIG_RETPOLINE
++ WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
++#endif
+ return len; /* call too long for patch site */
++ }
+
+ b->opcode = 0xe9; /* jmp */
+ b->delta = delta;
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index bbaae4cf9e8e..31c4bc0d3372 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -851,6 +851,12 @@ void __init setup_arch(char **cmdline_p)
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__bss_stop - (unsigned long)_text);
+
++ /*
++ * Make sure page 0 is always reserved because on systems with
++ * L1TF its contents can be leaked to user processes.
++ */
++ memblock_reserve(0, PAGE_SIZE);
++
+ early_reserve_initrd();
+
+ /*
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 151fd33e9043..4954a6cef50a 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -4,6 +4,8 @@
+ #include <linux/swap.h>
+ #include <linux/memblock.h>
+ #include <linux/bootmem.h> /* for max_low_pfn */
++#include <linux/swapfile.h>
++#include <linux/swapops.h>
+
+ #include <asm/cacheflush.h>
+ #include <asm/e820.h>
+@@ -767,3 +769,26 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
+ __cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
+ __pte2cachemode_tbl[entry] = cache;
+ }
++
++#ifdef CONFIG_SWAP
++unsigned long max_swapfile_size(void)
++{
++ unsigned long pages;
++
++ pages = generic_max_swapfile_size();
++
++ if (boot_cpu_has_bug(X86_BUG_L1TF)) {
++ /* Limit the swap file size to MAX_PA/2 for L1TF workaround */
++ unsigned long l1tf_limit = l1tf_pfn_limit() + 1;
++ /*
++ * We encode swap offsets also with 3 bits below those for pfn
++ * which makes the usable limit higher.
++ */
++#if CONFIG_PGTABLE_LEVELS > 2
++ l1tf_limit <<= PAGE_SHIFT - SWP_OFFSET_FIRST_BIT;
++#endif
++ pages = min_t(unsigned long, l1tf_limit, pages);
++ }
++ return pages;
++}
++#endif
+diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
+index 76604c8a2a48..7bf14e74fc8f 100644
+--- a/arch/x86/mm/kmmio.c
++++ b/arch/x86/mm/kmmio.c
+@@ -125,24 +125,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
+
+ static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
+ {
++ pmd_t new_pmd;
+ pmdval_t v = pmd_val(*pmd);
+ if (clear) {
+- *old = v & _PAGE_PRESENT;
+- v &= ~_PAGE_PRESENT;
+- } else /* presume this has been called with clear==true previously */
+- v |= *old;
+- set_pmd(pmd, __pmd(v));
++ *old = v;
++ new_pmd = pmd_mknotpresent(*pmd);
++ } else {
++ /* Presume this has been called with clear==true previously */
++ new_pmd = __pmd(*old);
++ }
++ set_pmd(pmd, new_pmd);
+ }
+
+ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
+ {
+ pteval_t v = pte_val(*pte);
+ if (clear) {
+- *old = v & _PAGE_PRESENT;
+- v &= ~_PAGE_PRESENT;
+- } else /* presume this has been called with clear==true previously */
+- v |= *old;
+- set_pte_atomic(pte, __pte(v));
++ *old = v;
++ /* Nothing should care about address */
++ pte_clear(&init_mm, 0, pte);
++ } else {
++ /* Presume this has been called with clear==true previously */
++ set_pte_atomic(pte, __pte(*old));
++ }
+ }
+
+ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
+diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
+index 307f60ecfc6d..9a055ea279eb 100644
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -121,3 +121,24 @@ const char *arch_vma_name(struct vm_area_struct *vma)
+ return "[mpx]";
+ return NULL;
+ }
++
++/*
++ * Only allow root to set high MMIO mappings to PROT_NONE.
++ * This prevents an unpriv. user to set them to PROT_NONE and invert
++ * them, then pointing to valid memory for L1TF speculation.
++ *
++ * Note: for locked down kernels may want to disable the root override.
++ */
++bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
++{
++ if (!boot_cpu_has_bug(X86_BUG_L1TF))
++ return true;
++ if (!__pte_needs_invert(pgprot_val(prot)))
++ return true;
++ /* If it's real memory always allow */
++ if (pfn_valid(pfn))
++ return true;
++ if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
++ return false;
++ return true;
++}
+diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
+index 79377e2a7bcd..27610c2d1821 100644
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -1006,8 +1006,8 @@ static int populate_pmd(struct cpa_data *cpa,
+
+ pmd = pmd_offset(pud, start);
+
+- set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE |
+- massage_pgprot(pmd_pgprot)));
++ set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
++ canon_pgprot(pmd_pgprot))));
+
+ start += PMD_SIZE;
+ cpa->pfn += PMD_SIZE;
+@@ -1079,8 +1079,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+ * Map everything starting from the Gb boundary, possibly with 1G pages
+ */
+ while (end - start >= PUD_SIZE) {
+- set_pud(pud, __pud(cpa->pfn | _PAGE_PSE |
+- massage_pgprot(pud_pgprot)));
++ set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
++ canon_pgprot(pud_pgprot))));
+
+ start += PUD_SIZE;
+ cpa->pfn += PUD_SIZE;
+diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
+index f9e0d09f7c66..8a0f77fb5181 100644
+--- a/drivers/acpi/acpi_lpss.c
++++ b/drivers/acpi/acpi_lpss.c
+@@ -154,10 +154,12 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = {
+
+ static const struct lpss_device_desc byt_pwm_dev_desc = {
+ .flags = LPSS_SAVE_CTX,
++ .prv_offset = 0x800,
+ };
+
+ static const struct lpss_device_desc bsw_pwm_dev_desc = {
+ .flags = LPSS_SAVE_CTX | LPSS_NO_D3_DELAY,
++ .prv_offset = 0x800,
+ };
+
+ static const struct lpss_device_desc byt_uart_dev_desc = {
+diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
+index 143edea1076f..41090ef5facb 100644
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -524,16 +524,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
+ return sprintf(buf, "Not affected\n");
+ }
+
++ssize_t __weak cpu_show_l1tf(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
++static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ &dev_attr_spec_store_bypass.attr,
++ &dev_attr_l1tf.attr,
+ NULL
+ };
+
+diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
+index 912ad30be585..4719aa781bf2 100644
+--- a/drivers/char/tpm/tpm-dev.c
++++ b/drivers/char/tpm/tpm-dev.c
+@@ -25,7 +25,7 @@ struct file_priv {
+ struct tpm_chip *chip;
+
+ /* Data passed to and from the tpm via the read/write calls */
+- atomic_t data_pending;
++ size_t data_pending;
+ struct mutex buffer_mutex;
+
+ struct timer_list user_read_timer; /* user needs to claim result */
+@@ -46,7 +46,7 @@ static void timeout_work(struct work_struct *work)
+ struct file_priv *priv = container_of(work, struct file_priv, work);
+
+ mutex_lock(&priv->buffer_mutex);
+- atomic_set(&priv->data_pending, 0);
++ priv->data_pending = 0;
+ memset(priv->data_buffer, 0, sizeof(priv->data_buffer));
+ mutex_unlock(&priv->buffer_mutex);
+ }
+@@ -72,7 +72,6 @@ static int tpm_open(struct inode *inode, struct file *file)
+ }
+
+ priv->chip = chip;
+- atomic_set(&priv->data_pending, 0);
+ mutex_init(&priv->buffer_mutex);
+ setup_timer(&priv->user_read_timer, user_reader_timeout,
+ (unsigned long)priv);
+@@ -86,28 +85,24 @@ static ssize_t tpm_read(struct file *file, char __user *buf,
+ size_t size, loff_t *off)
+ {
+ struct file_priv *priv = file->private_data;
+- ssize_t ret_size;
++ ssize_t ret_size = 0;
+ int rc;
+
+ del_singleshot_timer_sync(&priv->user_read_timer);
+ flush_work(&priv->work);
+- ret_size = atomic_read(&priv->data_pending);
+- if (ret_size > 0) { /* relay data */
+- ssize_t orig_ret_size = ret_size;
+- if (size < ret_size)
+- ret_size = size;
++ mutex_lock(&priv->buffer_mutex);
+
+- mutex_lock(&priv->buffer_mutex);
++ if (priv->data_pending) {
++ ret_size = min_t(ssize_t, size, priv->data_pending);
+ rc = copy_to_user(buf, priv->data_buffer, ret_size);
+- memset(priv->data_buffer, 0, orig_ret_size);
++ memset(priv->data_buffer, 0, priv->data_pending);
+ if (rc)
+ ret_size = -EFAULT;
+
+- mutex_unlock(&priv->buffer_mutex);
++ priv->data_pending = 0;
+ }
+
+- atomic_set(&priv->data_pending, 0);
+-
++ mutex_unlock(&priv->buffer_mutex);
+ return ret_size;
+ }
+
+@@ -118,18 +113,20 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
+ size_t in_size = size;
+ ssize_t out_size;
+
+- /* cannot perform a write until the read has cleared
+- either via tpm_read or a user_read_timer timeout.
+- This also prevents splitted buffered writes from blocking here.
+- */
+- if (atomic_read(&priv->data_pending) != 0)
+- return -EBUSY;
+-
+ if (in_size > TPM_BUFSIZE)
+ return -E2BIG;
+
+ mutex_lock(&priv->buffer_mutex);
+
++ /* Cannot perform a write until the read has cleared either via
++ * tpm_read or a user_read_timer timeout. This also prevents split
++ * buffered writes from blocking here.
++ */
++ if (priv->data_pending != 0) {
++ mutex_unlock(&priv->buffer_mutex);
++ return -EBUSY;
++ }
++
+ if (copy_from_user
+ (priv->data_buffer, (void __user *) buf, in_size)) {
+ mutex_unlock(&priv->buffer_mutex);
+@@ -153,7 +150,7 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
+ return out_size;
+ }
+
+- atomic_set(&priv->data_pending, out_size);
++ priv->data_pending = out_size;
+ mutex_unlock(&priv->buffer_mutex);
+
+ /* Set a timeout by which the reader must come claim the result */
+@@ -172,7 +169,7 @@ static int tpm_release(struct inode *inode, struct file *file)
+ del_singleshot_timer_sync(&priv->user_read_timer);
+ flush_work(&priv->work);
+ file->private_data = NULL;
+- atomic_set(&priv->data_pending, 0);
++ priv->data_pending = 0;
+ clear_bit(0, &priv->chip->is_open);
+ kfree(priv);
+ return 0;
+diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
+index 6790ebb366dd..98fd9a594841 100644
+--- a/drivers/infiniband/core/umem.c
++++ b/drivers/infiniband/core/umem.c
+@@ -122,16 +122,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ umem->address = addr;
+ umem->page_size = PAGE_SIZE;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
+- /*
+- * We ask for writable memory if any of the following
+- * access flags are set. "Local write" and "remote write"
+- * obviously require write access. "Remote atomic" can do
+- * things like fetch and add, which will modify memory, and
+- * "MW bind" can change permissions by binding a window.
+- */
+- umem->writable = !!(access &
+- (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+- IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
++ umem->writable = ib_access_writable(access);
+
+ if (access & IB_ACCESS_ON_DEMAND) {
+ put_pid(umem->pid);
+diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
+index ce87e9cc7eff..bf52e35dd506 100644
+--- a/drivers/infiniband/hw/mlx4/mr.c
++++ b/drivers/infiniband/hw/mlx4/mr.c
+@@ -130,6 +130,40 @@ out:
+ return err;
+ }
+
++static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
++ u64 length, u64 virt_addr,
++ int access_flags)
++{
++ /*
++ * Force registering the memory as writable if the underlying pages
++ * are writable. This is so rereg can change the access permissions
++ * from readable to writable without having to run through ib_umem_get
++ * again
++ */
++ if (!ib_access_writable(access_flags)) {
++ struct vm_area_struct *vma;
++
++ down_read(&current->mm->mmap_sem);
++ /*
++ * FIXME: Ideally this would iterate over all the vmas that
++ * cover the memory, but for now it requires a single vma to
++ * entirely cover the MR to support RO mappings.
++ */
++ vma = find_vma(current->mm, start);
++ if (vma && vma->vm_end >= start + length &&
++ vma->vm_start <= start) {
++ if (vma->vm_flags & VM_WRITE)
++ access_flags |= IB_ACCESS_LOCAL_WRITE;
++ } else {
++ access_flags |= IB_ACCESS_LOCAL_WRITE;
++ }
++
++ up_read(&current->mm->mmap_sem);
++ }
++
++ return ib_umem_get(context, start, length, access_flags, 0);
++}
++
+ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+@@ -144,10 +178,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+- /* Force registering the memory as writable. */
+- /* Used for memory re-registeration. HCA protects the access */
+- mr->umem = ib_umem_get(pd->uobject->context, start, length,
+- access_flags | IB_ACCESS_LOCAL_WRITE, 0);
++ mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
++ virt_addr, access_flags);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err_free;
+@@ -214,6 +246,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+ }
+
+ if (flags & IB_MR_REREG_ACCESS) {
++ if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
++ return -EPERM;
++
+ err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
+ convert_access(mr_access_flags));
+
+@@ -227,10 +262,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+
+ mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+ ib_umem_release(mmr->umem);
+- mmr->umem = ib_umem_get(mr->uobject->context, start, length,
+- mr_access_flags |
+- IB_ACCESS_LOCAL_WRITE,
+- 0);
++ mmr->umem =
++ mlx4_get_umem_mr(mr->uobject->context, start, length,
++ virt_addr, mr_access_flags);
+ if (IS_ERR(mmr->umem)) {
+ err = PTR_ERR(mmr->umem);
+ /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
+diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+index 748b63b86cbc..40242ead096f 100644
+--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
++++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+@@ -643,7 +643,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
+ struct ocrdma_stats *pstats = filp->private_data;
+ struct ocrdma_dev *dev = pstats->dev;
+
+- if (count > 32)
++ if (*ppos != 0 || count == 0 || count > sizeof(tmp_str))
+ goto err;
+
+ if (copy_from_user(tmp_str, buffer, count))
+diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
+index bec9f099573b..68d0a5c9d437 100644
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -879,7 +879,6 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
+ struct sk_buff *skb,
+ struct sk_buff_head *list)
+ {
+- struct skb_shared_info *shinfo = skb_shinfo(skb);
+ RING_IDX cons = queue->rx.rsp_cons;
+ struct sk_buff *nskb;
+
+@@ -888,15 +887,16 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
+ RING_GET_RESPONSE(&queue->rx, ++cons);
+ skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
+
+- if (shinfo->nr_frags == MAX_SKB_FRAGS) {
++ if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
+ unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
+
+ BUG_ON(pull_to <= skb_headlen(skb));
+ __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
+ }
+- BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
++ BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
+
+- skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
++ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
++ skb_frag_page(nfrag),
+ rx->offset, rx->status, PAGE_SIZE);
+
+ skb_shinfo(nskb)->nr_frags = 0;
+diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
+index de53c9694b68..5dc288fecace 100644
+--- a/drivers/scsi/sr.c
++++ b/drivers/scsi/sr.c
+@@ -520,18 +520,26 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
+ static int sr_block_open(struct block_device *bdev, fmode_t mode)
+ {
+ struct scsi_cd *cd;
++ struct scsi_device *sdev;
+ int ret = -ENXIO;
+
++ cd = scsi_cd_get(bdev->bd_disk);
++ if (!cd)
++ goto out;
++
++ sdev = cd->device;
++ scsi_autopm_get_device(sdev);
+ check_disk_change(bdev);
+
+ mutex_lock(&sr_mutex);
+- cd = scsi_cd_get(bdev->bd_disk);
+- if (cd) {
+- ret = cdrom_open(&cd->cdi, bdev, mode);
+- if (ret)
+- scsi_cd_put(cd);
+- }
++ ret = cdrom_open(&cd->cdi, bdev, mode);
+ mutex_unlock(&sr_mutex);
++
++ scsi_autopm_put_device(sdev);
++ if (ret)
++ scsi_cd_put(cd);
++
++out:
+ return ret;
+ }
+
+@@ -559,6 +567,8 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
+ if (ret)
+ goto out;
+
++ scsi_autopm_get_device(sdev);
++
+ /*
+ * Send SCSI addressing ioctls directly to mid level, send other
+ * ioctls to cdrom/block level.
+@@ -567,15 +577,18 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
+ case SCSI_IOCTL_GET_IDLUN:
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ ret = scsi_ioctl(sdev, cmd, argp);
+- goto out;
++ goto put;
+ }
+
+ ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
+ if (ret != -ENOSYS)
+- goto out;
++ goto put;
+
+ ret = scsi_ioctl(sdev, cmd, argp);
+
++put:
++ scsi_autopm_put_device(sdev);
++
+ out:
+ mutex_unlock(&sr_mutex);
+ return ret;
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 250c1222e30c..807efaab838e 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -1954,10 +1954,12 @@ struct dentry *d_make_root(struct inode *root_inode)
+ static const struct qstr name = QSTR_INIT("/", 1);
+
+ res = __d_alloc(root_inode->i_sb, &name);
+- if (res)
++ if (res) {
++ res->d_flags |= DCACHE_RCUACCESS;
+ d_instantiate(res, root_inode);
+- else
++ } else {
+ iput(root_inode);
++ }
+ }
+ return res;
+ }
+diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+index 041117fd8fd7..0963213e9cd3 100644
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -1308,7 +1308,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+ ext4_itable_unused_count(sb, gdp)),
+ sbi->s_inodes_per_block);
+
+- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
++ if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
++ ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
++ ext4_itable_unused_count(sb, gdp)) <
++ EXT4_FIRST_INO(sb)))) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 3e4d8ac1974e..8d18f6142da5 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2875,14 +2875,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
+ if (!gdp)
+ continue;
+
+- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+- continue;
+- if (group != 0)
++ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+- ext4_error(sb, "Inode table for bg 0 marked as "
+- "needing zeroing");
+- if (sb->s_flags & MS_RDONLY)
+- return ngroups;
+ }
+
+ return group;
+diff --git a/fs/namespace.c b/fs/namespace.c
+index a879560ea144..b56b50e3da11 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
1450 |
+@@ -603,12 +603,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) |
1451 |
+ return 0; |
1452 |
+ mnt = real_mount(bastard); |
1453 |
+ mnt_add_count(mnt, 1); |
1454 |
++ smp_mb(); // see mntput_no_expire() |
1455 |
+ if (likely(!read_seqretry(&mount_lock, seq))) |
1456 |
+ return 0; |
1457 |
+ if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { |
1458 |
+ mnt_add_count(mnt, -1); |
1459 |
+ return 1; |
1460 |
+ } |
1461 |
++ lock_mount_hash(); |
1462 |
++ if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { |
1463 |
++ mnt_add_count(mnt, -1); |
1464 |
++ unlock_mount_hash(); |
1465 |
++ return 1; |
1466 |
++ } |
1467 |
++ unlock_mount_hash(); |
1468 |
++ /* caller will mntput() */ |
1469 |
+ return -1; |
1470 |
+ } |
1471 |
+ |
1472 |
+@@ -1124,12 +1133,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); |
1473 |
+ static void mntput_no_expire(struct mount *mnt) |
1474 |
+ { |
1475 |
+ rcu_read_lock(); |
1476 |
+- mnt_add_count(mnt, -1); |
1477 |
+- if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ |
1478 |
++ if (likely(READ_ONCE(mnt->mnt_ns))) { |
1479 |
++ /* |
1480 |
++ * Since we don't do lock_mount_hash() here, |
1481 |
++ * ->mnt_ns can change under us. However, if it's |
1482 |
++ * non-NULL, then there's a reference that won't |
1483 |
++ * be dropped until after an RCU delay done after |
1484 |
++ * turning ->mnt_ns NULL. So if we observe it |
1485 |
++ * non-NULL under rcu_read_lock(), the reference |
1486 |
++ * we are dropping is not the final one. |
1487 |
++ */ |
1488 |
++ mnt_add_count(mnt, -1); |
1489 |
+ rcu_read_unlock(); |
1490 |
+ return; |
1491 |
+ } |
1492 |
+ lock_mount_hash(); |
1493 |
++ /* |
1494 |
++ * make sure that if __legitimize_mnt() has not seen us grab |
1495 |
++ * mount_lock, we'll see their refcount increment here. |
1496 |
++ */ |
1497 |
++ smp_mb(); |
1498 |
++ mnt_add_count(mnt, -1); |
1499 |
+ if (mnt_get_count(mnt)) { |
1500 |
+ rcu_read_unlock(); |
1501 |
+ unlock_mount_hash(); |
1502 |
+diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h |
1503 |
+index 25b793325b09..dabecb661264 100644 |
1504 |
+--- a/include/asm-generic/pgtable.h |
1505 |
++++ b/include/asm-generic/pgtable.h |
1506 |
+@@ -799,6 +799,18 @@ static inline int pmd_free_pte_page(pmd_t *pmd) |
1507 |
+ } |
1508 |
+ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ |
1509 |
+ |
1510 |
++#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED |
1511 |
++static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) |
1512 |
++{ |
1513 |
++ return true; |
1514 |
++} |
1515 |
++ |
1516 |
++static inline bool arch_has_pfn_modify_check(void) |
1517 |
++{ |
1518 |
++ return false; |
1519 |
++} |
1520 |
++#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */ |
1521 |
++ |
1522 |
+ #endif /* !__ASSEMBLY__ */ |
1523 |
+ |
1524 |
+ #ifndef io_remap_pfn_range |
1525 |
+diff --git a/include/linux/cpu.h b/include/linux/cpu.h |
1526 |
+index 2f9d12022100..063c73ed6d78 100644 |
1527 |
+--- a/include/linux/cpu.h |
1528 |
++++ b/include/linux/cpu.h |
1529 |
+@@ -48,6 +48,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev, |
1530 |
+ struct device_attribute *attr, char *buf); |
1531 |
+ extern ssize_t cpu_show_spec_store_bypass(struct device *dev, |
1532 |
+ struct device_attribute *attr, char *buf); |
1533 |
++extern ssize_t cpu_show_l1tf(struct device *dev, |
1534 |
++ struct device_attribute *attr, char *buf); |
1535 |
+ |
1536 |
+ extern __printf(4, 5) |
1537 |
+ struct device *cpu_device_create(struct device *parent, void *drvdata, |
1538 |
+diff --git a/include/linux/mm.h b/include/linux/mm.h |
1539 |
+index a100946607a5..1f4366567e7d 100644 |
1540 |
+--- a/include/linux/mm.h |
1541 |
++++ b/include/linux/mm.h |
1542 |
+@@ -2083,6 +2083,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, |
1543 |
+ int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); |
1544 |
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
1545 |
+ unsigned long pfn); |
1546 |
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, |
1547 |
++ unsigned long pfn, pgprot_t pgprot); |
1548 |
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
1549 |
+ unsigned long pfn); |
1550 |
+ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
1551 |
+diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h |
1552 |
+index 388293a91e8c..e4594de79bc4 100644 |
1553 |
+--- a/include/linux/swapfile.h |
1554 |
++++ b/include/linux/swapfile.h |
1555 |
+@@ -9,5 +9,7 @@ extern spinlock_t swap_lock; |
1556 |
+ extern struct plist_head swap_active_head; |
1557 |
+ extern struct swap_info_struct *swap_info[]; |
1558 |
+ extern int try_to_unuse(unsigned int, bool, unsigned long); |
1559 |
++extern unsigned long generic_max_swapfile_size(void); |
1560 |
++extern unsigned long max_swapfile_size(void); |
1561 |
+ |
1562 |
+ #endif /* _LINUX_SWAPFILE_H */ |
1563 |
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h |
1564 |
+index ff307b548ed3..646891f3bc1e 100644 |
1565 |
+--- a/include/linux/thread_info.h |
1566 |
++++ b/include/linux/thread_info.h |
1567 |
+@@ -55,11 +55,7 @@ extern long do_no_restart_syscall(struct restart_block *parm); |
1568 |
+ |
1569 |
+ #ifdef __KERNEL__ |
1570 |
+ |
1571 |
+-#ifdef CONFIG_DEBUG_STACK_USAGE |
1572 |
+-# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) |
1573 |
+-#else |
1574 |
+-# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) |
1575 |
+-#endif |
1576 |
++#define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) |
1577 |
+ |
1578 |
+ /* |
1579 |
+ * flag set/clear/test wrappers |
1580 |
+diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h |
1581 |
+index 120da1d7f57e..10fefb0dc640 100644 |
1582 |
+--- a/include/rdma/ib_verbs.h |
1583 |
++++ b/include/rdma/ib_verbs.h |
1584 |
+@@ -3007,6 +3007,20 @@ static inline int ib_check_mr_access(int flags) |
1585 |
+ return 0; |
1586 |
+ } |
1587 |
+ |
1588 |
++static inline bool ib_access_writable(int access_flags) |
1589 |
++{ |
1590 |
++ /* |
1591 |
++ * We have writable memory backing the MR if any of the following |
1592 |
++ * access flags are set. "Local write" and "remote write" obviously |
1593 |
++ * require write access. "Remote atomic" can do things like fetch and |
1594 |
++ * add, which will modify memory, and "MW bind" can change permissions |
1595 |
++ * by binding a window. |
1596 |
++ */ |
1597 |
++ return access_flags & |
1598 |
++ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | |
1599 |
++ IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND); |
1600 |
++} |
1601 |
++ |
1602 |
+ /** |
1603 |
+ * ib_check_mr_status: lightweight check of MR status. |
1604 |
+ * This routine may provide status checks on a selected |
1605 |
+diff --git a/mm/memory.c b/mm/memory.c |
1606 |
+index 177cb7d111a9..d5bb1465d30c 100644 |
1607 |
+--- a/mm/memory.c |
1608 |
++++ b/mm/memory.c |
1609 |
+@@ -1604,9 +1604,30 @@ out: |
1610 |
+ */ |
1611 |
+ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
1612 |
+ unsigned long pfn) |
1613 |
++{ |
1614 |
++ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); |
1615 |
++} |
1616 |
++EXPORT_SYMBOL(vm_insert_pfn); |
1617 |
++ |
1618 |
++/** |
1619 |
++ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot |
1620 |
++ * @vma: user vma to map to |
1621 |
++ * @addr: target user address of this page |
1622 |
++ * @pfn: source kernel pfn |
1623 |
++ * @pgprot: pgprot flags for the inserted page |
1624 |
++ * |
1625 |
++ * This is exactly like vm_insert_pfn, except that it allows drivers to |
1626 |
++ * to override pgprot on a per-page basis. |
1627 |
++ * |
1628 |
++ * This only makes sense for IO mappings, and it makes no sense for |
1629 |
++ * cow mappings. In general, using multiple vmas is preferable; |
1630 |
++ * vm_insert_pfn_prot should only be used if using multiple VMAs is |
1631 |
++ * impractical. |
1632 |
++ */ |
1633 |
++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, |
1634 |
++ unsigned long pfn, pgprot_t pgprot) |
1635 |
+ { |
1636 |
+ int ret; |
1637 |
+- pgprot_t pgprot = vma->vm_page_prot; |
1638 |
+ /* |
1639 |
+ * Technically, architectures with pte_special can avoid all these |
1640 |
+ * restrictions (same for remap_pfn_range). However we would like |
1641 |
+@@ -1624,19 +1645,29 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
1642 |
+ if (track_pfn_insert(vma, &pgprot, pfn)) |
1643 |
+ return -EINVAL; |
1644 |
+ |
1645 |
++ if (!pfn_modify_allowed(pfn, pgprot)) |
1646 |
++ return -EACCES; |
1647 |
++ |
1648 |
+ ret = insert_pfn(vma, addr, pfn, pgprot); |
1649 |
+ |
1650 |
+ return ret; |
1651 |
+ } |
1652 |
+-EXPORT_SYMBOL(vm_insert_pfn); |
1653 |
++EXPORT_SYMBOL(vm_insert_pfn_prot); |
1654 |
+ |
1655 |
+ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
1656 |
+ unsigned long pfn) |
1657 |
+ { |
1658 |
++ pgprot_t pgprot = vma->vm_page_prot; |
1659 |
++ |
1660 |
+ BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); |
1661 |
+ |
1662 |
+ if (addr < vma->vm_start || addr >= vma->vm_end) |
1663 |
+ return -EFAULT; |
1664 |
++ if (track_pfn_insert(vma, &pgprot, pfn)) |
1665 |
++ return -EINVAL; |
1666 |
++ |
1667 |
++ if (!pfn_modify_allowed(pfn, pgprot)) |
1668 |
++ return -EACCES; |
1669 |
+ |
1670 |
+ /* |
1671 |
+ * If we don't have pte special, then we have to use the pfn_valid() |
1672 |
+@@ -1649,9 +1680,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
1673 |
+ struct page *page; |
1674 |
+ |
1675 |
+ page = pfn_to_page(pfn); |
1676 |
+- return insert_page(vma, addr, page, vma->vm_page_prot); |
1677 |
++ return insert_page(vma, addr, page, pgprot); |
1678 |
+ } |
1679 |
+- return insert_pfn(vma, addr, pfn, vma->vm_page_prot); |
1680 |
++ return insert_pfn(vma, addr, pfn, pgprot); |
1681 |
+ } |
1682 |
+ EXPORT_SYMBOL(vm_insert_mixed); |
1683 |
+ |
1684 |
+@@ -1666,6 +1697,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, |
1685 |
+ { |
1686 |
+ pte_t *pte; |
1687 |
+ spinlock_t *ptl; |
1688 |
++ int err = 0; |
1689 |
+ |
1690 |
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); |
1691 |
+ if (!pte) |
1692 |
+@@ -1673,12 +1705,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, |
1693 |
+ arch_enter_lazy_mmu_mode(); |
1694 |
+ do { |
1695 |
+ BUG_ON(!pte_none(*pte)); |
1696 |
++ if (!pfn_modify_allowed(pfn, prot)) { |
1697 |
++ err = -EACCES; |
1698 |
++ break; |
1699 |
++ } |
1700 |
+ set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); |
1701 |
+ pfn++; |
1702 |
+ } while (pte++, addr += PAGE_SIZE, addr != end); |
1703 |
+ arch_leave_lazy_mmu_mode(); |
1704 |
+ pte_unmap_unlock(pte - 1, ptl); |
1705 |
+- return 0; |
1706 |
++ return err; |
1707 |
+ } |
1708 |
+ |
1709 |
+ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, |
1710 |
+@@ -1687,6 +1723,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, |
1711 |
+ { |
1712 |
+ pmd_t *pmd; |
1713 |
+ unsigned long next; |
1714 |
++ int err; |
1715 |
+ |
1716 |
+ pfn -= addr >> PAGE_SHIFT; |
1717 |
+ pmd = pmd_alloc(mm, pud, addr); |
1718 |
+@@ -1695,9 +1732,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, |
1719 |
+ VM_BUG_ON(pmd_trans_huge(*pmd)); |
1720 |
+ do { |
1721 |
+ next = pmd_addr_end(addr, end); |
1722 |
+- if (remap_pte_range(mm, pmd, addr, next, |
1723 |
+- pfn + (addr >> PAGE_SHIFT), prot)) |
1724 |
+- return -ENOMEM; |
1725 |
++ err = remap_pte_range(mm, pmd, addr, next, |
1726 |
++ pfn + (addr >> PAGE_SHIFT), prot); |
1727 |
++ if (err) |
1728 |
++ return err; |
1729 |
+ } while (pmd++, addr = next, addr != end); |
1730 |
+ return 0; |
1731 |
+ } |
1732 |
+@@ -1708,6 +1746,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, |
1733 |
+ { |
1734 |
+ pud_t *pud; |
1735 |
+ unsigned long next; |
1736 |
++ int err; |
1737 |
+ |
1738 |
+ pfn -= addr >> PAGE_SHIFT; |
1739 |
+ pud = pud_alloc(mm, pgd, addr); |
1740 |
+@@ -1715,9 +1754,10 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, |
1741 |
+ return -ENOMEM; |
1742 |
+ do { |
1743 |
+ next = pud_addr_end(addr, end); |
1744 |
+- if (remap_pmd_range(mm, pud, addr, next, |
1745 |
+- pfn + (addr >> PAGE_SHIFT), prot)) |
1746 |
+- return -ENOMEM; |
1747 |
++ err = remap_pmd_range(mm, pud, addr, next, |
1748 |
++ pfn + (addr >> PAGE_SHIFT), prot); |
1749 |
++ if (err) |
1750 |
++ return err; |
1751 |
+ } while (pud++, addr = next, addr != end); |
1752 |
+ return 0; |
1753 |
+ } |
1754 |
+diff --git a/mm/mprotect.c b/mm/mprotect.c |
1755 |
+index c0b4b2a49462..a277f3412a5d 100644 |
1756 |
+--- a/mm/mprotect.c |
1757 |
++++ b/mm/mprotect.c |
1758 |
+@@ -255,6 +255,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, |
1759 |
+ return pages; |
1760 |
+ } |
1761 |
+ |
1762 |
++static int prot_none_pte_entry(pte_t *pte, unsigned long addr, |
1763 |
++ unsigned long next, struct mm_walk *walk) |
1764 |
++{ |
1765 |
++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? |
1766 |
++ 0 : -EACCES; |
1767 |
++} |
1768 |
++ |
1769 |
++static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, |
1770 |
++ unsigned long addr, unsigned long next, |
1771 |
++ struct mm_walk *walk) |
1772 |
++{ |
1773 |
++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? |
1774 |
++ 0 : -EACCES; |
1775 |
++} |
1776 |
++ |
1777 |
++static int prot_none_test(unsigned long addr, unsigned long next, |
1778 |
++ struct mm_walk *walk) |
1779 |
++{ |
1780 |
++ return 0; |
1781 |
++} |
1782 |
++ |
1783 |
++static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, |
1784 |
++ unsigned long end, unsigned long newflags) |
1785 |
++{ |
1786 |
++ pgprot_t new_pgprot = vm_get_page_prot(newflags); |
1787 |
++ struct mm_walk prot_none_walk = { |
1788 |
++ .pte_entry = prot_none_pte_entry, |
1789 |
++ .hugetlb_entry = prot_none_hugetlb_entry, |
1790 |
++ .test_walk = prot_none_test, |
1791 |
++ .mm = current->mm, |
1792 |
++ .private = &new_pgprot, |
1793 |
++ }; |
1794 |
++ |
1795 |
++ return walk_page_range(start, end, &prot_none_walk); |
1796 |
++} |
1797 |
++ |
1798 |
+ int |
1799 |
+ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, |
1800 |
+ unsigned long start, unsigned long end, unsigned long newflags) |
1801 |
+@@ -272,6 +308,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, |
1802 |
+ return 0; |
1803 |
+ } |
1804 |
+ |
1805 |
++ /* |
1806 |
++ * Do PROT_NONE PFN permission checks here when we can still |
1807 |
++ * bail out without undoing a lot of state. This is a rather |
1808 |
++ * uncommon case, so doesn't need to be very optimized. |
1809 |
++ */ |
1810 |
++ if (arch_has_pfn_modify_check() && |
1811 |
++ (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && |
1812 |
++ (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { |
1813 |
++ error = prot_none_walk(vma, start, end, newflags); |
1814 |
++ if (error) |
1815 |
++ return error; |
1816 |
++ } |
1817 |
++ |
1818 |
+ /* |
1819 |
+ * If we make a private mapping writable we increase our commit; |
1820 |
+ * but (without finer accounting) cannot reduce our commit if we |
1821 |
+diff --git a/mm/swapfile.c b/mm/swapfile.c |
1822 |
+index 674bf177ce44..8e25ff2b693a 100644 |
1823 |
+--- a/mm/swapfile.c |
1824 |
++++ b/mm/swapfile.c |
1825 |
+@@ -2206,6 +2206,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) |
1826 |
+ return 0; |
1827 |
+ } |
1828 |
+ |
1829 |
++ |
1830 |
++/* |
1831 |
++ * Find out how many pages are allowed for a single swap device. There |
1832 |
++ * are two limiting factors: |
1833 |
++ * 1) the number of bits for the swap offset in the swp_entry_t type, and |
1834 |
++ * 2) the number of bits in the swap pte, as defined by the different |
1835 |
++ * architectures. |
1836 |
++ * |
1837 |
++ * In order to find the largest possible bit mask, a swap entry with |
1838 |
++ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, |
1839 |
++ * decoded to a swp_entry_t again, and finally the swap offset is |
1840 |
++ * extracted. |
1841 |
++ * |
1842 |
++ * This will mask all the bits from the initial ~0UL mask that can't |
1843 |
++ * be encoded in either the swp_entry_t or the architecture definition |
1844 |
++ * of a swap pte. |
1845 |
++ */ |
1846 |
++unsigned long generic_max_swapfile_size(void) |
1847 |
++{ |
1848 |
++ return swp_offset(pte_to_swp_entry( |
1849 |
++ swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; |
1850 |
++} |
1851 |
++ |
1852 |
++/* Can be overridden by an architecture for additional checks. */ |
1853 |
++__weak unsigned long max_swapfile_size(void) |
1854 |
++{ |
1855 |
++ return generic_max_swapfile_size(); |
1856 |
++} |
1857 |
++ |
1858 |
+ static unsigned long read_swap_header(struct swap_info_struct *p, |
1859 |
+ union swap_header *swap_header, |
1860 |
+ struct inode *inode) |
1861 |
+@@ -2241,22 +2270,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p, |
1862 |
+ p->cluster_next = 1; |
1863 |
+ p->cluster_nr = 0; |
1864 |
+ |
1865 |
+- /* |
1866 |
+- * Find out how many pages are allowed for a single swap |
1867 |
+- * device. There are two limiting factors: 1) the number |
1868 |
+- * of bits for the swap offset in the swp_entry_t type, and |
1869 |
+- * 2) the number of bits in the swap pte as defined by the |
1870 |
+- * different architectures. In order to find the |
1871 |
+- * largest possible bit mask, a swap entry with swap type 0 |
1872 |
+- * and swap offset ~0UL is created, encoded to a swap pte, |
1873 |
+- * decoded to a swp_entry_t again, and finally the swap |
1874 |
+- * offset is extracted. This will mask all the bits from |
1875 |
+- * the initial ~0UL mask that can't be encoded in either |
1876 |
+- * the swp_entry_t or the architecture definition of a |
1877 |
+- * swap pte. |
1878 |
+- */ |
1879 |
+- maxpages = swp_offset(pte_to_swp_entry( |
1880 |
+- swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; |
1881 |
++ maxpages = max_swapfile_size(); |
1882 |
+ last_page = swap_header->info.last_page; |
1883 |
+ if (!last_page) { |
1884 |
+ pr_warn("Empty swap-file\n"); |
1885 |
+diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig |
1886 |
+index 93581bba8643..09d6c4a6b53d 100644 |
1887 |
+--- a/net/ipv4/Kconfig |
1888 |
++++ b/net/ipv4/Kconfig |
1889 |
+@@ -354,6 +354,7 @@ config INET_ESP |
1890 |
+ select CRYPTO_CBC |
1891 |
+ select CRYPTO_SHA1 |
1892 |
+ select CRYPTO_DES |
1893 |
++ select CRYPTO_ECHAINIV |
1894 |
+ ---help--- |
1895 |
+ Support for IPsec ESP. |
1896 |
+ |
1897 |
+diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig |
1898 |
+index 851d5c9e3ecc..0f50248bad17 100644 |
1899 |
+--- a/net/ipv6/Kconfig |
1900 |
++++ b/net/ipv6/Kconfig |
1901 |
+@@ -69,6 +69,7 @@ config INET6_ESP |
1902 |
+ select CRYPTO_CBC |
1903 |
+ select CRYPTO_SHA1 |
1904 |
+ select CRYPTO_DES |
1905 |
++ select CRYPTO_ECHAINIV |
1906 |
+ ---help--- |
1907 |
+ Support for IPsec ESP. |
1908 |
+ |