commit: ac9a704fb734a73e6d12926722e3d82046188f22
Author: Mike Pagano <mpagano@gentoo.org>
AuthorDate: Sat Jul 28 10:36:54 2018 +0000
Commit: Mike Pagano <mpagano@gentoo.org>
CommitDate: Wed Nov 21 15:01:03 2018 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=ac9a704f

Linux patches 4.4.144 and 4.4.145

Signed-off-by: Mike Pagano <mpagano@gentoo.org>

 0000_README | 8 +
 1143_linux-4.4.144.patch | 4228 ++++++++++++++++++++++++++++++++++++++++++++++
 1144_linux-4.4.145.patch | 1006 +++++++++++
 3 files changed, 5242 insertions(+)

diff --git a/0000_README b/0000_README
index 42e6d1f..5149ed7 100644
--- a/0000_README
+++ b/0000_README
@@ -615,6 +615,14 @@ Patch: 1142_linux-4.4.143.patch
From: http://www.kernel.org
Desc: Linux 4.4.143

+Patch: 1143_linux-4.4.144.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.144
+
+Patch: 1144_linux-4.4.145.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.145
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1143_linux-4.4.144.patch b/1143_linux-4.4.144.patch
new file mode 100644
index 0000000..d0155cc
--- /dev/null
+++ b/1143_linux-4.4.144.patch
@@ -0,0 +1,4228 @@
+diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
+index ea6a043f5beb..50f95689ab38 100644
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -276,6 +276,7 @@ What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
++ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+ Date: January 2018
+ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description: Information about CPU vulnerabilities
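
As an aside (this note and the sketch are not part of the patch): the new sysfs entry is world-readable, so the reported mitigation state can be checked from a short userspace program. A minimal C sketch, assuming a kernel that carries this series:

    #include <stdio.h>

    int main(void)
    {
        /* Path added by this patch; only present on kernels with the SSB ABI. */
        const char *path = "/sys/devices/system/cpu/vulnerabilities/spec_store_bypass";
        char buf[128];
        FILE *f = fopen(path, "r");

        if (!f) {
            perror("fopen");
            return 1;
        }
        if (fgets(buf, sizeof(buf), f))
            printf("%s", buf); /* e.g. "Mitigation: Speculative Store Bypass disabled via prctl" */
        fclose(f);
        return 0;
    }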
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index e60d0b5809c1..3fd53e193b7f 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2460,6 +2460,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ allow data leaks with this option, which is equivalent
+ to spectre_v2=off.
+
++ nospec_store_bypass_disable
++ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
++
+ noxsave [BUGS=X86] Disables x86 extended register state save
+ and restore using xsave. The kernel will fallback to
+ enabling legacy floating-point and sse state.
+@@ -3623,6 +3626,48 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
++ spec_store_bypass_disable=
++ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
++ (Speculative Store Bypass vulnerability)
++
++ Certain CPUs are vulnerable to an exploit against a
++ common industry wide performance optimization known
++ as "Speculative Store Bypass" in which recent stores
++ to the same memory location may not be observed by
++ later loads during speculative execution. The idea
++ is that such stores are unlikely and that they can
++ be detected prior to instruction retirement at the
++ end of a particular speculation execution window.
++
++ In vulnerable processors, the speculatively forwarded
++ store can be used in a cache side channel attack, for
++ example to read memory to which the attacker does not
++ directly have access (e.g. inside sandboxed code).
++
++ This parameter controls whether the Speculative Store
++ Bypass optimization is used.
++
++ on - Unconditionally disable Speculative Store Bypass
++ off - Unconditionally enable Speculative Store Bypass
++ auto - Kernel detects whether the CPU model contains an
++ implementation of Speculative Store Bypass and
++ picks the most appropriate mitigation. If the
++ CPU is not vulnerable, "off" is selected. If the
++ CPU is vulnerable the default mitigation is
++ architecture and Kconfig dependent. See below.
++ prctl - Control Speculative Store Bypass per thread
++ via prctl. Speculative Store Bypass is enabled
++ for a process by default. The state of the control
++ is inherited on fork.
++ seccomp - Same as "prctl" above, but all seccomp threads
++ will disable SSB unless they explicitly opt out.
++
++ Not specifying this option is equivalent to
++ spec_store_bypass_disable=auto.
++
++ Default mitigations:
++ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
++
+ spia_io_base= [HW,MTD]
+ spia_fio_base=
+ spia_pedr=
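
As a usage note (not part of the patch): the parameter is consumed once at boot, so booting with, say, spec_store_bypass_disable=prctl leaves Speculative Store Bypass enabled globally while letting individual tasks opt into the mitigation via prctl(2); that per-task interface is documented in the Documentation/spec_ctrl.txt hunk below, with a sketch after it.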
+diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt
+new file mode 100644
+index 000000000000..32f3d55c54b7
+--- /dev/null
++++ b/Documentation/spec_ctrl.txt
+@@ -0,0 +1,94 @@
++===================
++Speculation Control
++===================
++
++Quite some CPUs have speculation-related misfeatures which are in
++fact vulnerabilities causing data leaks in various forms even across
++privilege domains.
++
++The kernel provides mitigation for such vulnerabilities in various
++forms. Some of these mitigations are compile-time configurable and some
++can be supplied on the kernel command line.
++
++There is also a class of mitigations which are very expensive, but they can
++be restricted to a certain set of processes or tasks in controlled
++environments. The mechanism to control these mitigations is via
++:manpage:`prctl(2)`.
++
++There are two prctl options which are related to this:
++
++ * PR_GET_SPECULATION_CTRL
++
++ * PR_SET_SPECULATION_CTRL
++
++PR_GET_SPECULATION_CTRL
++-----------------------
++
++PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
++which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
++the following meaning:
++
++==== ===================== ===================================================
++Bit  Define                Description
++==== ===================== ===================================================
++0    PR_SPEC_PRCTL         Mitigation can be controlled per task by
++                           PR_SET_SPECULATION_CTRL.
++1    PR_SPEC_ENABLE        The speculation feature is enabled, mitigation is
++                           disabled.
++2    PR_SPEC_DISABLE       The speculation feature is disabled, mitigation is
++                           enabled.
++3    PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
++                           subsequent prctl(..., PR_SPEC_ENABLE) will fail.
++==== ===================== ===================================================
++
++If all bits are 0 the CPU is not affected by the speculation misfeature.
++
++If PR_SPEC_PRCTL is set, then the per-task control of the mitigation is
++available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
++misfeature will fail.
++
++PR_SET_SPECULATION_CTRL
++-----------------------
++
++PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
++is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
++in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
++PR_SPEC_FORCE_DISABLE.
++
++Common error codes
++------------------
++======= =================================================================
++Value   Meaning
++======= =================================================================
++EINVAL  The prctl is not implemented by the architecture or unused
++        prctl(2) arguments are not 0.
++
++ENODEV  arg2 is selecting a not supported speculation misfeature.
++======= =================================================================
++
++PR_SET_SPECULATION_CTRL error codes
++-----------------------------------
++======= =================================================================
++Value   Meaning
++======= =================================================================
++0       Success
++
++ERANGE  arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
++        PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE.
++
++ENXIO   Control of the selected speculation misfeature is not possible.
++        See PR_GET_SPECULATION_CTRL.
++
++EPERM   Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
++        tried to enable it again.
++======= =================================================================
++
++Speculation misfeature controls
++-------------------------------
++- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
++
++ Invocations:
++ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
++ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
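
A minimal userspace sketch tying the two prctls together (not part of the patch; the PR_* values are the ones the patched uapi <linux/prctl.h> defines, repeated here as a fallback for build hosts with older headers):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_SPECULATION_CTRL
    # define PR_GET_SPECULATION_CTRL 52
    # define PR_SET_SPECULATION_CTRL 53
    # define PR_SPEC_STORE_BYPASS 0
    # define PR_SPEC_PRCTL (1UL << 0)
    # define PR_SPEC_ENABLE (1UL << 1)
    # define PR_SPEC_DISABLE (1UL << 2)
    #endif

    int main(void)
    {
        int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

        if (state < 0) {
            /* EINVAL/ENODEV: kernel or architecture lacks the control. */
            fprintf(stderr, "PR_GET_SPECULATION_CTRL: %s\n", strerror(errno));
            return 1;
        }
        if (!(state & PR_SPEC_PRCTL)) {
            fprintf(stderr, "per-task SSB control not available\n");
            return 1;
        }
        /* Opt this task (and children forked later) into the mitigation. */
        if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                  PR_SPEC_DISABLE, 0, 0)) {
            fprintf(stderr, "PR_SET_SPECULATION_CTRL: %s\n", strerror(errno));
            return 1;
        }
        puts("SSB mitigation enabled for this task");
        return 0;
    }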
+diff --git a/Makefile b/Makefile
+index 54690fee0485..63f3e2438a26 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 143
++SUBLEVEL = 144
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
+index 429957f1c236..8f1145ed0046 100644
+--- a/arch/arc/include/asm/page.h
++++ b/arch/arc/include/asm/page.h
+@@ -102,7 +102,7 @@ typedef pte_t * pgtable_t;
+ #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+ /* Default Permissions for stack/heaps pages (Non Executable) */
+-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
++#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+ #define WANT_PAGE_VIRTUAL 1
+
+diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
+index e5fec320f158..c07d7b0a4058 100644
+--- a/arch/arc/include/asm/pgtable.h
++++ b/arch/arc/include/asm/pgtable.h
+@@ -372,7 +372,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+
+ /* Decode a PTE containing swap "identifier "into constituents */
+ #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
+-#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13)
++#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
+
+ /* NOPs, to keep generic kernel happy */
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index d03bf0e28b8b..48c27c3fdfdb 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -79,24 +79,33 @@ ENTRY(entry_SYSENTER_compat)
+ ASM_CLAC /* Clear AC after saving FLAGS */
+
+ pushq $__USER32_CS /* pt_regs->cs */
+- xorq %r8,%r8
+- pushq %r8 /* pt_regs->ip = 0 (placeholder) */
++ pushq $0 /* pt_regs->ip = 0 (placeholder) */
+ pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+- pushq %r8 /* pt_regs->r8 = 0 */
+- pushq %r8 /* pt_regs->r9 = 0 */
+- pushq %r8 /* pt_regs->r10 = 0 */
+- pushq %r8 /* pt_regs->r11 = 0 */
++ pushq $0 /* pt_regs->r8 = 0 */
++ xorq %r8, %r8 /* nospec r8 */
++ pushq $0 /* pt_regs->r9 = 0 */
++ xorq %r9, %r9 /* nospec r9 */
++ pushq $0 /* pt_regs->r10 = 0 */
++ xorq %r10, %r10 /* nospec r10 */
++ pushq $0 /* pt_regs->r11 = 0 */
++ xorq %r11, %r11 /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
++ xorl %ebx, %ebx /* nospec rbx */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
+- pushq %r8 /* pt_regs->r12 = 0 */
+- pushq %r8 /* pt_regs->r13 = 0 */
+- pushq %r8 /* pt_regs->r14 = 0 */
+- pushq %r8 /* pt_regs->r15 = 0 */
++ xorl %ebp, %ebp /* nospec rbp */
++ pushq $0 /* pt_regs->r12 = 0 */
++ xorq %r12, %r12 /* nospec r12 */
++ pushq $0 /* pt_regs->r13 = 0 */
++ xorq %r13, %r13 /* nospec r13 */
++ pushq $0 /* pt_regs->r14 = 0 */
++ xorq %r14, %r14 /* nospec r14 */
++ pushq $0 /* pt_regs->r15 = 0 */
++ xorq %r15, %r15 /* nospec r15 */
+ cld
+
+ /*
+@@ -185,17 +194,26 @@ ENTRY(entry_SYSCALL_compat)
+ pushq %rdx /* pt_regs->dx */
+ pushq %rbp /* pt_regs->cx (stashed in bp) */
+ pushq $-ENOSYS /* pt_regs->ax */
+- xorq %r8,%r8
+- pushq %r8 /* pt_regs->r8 = 0 */
+- pushq %r8 /* pt_regs->r9 = 0 */
+- pushq %r8 /* pt_regs->r10 = 0 */
+- pushq %r8 /* pt_regs->r11 = 0 */
++ pushq $0 /* pt_regs->r8 = 0 */
++ xorq %r8, %r8 /* nospec r8 */
++ pushq $0 /* pt_regs->r9 = 0 */
++ xorq %r9, %r9 /* nospec r9 */
++ pushq $0 /* pt_regs->r10 = 0 */
++ xorq %r10, %r10 /* nospec r10 */
++ pushq $0 /* pt_regs->r11 = 0 */
++ xorq %r11, %r11 /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
++ xorl %ebx, %ebx /* nospec rbx */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
+- pushq %r8 /* pt_regs->r12 = 0 */
+- pushq %r8 /* pt_regs->r13 = 0 */
+- pushq %r8 /* pt_regs->r14 = 0 */
+- pushq %r8 /* pt_regs->r15 = 0 */
++ xorl %ebp, %ebp /* nospec rbp */
++ pushq $0 /* pt_regs->r12 = 0 */
++ xorq %r12, %r12 /* nospec r12 */
++ pushq $0 /* pt_regs->r13 = 0 */
++ xorq %r13, %r13 /* nospec r13 */
++ pushq $0 /* pt_regs->r14 = 0 */
++ xorq %r14, %r14 /* nospec r14 */
++ pushq $0 /* pt_regs->r15 = 0 */
++ xorq %r15, %r15 /* nospec r15 */
+
+ /*
+ * User mode is traced as though IRQs are on, and SYSENTER
+@@ -292,17 +310,26 @@ ENTRY(entry_INT80_compat)
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq $-ENOSYS /* pt_regs->ax */
+- xorq %r8,%r8
+- pushq %r8 /* pt_regs->r8 = 0 */
+- pushq %r8 /* pt_regs->r9 = 0 */
+- pushq %r8 /* pt_regs->r10 = 0 */
+- pushq %r8 /* pt_regs->r11 = 0 */
++ pushq $0 /* pt_regs->r8 = 0 */
++ xorq %r8, %r8 /* nospec r8 */
++ pushq $0 /* pt_regs->r9 = 0 */
++ xorq %r9, %r9 /* nospec r9 */
++ pushq $0 /* pt_regs->r10 = 0 */
++ xorq %r10, %r10 /* nospec r10 */
++ pushq $0 /* pt_regs->r11 = 0 */
++ xorq %r11, %r11 /* nospec r11 */
+ pushq %rbx /* pt_regs->rbx */
++ xorl %ebx, %ebx /* nospec rbx */
+ pushq %rbp /* pt_regs->rbp */
++ xorl %ebp, %ebp /* nospec rbp */
+ pushq %r12 /* pt_regs->r12 */
++ xorq %r12, %r12 /* nospec r12 */
+ pushq %r13 /* pt_regs->r13 */
++ xorq %r13, %r13 /* nospec r13 */
+ pushq %r14 /* pt_regs->r14 */
++ xorq %r14, %r14 /* nospec r14 */
+ pushq %r15 /* pt_regs->r15 */
++ xorq %r15, %r15 /* nospec r15 */
+ cld
+
+ /*
+diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
+index 20370c6db74b..3d1ec41ae09a 100644
+--- a/arch/x86/include/asm/apm.h
++++ b/arch/x86/include/asm/apm.h
+@@ -6,6 +6,8 @@
+ #ifndef _ASM_X86_MACH_DEFAULT_APM_H
+ #define _ASM_X86_MACH_DEFAULT_APM_H
+
++#include <asm/nospec-branch.h>
++
+ #ifdef APM_ZERO_SEGS
+ # define APM_DO_ZERO_SEGS \
+ "pushl %%ds\n\t" \
+@@ -31,6 +33,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
+ * N.B. We do NOT need a cld after the BIOS call
+ * because we always save and restore the flags.
+ */
++ firmware_restrict_branch_speculation_start();
+ __asm__ __volatile__(APM_DO_ZERO_SEGS
+ "pushl %%edi\n\t"
+ "pushl %%ebp\n\t"
+@@ -43,6 +46,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
+ "=S" (*esi)
+ : "a" (func), "b" (ebx_in), "c" (ecx_in)
+ : "memory", "cc");
++ firmware_restrict_branch_speculation_end();
+ }
+
+ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+@@ -55,6 +59,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+ * N.B. We do NOT need a cld after the BIOS call
+ * because we always save and restore the flags.
+ */
++ firmware_restrict_branch_speculation_start();
+ __asm__ __volatile__(APM_DO_ZERO_SEGS
+ "pushl %%edi\n\t"
+ "pushl %%ebp\n\t"
+@@ -67,6 +72,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+ "=S" (si)
+ : "a" (func), "b" (ebx_in), "c" (ecx_in)
+ : "memory", "cc");
++ firmware_restrict_branch_speculation_end();
+ return error;
+ }
+
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
+index e3a6f66d288c..7f5dcb64cedb 100644
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
+
+ asm volatile ("cmp %1,%2; sbb %0,%0;"
+ :"=r" (mask)
+- :"r"(size),"r" (index)
++ :"g"(size),"r" (index)
+ :"cc");
+ return mask;
+ }
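
For context (not part of the patch): array_index_mask_nospec() yields an all-ones mask when index < size and zero otherwise, computed without a conditional branch the CPU could mispredict; callers AND the index with the mask so a Spectre-v1 gadget cannot speculatively index out of bounds. A C rendering of the same idea, mirroring the generic fallback in include/linux/nospec.h:

    /* All ones if 0 <= index < size, else 0. Relies on arithmetic right
     * shift of a negative value, as gcc/clang implement it; the kernel
     * uses inline asm above so the compiler cannot reintroduce a branch. */
    static unsigned long index_mask(unsigned long index, unsigned long size)
    {
        return ~(long)(index | (size - 1UL - index)) >> (sizeof(long) * 8 - 1);
    }

    /* Typical use, clamping after the architectural bounds check:
     * if (index < size) { index &= index_mask(index, size); v = array[index]; }
     * Under misspeculation the mask is 0, so the access hits array[0]. */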
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index dd0089841a0f..d72c1db64679 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -28,6 +28,7 @@ enum cpuid_leafs
+ CPUID_8000_000A_EDX,
+ CPUID_7_ECX,
+ CPUID_8000_0007_EBX,
++ CPUID_7_EDX,
+ };
+
+ #ifdef CONFIG_X86_FEATURE_NAMES
+@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
+ REQUIRED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+
+ #define DISABLED_MASK_BIT_SET(feature_bit) \
+ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
+@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
+ DISABLED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+
+ #define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 205ce70c1d6c..f4b175db70f4 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -12,7 +12,7 @@
+ /*
+ * Defines x86 CPU feature bits
+ */
+-#define NCAPINTS 18 /* N 32-bit words worth of info */
++#define NCAPINTS 19 /* N 32-bit words worth of info */
+ #define NBUGINTS 1 /* N 32-bit bug flags */
+
+ /*
+@@ -194,13 +194,28 @@
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
++
++#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */
++
++#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
++#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
+
+-#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
++#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled*/
++#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
++#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
++#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */
++
++#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
++
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+@@ -251,6 +266,10 @@
+
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
++#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+@@ -285,6 +304,15 @@
+ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
+ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
++
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
++#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
++#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
++
+ /*
+ * BUG word(s)
+ */
+@@ -302,5 +330,6 @@
+ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
++#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index 21c5ac15657b..1f8cca459c6c 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -59,6 +59,7 @@
+ #define DISABLED_MASK15 0
+ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+ #define DISABLED_MASK17 0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define DISABLED_MASK18 0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
+index 0010c78c4998..7e5a2ffb6938 100644
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -3,6 +3,7 @@
+
+ #include <asm/fpu/api.h>
+ #include <asm/pgtable.h>
++#include <asm/nospec-branch.h>
+
+ /*
+ * We map the EFI regions needed for runtime services non-contiguously,
+@@ -39,8 +40,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
+ ({ \
+ efi_status_t __s; \
+ kernel_fpu_begin(); \
++ firmware_restrict_branch_speculation_start(); \
+ __s = ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
++ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+ __s; \
+ })
+@@ -49,8 +52,10 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
+ #define __efi_call_virt(f, args...) \
+ ({ \
+ kernel_fpu_begin(); \
++ firmware_restrict_branch_speculation_start(); \
+ ((efi_##f##_t __attribute__((regparm(0)))*) \
+ efi.systab->runtime->f)(args); \
++ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+ })
+
+@@ -71,7 +76,9 @@ extern u64 asmlinkage efi_call(void *fp, ...);
+ efi_sync_low_kernel_mappings(); \
+ preempt_disable(); \
+ __kernel_fpu_begin(); \
++ firmware_restrict_branch_speculation_start(); \
+ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
++ firmware_restrict_branch_speculation_end(); \
+ __kernel_fpu_end(); \
+ preempt_enable(); \
+ __s; \
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index 6999f7d01a0d..e13ff5a14633 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -12,6 +12,7 @@
+ */
+
+ #define INTEL_FAM6_CORE_YONAH 0x0E
++
+ #define INTEL_FAM6_CORE2_MEROM 0x0F
+ #define INTEL_FAM6_CORE2_MEROM_L 0x16
+ #define INTEL_FAM6_CORE2_PENRYN 0x17
+@@ -20,6 +21,7 @@
+ #define INTEL_FAM6_NEHALEM 0x1E
+ #define INTEL_FAM6_NEHALEM_EP 0x1A
+ #define INTEL_FAM6_NEHALEM_EX 0x2E
++
+ #define INTEL_FAM6_WESTMERE 0x25
+ #define INTEL_FAM6_WESTMERE2 0x1F
+ #define INTEL_FAM6_WESTMERE_EP 0x2C
+@@ -36,9 +38,9 @@
+ #define INTEL_FAM6_HASWELL_GT3E 0x46
+
+ #define INTEL_FAM6_BROADWELL_CORE 0x3D
+-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+ #define INTEL_FAM6_BROADWELL_GT3E 0x47
+ #define INTEL_FAM6_BROADWELL_X 0x4F
++#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+
+ #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
+ #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
+@@ -56,13 +58,15 @@
+ #define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
+ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
+ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
+-#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */
+-#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */
++#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
++#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
+ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C
+ #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
++#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+
+ /* Xeon Phi */
+
+ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
++#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+
+ #endif /* _ASM_X86_INTEL_FAMILY_H */
+diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
+index b77f5edb03b0..0056bc945cd1 100644
+--- a/arch/x86/include/asm/irqflags.h
++++ b/arch/x86/include/asm/irqflags.h
+@@ -8,7 +8,7 @@
+ * Interrupt control:
+ */
+
+-static inline unsigned long native_save_fl(void)
++extern inline unsigned long native_save_fl(void)
+ {
+ unsigned long flags;
+
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 7680b76adafc..3359dfedc7ee 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -3,12 +3,18 @@
+
+ #include <linux/spinlock.h>
+ #include <linux/mutex.h>
++#include <linux/atomic.h>
+
+ /*
+- * The x86 doesn't have a mmu context, but
+- * we put the segment information here.
++ * x86 has arch-specific MMU state beyond what lives in mm_struct.
+ */
+ typedef struct {
++ /*
++ * ctx_id uniquely identifies this mm_struct. A ctx_id will never
++ * be reused, and zero is not a valid ctx_id.
++ */
++ u64 ctx_id;
++
+ #ifdef CONFIG_MODIFY_LDT_SYSCALL
+ struct ldt_struct *ldt;
+ #endif
+@@ -24,6 +30,11 @@ typedef struct {
+ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
+ } mm_context_t;
+
++#define INIT_MM_CONTEXT(mm) \
++ .context = { \
++ .ctx_id = 1, \
++ }
++
+ void leave_mm(int cpu);
+
+ #endif /* _ASM_X86_MMU_H */
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index 9bfc5fd77015..effc12767cbf 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -11,6 +11,9 @@
+ #include <asm/tlbflush.h>
+ #include <asm/paravirt.h>
+ #include <asm/mpx.h>
++
++extern atomic64_t last_mm_ctx_id;
++
+ #ifndef CONFIG_PARAVIRT
+ static inline void paravirt_activate_mm(struct mm_struct *prev,
+ struct mm_struct *next)
+@@ -52,15 +55,15 @@ struct ldt_struct {
+ /*
+ * Used for LDT copy/destruction.
+ */
+-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+-void destroy_context(struct mm_struct *mm);
++int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
++void destroy_context_ldt(struct mm_struct *mm);
+ #else /* CONFIG_MODIFY_LDT_SYSCALL */
+-static inline int init_new_context(struct task_struct *tsk,
+- struct mm_struct *mm)
++static inline int init_new_context_ldt(struct task_struct *tsk,
++ struct mm_struct *mm)
+ {
+ return 0;
+ }
+-static inline void destroy_context(struct mm_struct *mm) {}
++static inline void destroy_context_ldt(struct mm_struct *mm) {}
+ #endif
+
+ static inline void load_mm_ldt(struct mm_struct *mm)
+@@ -102,6 +105,18 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ }
+
++static inline int init_new_context(struct task_struct *tsk,
++ struct mm_struct *mm)
++{
++ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
++ init_new_context_ldt(tsk, mm);
++ return 0;
++}
++static inline void destroy_context(struct mm_struct *mm)
++{
++ destroy_context_ldt(mm);
++}
++
+ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index b8911aecf035..caa00191e565 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -32,6 +32,15 @@
+ #define EFER_FFXSR (1<<_EFER_FFXSR)
+
+ /* Intel MSRs. Some also available on other CPUs */
++#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
++#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
++#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
++#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
++#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
++
++#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
++#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
++
+ #define MSR_IA32_PERFCTR0 0x000000c1
+ #define MSR_IA32_PERFCTR1 0x000000c2
+ #define MSR_FSB_FREQ 0x000000cd
+@@ -45,6 +54,16 @@
+ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+ #define MSR_MTRRcap 0x000000fe
++
++#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
++#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
++#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
++#define ARCH_CAP_SSB_NO (1 << 4) /*
++ * Not susceptible to Speculative Store Bypass
++ * attack, so no Speculative Store Bypass
++ * control required.
++ */
++
+ #define MSR_IA32_BBL_CR_CTL 0x00000119
+ #define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
+@@ -132,6 +151,7 @@
+
+ /* DEBUGCTLMSR bits (others vary by model): */
+ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
++#define DEBUGCTLMSR_BTF_SHIFT 1
+ #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+ #define DEBUGCTLMSR_TR (1UL << 6)
+ #define DEBUGCTLMSR_BTS (1UL << 7)
+@@ -308,6 +328,8 @@
+ #define MSR_AMD64_IBSOPDATA4 0xc001103d
+ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+
++#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
++
+ /* Fam 16h MSRs */
+ #define MSR_F16H_L2I_PERF_CTL 0xc0010230
+ #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 8b910416243c..b4c74c24c890 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -6,6 +6,7 @@
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
++#include <asm/msr-index.h>
+
+ /*
+ * Fill the CPU return stack buffer.
+@@ -171,6 +172,14 @@ enum spectre_v2_mitigation {
+ SPECTRE_V2_IBRS,
+ };
+
++/* The Speculative Store Bypass disable variants */
++enum ssb_mitigation {
++ SPEC_STORE_BYPASS_NONE,
++ SPEC_STORE_BYPASS_DISABLE,
++ SPEC_STORE_BYPASS_PRCTL,
++ SPEC_STORE_BYPASS_SECCOMP,
++};
++
+ extern char __indirect_thunk_start[];
+ extern char __indirect_thunk_end[];
+
+@@ -194,6 +203,51 @@ static inline void vmexit_fill_RSB(void)
+ #endif
+ }
+
++static __always_inline
++void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
++{
++ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
++ : : "c" (msr),
++ "a" ((u32)val),
++ "d" ((u32)(val >> 32)),
++ [feature] "i" (feature)
++ : "memory");
++}
++
++static inline void indirect_branch_prediction_barrier(void)
++{
++ u64 val = PRED_CMD_IBPB;
++
++ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
++}
++
++/* The Intel SPEC CTRL MSR base value cache */
++extern u64 x86_spec_ctrl_base;
++
++/*
++ * With retpoline, we must use IBRS to restrict branch prediction
++ * before calling into firmware.
++ *
++ * (Implemented as CPP macros due to header hell.)
++ */
++#define firmware_restrict_branch_speculation_start() \
++do { \
++ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
++ \
++ preempt_disable(); \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ X86_FEATURE_USE_IBRS_FW); \
++} while (0)
++
++#define firmware_restrict_branch_speculation_end() \
++do { \
++ u64 val = x86_spec_ctrl_base; \
++ \
++ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
++ X86_FEATURE_USE_IBRS_FW); \
++ preempt_enable(); \
++} while (0)
++
+ #endif /* __ASSEMBLY__ */
+
+ /*
+diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
+index fac9a5c0abe9..6847d85400a8 100644
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -100,6 +100,7 @@
+ #define REQUIRED_MASK15 0
+ #define REQUIRED_MASK16 0
+ #define REQUIRED_MASK17 0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define REQUIRED_MASK18 0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
+new file mode 100644
+index 000000000000..ae7c2c5cd7f0
+--- /dev/null
++++ b/arch/x86/include/asm/spec-ctrl.h
+@@ -0,0 +1,80 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_SPECCTRL_H_
++#define _ASM_X86_SPECCTRL_H_
++
++#include <linux/thread_info.h>
++#include <asm/nospec-branch.h>
++
++/*
++ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
++ * the guest has, while on VMEXIT we restore the host view. This
++ * would be easier if SPEC_CTRL were architecturally maskable or
++ * shadowable for guests but this is not (currently) the case.
++ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
++ * the guest's version of VIRT_SPEC_CTRL, if emulated.
++ */
++extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
++
++/**
++ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
++ * (may get translated to MSR_AMD64_LS_CFG bits)
++ *
++ * Avoids writing to the MSR if the content/bits are the same
++ */
++static inline
++void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
++{
++ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
++}
++
++/**
++ * x86_spec_ctrl_restore_host - Restore host speculation control registers
++ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
++ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
++ * (may get translated to MSR_AMD64_LS_CFG bits)
++ *
++ * Avoids writing to the MSR if the content/bits are the same
++ */
++static inline
++void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
++{
++ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
++}
++
++/* AMD specific Speculative Store Bypass MSR data */
++extern u64 x86_amd_ls_cfg_base;
++extern u64 x86_amd_ls_cfg_ssbd_mask;
++
++static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
++{
++ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
++ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
++}
++
++static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
++{
++ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
++ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
++}
++
++static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
++{
++ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
++}
++
++#ifdef CONFIG_SMP
++extern void speculative_store_bypass_ht_init(void);
++#else
++static inline void speculative_store_bypass_ht_init(void) { }
++#endif
++
++extern void speculative_store_bypass_update(unsigned long tif);
++
++static inline void speculative_store_bypass_update_current(void)
++{
++ speculative_store_bypass_update(current_thread_info()->flags);
++}
++
++#endif
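
A quick sanity check of the shift arithmetic above (not part of the patch), using the values this series defines: SPEC_CTRL_SSBD_SHIFT is 2 (msr-index.h hunk earlier) and TIF_SSBD is 5 (thread_info.h hunk below), so ssbd_tif_to_spec_ctrl() moves bit 5 down to bit 2:

    #include <assert.h>
    #include <stdint.h>

    #define SPEC_CTRL_SSBD_SHIFT 2            /* from the msr-index.h hunk */
    #define SPEC_CTRL_SSBD (1UL << SPEC_CTRL_SSBD_SHIFT)
    #define TIF_SSBD 5                        /* from the thread_info.h hunk */
    #define _TIF_SSBD (1UL << TIF_SSBD)

    /* Mirrors ssbd_tif_to_spec_ctrl(): 0x20 -> 0x04, everything else -> 0. */
    static uint64_t tif_to_spec_ctrl(uint64_t tifn)
    {
        return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
    }

    int main(void)
    {
        assert(tif_to_spec_ctrl(_TIF_SSBD) == SPEC_CTRL_SSBD);
        assert(tif_to_spec_ctrl(0) == 0);
        return 0;
    }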
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index 18c9aaa8c043..a96e88b243ef 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -92,6 +92,7 @@ struct thread_info {
+ #define TIF_SIGPENDING 2 /* signal pending */
+ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
++#define TIF_SSBD 5 /* Reduced data speculation */
+ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
+@@ -114,8 +115,9 @@ struct thread_info {
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
++#define _TIF_SSBD (1 << TIF_SSBD)
+ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
+@@ -147,7 +149,7 @@ struct thread_info {
+
+ /* flags to check in __switch_to() */
+ #define _TIF_WORK_CTXSW \
+- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
++ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
+
+ #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
+ #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index e2a89d2577fb..72cfe3e53af1 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void)
+ struct tlb_state {
+ struct mm_struct *active_mm;
+ int state;
++ /* last user mm's ctx id */
++ u64 last_ctx_id;
+
+ /*
+ * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -109,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask)
+ }
+ }
+
++static inline void cr4_toggle_bits(unsigned long mask)
++{
++ unsigned long cr4;
++
++ cr4 = this_cpu_read(cpu_tlbstate.cr4);
++ cr4 ^= mask;
++ this_cpu_write(cpu_tlbstate.cr4, cr4);
++ __write_cr4(cr4);
++}
++
+ /* Read the CR4 shadow. */
+ static inline unsigned long cr4_read_shadow(void)
+ {
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index b1b78ffe01d0..7947cee61f61 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -41,6 +41,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
+ obj-y += pci-iommu_table.o
+ obj-y += resource.o
++obj-y += irqflags.o
+
+ obj-y += process.o
+ obj-y += fpu/
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index f4fb8f5b0be4..9f6151884249 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -9,6 +9,7 @@
+ #include <asm/processor.h>
+ #include <asm/apic.h>
+ #include <asm/cpu.h>
++#include <asm/spec-ctrl.h>
+ #include <asm/smp.h>
+ #include <asm/pci-direct.h>
+ #include <asm/delay.h>
+@@ -519,6 +520,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
+
+ if (cpu_has(c, X86_FEATURE_MWAITX))
+ use_mwaitx_delay();
++
++ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
++ unsigned int bit;
++
++ switch (c->x86) {
++ case 0x15: bit = 54; break;
++ case 0x16: bit = 33; break;
++ case 0x17: bit = 10; break;
++ default: return;
++ }
++ /*
++ * Try to cache the base value so further operations can
++ * avoid RMW. If that faults, do not enable SSBD.
++ */
++ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
++ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
++ setup_force_cpu_cap(X86_FEATURE_SSBD);
++ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
++ }
++ }
+ }
+
+ static void early_init_amd(struct cpuinfo_x86 *c)
+@@ -692,6 +713,17 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
+ }
+ }
+
++static void init_amd_zn(struct cpuinfo_x86 *c)
++{
++ set_cpu_cap(c, X86_FEATURE_ZEN);
++ /*
++ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
++ * all up to and including B1.
++ */
++ if (c->x86_model <= 1 && c->x86_mask <= 1)
++ set_cpu_cap(c, X86_FEATURE_CPB);
++}
++
+ static void init_amd(struct cpuinfo_x86 *c)
+ {
+ u32 dummy;
+@@ -722,6 +754,7 @@ static void init_amd(struct cpuinfo_x86 *c)
+ case 0x10: init_amd_gh(c); break;
+ case 0x12: init_amd_ln(c); break;
+ case 0x15: init_amd_bd(c); break;
++ case 0x17: init_amd_zn(c); break;
+ }
+
+ /* Enable workaround for FXSAVE leak */
+@@ -791,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 *c)
+ if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+ set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+- /* AMD CPUs don't reset SS attributes on SYSRET */
+- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
++ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
++ if (!cpu_has(c, X86_FEATURE_XENPV))
++ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ }
+
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 2bbc74f8a4a8..12a8867071f3 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -11,8 +11,10 @@
+ #include <linux/utsname.h>
+ #include <linux/cpu.h>
+ #include <linux/module.h>
++#include <linux/nospec.h>
++#include <linux/prctl.h>
+
+-#include <asm/nospec-branch.h>
++#include <asm/spec-ctrl.h>
+ #include <asm/cmdline.h>
+ #include <asm/bugs.h>
+ #include <asm/processor.h>
+@@ -26,6 +28,27 @@
+ #include <asm/intel-family.h>
+
+ static void __init spectre_v2_select_mitigation(void);
++static void __init ssb_select_mitigation(void);
++
++/*
++ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
++ * writes to SPEC_CTRL contain whatever reserved bits have been set.
++ */
++u64 x86_spec_ctrl_base;
++EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
++
++/*
++ * The vendor and possibly platform specific bits which can be modified in
++ * x86_spec_ctrl_base.
++ */
++static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
++
++/*
++ * AMD specific MSR info for Speculative Store Bypass control.
++ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
++ */
++u64 x86_amd_ls_cfg_base;
++u64 x86_amd_ls_cfg_ssbd_mask;
+
+ void __init check_bugs(void)
+ {
+@@ -36,9 +59,27 @@ void __init check_bugs(void)
+ print_cpu_info(&boot_cpu_data);
+ }
+
++ /*
++ * Read the SPEC_CTRL MSR to account for reserved bits which may
++ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
++ * init code as it is not enumerated and depends on the family.
++ */
++ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
++ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
++
++ /* Allow STIBP in MSR_SPEC_CTRL if supported */
++ if (boot_cpu_has(X86_FEATURE_STIBP))
++ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
++
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
++ /*
++ * Select proper mitigation for any exposure to the Speculative Store
++ * Bypass vulnerability.
++ */
++ ssb_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+@@ -94,6 +135,73 @@ static const char *spectre_v2_strings[] = {
+
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
++void
++x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
++{
++ u64 msrval, guestval, hostval = x86_spec_ctrl_base;
++ struct thread_info *ti = current_thread_info();
++
++ /* Is MSR_SPEC_CTRL implemented ? */
++ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
++ /*
++ * Restrict guest_spec_ctrl to supported values. Clear the
++ * modifiable bits in the host base value and or the
++ * modifiable bits from the guest value.
++ */
++ guestval = hostval & ~x86_spec_ctrl_mask;
++ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
++
++ /* SSBD controlled in MSR_SPEC_CTRL */
++ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
++ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
++
++ if (hostval != guestval) {
++ msrval = setguest ? guestval : hostval;
++ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
++ }
++ }
++
++ /*
++ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
++ * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
++ */
++ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
++ !static_cpu_has(X86_FEATURE_VIRT_SSBD))
++ return;
++
++ /*
++ * If the host has SSBD mitigation enabled, force it in the host's
++ * virtual MSR value. If its not permanently enabled, evaluate
++ * current's TIF_SSBD thread flag.
++ */
++ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
++ hostval = SPEC_CTRL_SSBD;
++ else
++ hostval = ssbd_tif_to_spec_ctrl(ti->flags);
++
++ /* Sanitize the guest value */
++ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
++
++ if (hostval != guestval) {
++ unsigned long tif;
++
++ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
++ ssbd_spec_ctrl_to_tif(hostval);
++
++ speculative_store_bypass_update(tif);
++ }
++}
++EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
++
++static void x86_amd_ssb_disable(void)
++{
++ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
++
++ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
++ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
++ wrmsrl(MSR_AMD64_LS_CFG, msrval);
++}
+
+ #ifdef RETPOLINE
+ static bool spectre_v2_bad_module;
+@@ -162,8 +270,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+ else {
+- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+- sizeof(arg));
++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;
+
+@@ -184,8 +291,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+- pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+- mitigation_options[i].option);
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+@@ -255,14 +361,14 @@ static void __init spectre_v2_select_mitigation(void)
+ goto retpoline_auto;
+ break;
+ }
+- pr_err("kernel not compiled with retpoline; no mitigation available!");
++ pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+ retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+- pr_err("LFENCE not serializing. Switching to generic retpoline\n");
++ pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+@@ -280,7 +386,7 @@ retpoline_auto:
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+- * If neither SMEP or KPTI are available, there is a risk of
++ * If neither SMEP nor PTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+@@ -294,38 +400,309 @@ retpoline_auto:
+ if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+- pr_info("Filling RSB on context switch\n");
++ pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
++ }
++
++ /* Initialize Indirect Branch Prediction Barrier if supported */
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
++ pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
++ }
++
++ /*
++ * Retpoline means the kernel is safe because it has no indirect
++ * branches. But firmware isn't, so use IBRS to protect that.
++ */
++ if (boot_cpu_has(X86_FEATURE_IBRS)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
++ pr_info("Enabling Restricted Speculation for firmware calls\n");
++ }
++}
++
++#undef pr_fmt
++#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
++
++static enum ssb_mitigation ssb_mode = SPEC_STORE_BYPASS_NONE;
++
++/* The kernel command line selection */
++enum ssb_mitigation_cmd {
++ SPEC_STORE_BYPASS_CMD_NONE,
++ SPEC_STORE_BYPASS_CMD_AUTO,
++ SPEC_STORE_BYPASS_CMD_ON,
++ SPEC_STORE_BYPASS_CMD_PRCTL,
++ SPEC_STORE_BYPASS_CMD_SECCOMP,
++};
++
++static const char *ssb_strings[] = {
++ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
++ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
++ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
1370 |
++ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", |
1371 |
++}; |
1372 |
++ |
1373 |
++static const struct { |
1374 |
++ const char *option; |
1375 |
++ enum ssb_mitigation_cmd cmd; |
1376 |
++} ssb_mitigation_options[] = { |
1377 |
++ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ |
1378 |
++ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ |
1379 |
++ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ |
1380 |
++ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ |
1381 |
++ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ |
1382 |
++}; |
1383 |
++ |
1384 |
++static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) |
1385 |
++{ |
1386 |
++ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; |
1387 |
++ char arg[20]; |
1388 |
++ int ret, i; |
1389 |
++ |
1390 |
++ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { |
1391 |
++ return SPEC_STORE_BYPASS_CMD_NONE; |
1392 |
++ } else { |
1393 |
++ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", |
1394 |
++ arg, sizeof(arg)); |
1395 |
++ if (ret < 0) |
1396 |
++ return SPEC_STORE_BYPASS_CMD_AUTO; |
1397 |
++ |
1398 |
++ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { |
1399 |
++ if (!match_option(arg, ret, ssb_mitigation_options[i].option)) |
1400 |
++ continue; |
1401 |
++ |
1402 |
++ cmd = ssb_mitigation_options[i].cmd; |
1403 |
++ break; |
1404 |
++ } |
1405 |
++ |
1406 |
++ if (i >= ARRAY_SIZE(ssb_mitigation_options)) { |
1407 |
++ pr_err("unknown option (%s). Switching to AUTO select\n", arg); |
1408 |
++ return SPEC_STORE_BYPASS_CMD_AUTO; |
1409 |
++ } |
1410 |
++ } |
1411 |
++ |
1412 |
++ return cmd; |
1413 |
++} |
1414 |
++ |
1415 |
++static enum ssb_mitigation __init __ssb_select_mitigation(void) |
1416 |
++{ |
1417 |
++ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; |
1418 |
++ enum ssb_mitigation_cmd cmd; |
1419 |
++ |
1420 |
++ if (!boot_cpu_has(X86_FEATURE_SSBD)) |
1421 |
++ return mode; |
1422 |
++ |
1423 |
++ cmd = ssb_parse_cmdline(); |
1424 |
++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && |
1425 |
++ (cmd == SPEC_STORE_BYPASS_CMD_NONE || |
1426 |
++ cmd == SPEC_STORE_BYPASS_CMD_AUTO)) |
1427 |
++ return mode; |
1428 |
++ |
1429 |
++ switch (cmd) { |
1430 |
++ case SPEC_STORE_BYPASS_CMD_AUTO: |
1431 |
++ case SPEC_STORE_BYPASS_CMD_SECCOMP: |
1432 |
++ /* |
1433 |
++ * Choose prctl+seccomp as the default mode if seccomp is |
1434 |
++ * enabled. |
1435 |
++ */ |
1436 |
++ if (IS_ENABLED(CONFIG_SECCOMP)) |
1437 |
++ mode = SPEC_STORE_BYPASS_SECCOMP; |
1438 |
++ else |
1439 |
++ mode = SPEC_STORE_BYPASS_PRCTL; |
1440 |
++ break; |
1441 |
++ case SPEC_STORE_BYPASS_CMD_ON: |
1442 |
++ mode = SPEC_STORE_BYPASS_DISABLE; |
1443 |
++ break; |
1444 |
++ case SPEC_STORE_BYPASS_CMD_PRCTL: |
1445 |
++ mode = SPEC_STORE_BYPASS_PRCTL; |
1446 |
++ break; |
1447 |
++ case SPEC_STORE_BYPASS_CMD_NONE: |
1448 |
++ break; |
1449 |
++ } |
1450 |
++ |
1451 |
++ /* |
1452 |
++ * We have three CPU feature flags that are in play here: |
1453 |
++ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. |
1454 |
++ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass |
1455 |
++ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation |
1456 |
++ */ |
1457 |
++ if (mode == SPEC_STORE_BYPASS_DISABLE) { |
1458 |
++ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); |
1459 |
++ /* |
1460 |
++ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses |
1461 |
++ * a completely different MSR and bit dependent on family. |
1462 |
++ */ |
1463 |
++ switch (boot_cpu_data.x86_vendor) { |
1464 |
++ case X86_VENDOR_INTEL: |
1465 |
++ x86_spec_ctrl_base |= SPEC_CTRL_SSBD; |
1466 |
++ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; |
1467 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
1468 |
++ break; |
1469 |
++ case X86_VENDOR_AMD: |
1470 |
++ x86_amd_ssb_disable(); |
1471 |
++ break; |
1472 |
++ } |
1473 |
+ } |
1474 |
++ |
1475 |
++ return mode; |
1476 |
++} |
1477 |
++ |
1478 |
++static void ssb_select_mitigation(void) |
1479 |
++{ |
1480 |
++ ssb_mode = __ssb_select_mitigation(); |
1481 |
++ |
1482 |
++ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) |
1483 |
++ pr_info("%s\n", ssb_strings[ssb_mode]); |
1484 |
+ } |
1485 |
+ |
1486 |
+ #undef pr_fmt |
1487 |
++#define pr_fmt(fmt) "Speculation prctl: " fmt |
1488 |
++ |
1489 |
++static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) |
1490 |
++{ |
1491 |
++ bool update; |
1492 |
++ |
1493 |
++ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && |
1494 |
++ ssb_mode != SPEC_STORE_BYPASS_SECCOMP) |
1495 |
++ return -ENXIO; |
1496 |
++ |
1497 |
++ switch (ctrl) { |
1498 |
++ case PR_SPEC_ENABLE: |
1499 |
++ /* If speculation is force disabled, enable is not allowed */ |
1500 |
++ if (task_spec_ssb_force_disable(task)) |
1501 |
++ return -EPERM; |
1502 |
++ task_clear_spec_ssb_disable(task); |
1503 |
++ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); |
1504 |
++ break; |
1505 |
++ case PR_SPEC_DISABLE: |
1506 |
++ task_set_spec_ssb_disable(task); |
1507 |
++ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); |
1508 |
++ break; |
1509 |
++ case PR_SPEC_FORCE_DISABLE: |
1510 |
++ task_set_spec_ssb_disable(task); |
1511 |
++ task_set_spec_ssb_force_disable(task); |
1512 |
++ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); |
1513 |
++ break; |
1514 |
++ default: |
1515 |
++ return -ERANGE; |
1516 |
++ } |
1517 |
++ |
1518 |
++ /* |
1519 |
++ * If being set on non-current task, delay setting the CPU |
1520 |
++ * mitigation until it is next scheduled. |
1521 |
++ */ |
1522 |
++ if (task == current && update) |
1523 |
++ speculative_store_bypass_update_current(); |
1524 |
++ |
1525 |
++ return 0; |
1526 |
++} |
1527 |
++ |
1528 |
++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, |
1529 |
++ unsigned long ctrl) |
1530 |
++{ |
1531 |
++ switch (which) { |
1532 |
++ case PR_SPEC_STORE_BYPASS: |
1533 |
++ return ssb_prctl_set(task, ctrl); |
1534 |
++ default: |
1535 |
++ return -ENODEV; |
1536 |
++ } |
1537 |
++} |
1538 |
++ |
1539 |
++#ifdef CONFIG_SECCOMP |
1540 |
++void arch_seccomp_spec_mitigate(struct task_struct *task) |
1541 |
++{ |
1542 |
++ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) |
1543 |
++ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); |
1544 |
++} |
1545 |
++#endif |
1546 |
++ |
1547 |
++static int ssb_prctl_get(struct task_struct *task) |
1548 |
++{ |
1549 |
++ switch (ssb_mode) { |
1550 |
++ case SPEC_STORE_BYPASS_DISABLE: |
1551 |
++ return PR_SPEC_DISABLE; |
1552 |
++ case SPEC_STORE_BYPASS_SECCOMP: |
1553 |
++ case SPEC_STORE_BYPASS_PRCTL: |
1554 |
++ if (task_spec_ssb_force_disable(task)) |
1555 |
++ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; |
1556 |
++ if (task_spec_ssb_disable(task)) |
1557 |
++ return PR_SPEC_PRCTL | PR_SPEC_DISABLE; |
1558 |
++ return PR_SPEC_PRCTL | PR_SPEC_ENABLE; |
1559 |
++ default: |
1560 |
++ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) |
1561 |
++ return PR_SPEC_ENABLE; |
1562 |
++ return PR_SPEC_NOT_AFFECTED; |
1563 |
++ } |
1564 |
++} |
1565 |
++ |
1566 |
++int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) |
1567 |
++{ |
1568 |
++ switch (which) { |
1569 |
++ case PR_SPEC_STORE_BYPASS: |
1570 |
++ return ssb_prctl_get(task); |
1571 |
++ default: |
1572 |
++ return -ENODEV; |
1573 |
++ } |
1574 |
++} |
1575 |
++ |
1576 |
++void x86_spec_ctrl_setup_ap(void) |
1577 |
++{ |
1578 |
++ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) |
1579 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
1580 |
++ |
1581 |
++ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) |
1582 |
++ x86_amd_ssb_disable(); |
1583 |
++} |
1584 |
+ |
1585 |
+ #ifdef CONFIG_SYSFS |
1586 |
+-ssize_t cpu_show_meltdown(struct device *dev, |
1587 |
+- struct device_attribute *attr, char *buf) |
1588 |
++ |
1589 |
++static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, |
1590 |
++ char *buf, unsigned int bug) |
1591 |
+ { |
1592 |
+- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) |
1593 |
++ if (!boot_cpu_has_bug(bug)) |
1594 |
+ return sprintf(buf, "Not affected\n"); |
1595 |
+- if (boot_cpu_has(X86_FEATURE_KAISER)) |
1596 |
+- return sprintf(buf, "Mitigation: PTI\n"); |
1597 |
++ |
1598 |
++ switch (bug) { |
1599 |
++ case X86_BUG_CPU_MELTDOWN: |
1600 |
++ if (boot_cpu_has(X86_FEATURE_KAISER)) |
1601 |
++ return sprintf(buf, "Mitigation: PTI\n"); |
1602 |
++ |
1603 |
++ break; |
1604 |
++ |
1605 |
++ case X86_BUG_SPECTRE_V1: |
1606 |
++ return sprintf(buf, "Mitigation: __user pointer sanitization\n"); |
1607 |
++ |
1608 |
++ case X86_BUG_SPECTRE_V2: |
1609 |
++ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], |
1610 |
++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", |
1611 |
++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", |
1612 |
++ spectre_v2_module_string()); |
1613 |
++ |
1614 |
++ case X86_BUG_SPEC_STORE_BYPASS: |
1615 |
++ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); |
1616 |
++ |
1617 |
++ default: |
1618 |
++ break; |
1619 |
++ } |
1620 |
++ |
1621 |
+ return sprintf(buf, "Vulnerable\n"); |
1622 |
+ } |
1623 |
+ |
1624 |
+-ssize_t cpu_show_spectre_v1(struct device *dev, |
1625 |
+- struct device_attribute *attr, char *buf) |
1626 |
++ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) |
1627 |
+ { |
1628 |
+- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) |
1629 |
+- return sprintf(buf, "Not affected\n"); |
1630 |
+- return sprintf(buf, "Mitigation: __user pointer sanitization\n"); |
1631 |
++ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); |
1632 |
+ } |
1633 |
+ |
1634 |
+-ssize_t cpu_show_spectre_v2(struct device *dev, |
1635 |
+- struct device_attribute *attr, char *buf) |
1636 |
++ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) |
1637 |
+ { |
1638 |
+- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1639 |
+- return sprintf(buf, "Not affected\n"); |
1640 |
++ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); |
1641 |
++} |
1642 |
+ |
1643 |
+- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], |
1644 |
+- spectre_v2_module_string()); |
1645 |
++ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) |
1646 |
++{ |
1647 |
++ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); |
1648 |
++} |
1649 |
++ |
1650 |
++ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) |
1651 |
++{ |
1652 |
++ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); |
1653 |
+ } |
1654 |
+ #endif |
1655 |
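The ssb_prctl_set()/ssb_prctl_get() plumbing above is reached through the new PR_SET_SPECULATION_CTRL/PR_GET_SPECULATION_CTRL prctls added elsewhere in this series. Below is a minimal userspace sketch of querying and opting into the mitigation; the PR_* values mirror the series' uapi additions and the fallback defines are only needed on older toolchains, so treat this as illustrative rather than canonical.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>

#ifndef PR_GET_SPECULATION_CTRL         /* uapi additions from this series */
#define PR_GET_SPECULATION_CTRL 52
#define PR_SET_SPECULATION_CTRL 53
#define PR_SPEC_STORE_BYPASS    0
#define PR_SPEC_PRCTL           (1UL << 0)
#define PR_SPEC_ENABLE          (1UL << 1)
#define PR_SPEC_DISABLE         (1UL << 2)
#define PR_SPEC_FORCE_DISABLE   (1UL << 3)
#endif

int main(void)
{
        long st = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

        if (st < 0) {
                /* ENODEV: kernel predates the speculation prctl */
                fprintf(stderr, "get: %s\n", strerror(errno));
                return 1;
        }
        printf("SSB prctl state: 0x%lx\n", st);

        /*
         * Only succeeds in the prctl/seccomp modes; ssb_prctl_set()
         * returns -ENXIO for the "on"/"off" mitigation modes.
         */
        if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                  PR_SPEC_DISABLE, 0, 0))
                fprintf(stderr, "set: %s\n", strerror(errno));
        return 0;
}

In the default seccomp mode, installing any seccomp filter has the same effect as PR_SPEC_FORCE_DISABLE, via arch_seccomp_spec_mitigate() above.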
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
1656 |
+index 736e2843139b..3d21b28f9826 100644 |
1657 |
+--- a/arch/x86/kernel/cpu/common.c |
1658 |
++++ b/arch/x86/kernel/cpu/common.c |
1659 |
+@@ -43,6 +43,8 @@ |
1660 |
+ #include <asm/pat.h> |
1661 |
+ #include <asm/microcode.h> |
1662 |
+ #include <asm/microcode_intel.h> |
1663 |
++#include <asm/intel-family.h> |
1664 |
++#include <asm/cpu_device_id.h> |
1665 |
+ |
1666 |
+ #ifdef CONFIG_X86_LOCAL_APIC |
1667 |
+ #include <asm/uv/uv.h> |
1668 |
+@@ -674,6 +676,40 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) |
1669 |
+ } |
1670 |
+ } |
1671 |
+ |
1672 |
++static void init_speculation_control(struct cpuinfo_x86 *c) |
1673 |
++{ |
1674 |
++ /* |
1675 |
++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, |
1676 |
++ * and they also have a different bit for STIBP support. Also, |
1677 |
++ * a hypervisor might have set the individual AMD bits even on |
1678 |
++ * Intel CPUs, for finer-grained selection of what's available. |
1679 |
++ */ |
1680 |
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { |
1681 |
++ set_cpu_cap(c, X86_FEATURE_IBRS); |
1682 |
++ set_cpu_cap(c, X86_FEATURE_IBPB); |
1683 |
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
1684 |
++ } |
1685 |
++ |
1686 |
++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) |
1687 |
++ set_cpu_cap(c, X86_FEATURE_STIBP); |
1688 |
++ |
1689 |
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD)) |
1690 |
++ set_cpu_cap(c, X86_FEATURE_SSBD); |
1691 |
++ |
1692 |
++ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { |
1693 |
++ set_cpu_cap(c, X86_FEATURE_IBRS); |
1694 |
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
1695 |
++ } |
1696 |
++ |
1697 |
++ if (cpu_has(c, X86_FEATURE_AMD_IBPB)) |
1698 |
++ set_cpu_cap(c, X86_FEATURE_IBPB); |
1699 |
++ |
1700 |
++ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { |
1701 |
++ set_cpu_cap(c, X86_FEATURE_STIBP); |
1702 |
++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
1703 |
++ } |
1704 |
++} |
1705 |
++ |
1706 |
+ void get_cpu_cap(struct cpuinfo_x86 *c) |
1707 |
+ { |
1708 |
+ u32 eax, ebx, ecx, edx; |
1709 |
+@@ -695,6 +731,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
1710 |
+ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
1711 |
+ c->x86_capability[CPUID_7_0_EBX] = ebx; |
1712 |
+ c->x86_capability[CPUID_7_ECX] = ecx; |
1713 |
++ c->x86_capability[CPUID_7_EDX] = edx; |
1714 |
+ } |
1715 |
+ |
1716 |
+ /* Extended state features: level 0x0000000d */ |
1717 |
+@@ -765,6 +802,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
1718 |
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); |
1719 |
+ |
1720 |
+ init_scattered_cpuid_features(c); |
1721 |
++ init_speculation_control(c); |
1722 |
++ |
1723 |
++ /* |
1724 |
++ * Clear/Set all flags overridden by options, after probe. |
1725 |
++ * This needs to happen each time we re-probe, which may happen |
1726 |
++ * several times during CPU initialization. |
1727 |
++ */ |
1728 |
++ apply_forced_caps(c); |
1729 |
+ } |
1730 |
+ |
1731 |
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
1732 |
+@@ -793,6 +838,75 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
1733 |
+ #endif |
1734 |
+ } |
1735 |
+ |
1736 |
++static const __initconst struct x86_cpu_id cpu_no_speculation[] = { |
1737 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, |
1738 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, |
1739 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, |
1740 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, |
1741 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, |
1742 |
++ { X86_VENDOR_CENTAUR, 5 }, |
1743 |
++ { X86_VENDOR_INTEL, 5 }, |
1744 |
++ { X86_VENDOR_NSC, 5 }, |
1745 |
++ { X86_VENDOR_ANY, 4 }, |
1746 |
++ {} |
1747 |
++}; |
1748 |
++ |
1749 |
++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { |
1750 |
++ { X86_VENDOR_AMD }, |
1751 |
++ {} |
1752 |
++}; |
1753 |
++ |
1754 |
++static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { |
1755 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, |
1756 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, |
1757 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, |
1758 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, |
1759 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, |
1760 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, |
1761 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, |
1762 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, |
1763 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, |
1764 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, |
1765 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, |
1766 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, |
1767 |
++ { X86_VENDOR_CENTAUR, 5, }, |
1768 |
++ { X86_VENDOR_INTEL, 5, }, |
1769 |
++ { X86_VENDOR_NSC, 5, }, |
1770 |
++ { X86_VENDOR_AMD, 0x12, }, |
1771 |
++ { X86_VENDOR_AMD, 0x11, }, |
1772 |
++ { X86_VENDOR_AMD, 0x10, }, |
1773 |
++ { X86_VENDOR_AMD, 0xf, }, |
1774 |
++ { X86_VENDOR_ANY, 4, }, |
1775 |
++ {} |
1776 |
++}; |
1777 |
++ |
1778 |
++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) |
1779 |
++{ |
1780 |
++ u64 ia32_cap = 0; |
1781 |
++ |
1782 |
++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) |
1783 |
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); |
1784 |
++ |
1785 |
++ if (!x86_match_cpu(cpu_no_spec_store_bypass) && |
1786 |
++ !(ia32_cap & ARCH_CAP_SSB_NO)) |
1787 |
++ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); |
1788 |
++ |
1789 |
++ if (x86_match_cpu(cpu_no_speculation)) |
1790 |
++ return; |
1791 |
++ |
1792 |
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1793 |
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1794 |
++ |
1795 |
++ if (x86_match_cpu(cpu_no_meltdown)) |
1796 |
++ return; |
1797 |
++ |
1798 |
++ /* Rogue Data Cache Load? No! */ |
1799 |
++ if (ia32_cap & ARCH_CAP_RDCL_NO) |
1800 |
++ return; |
1801 |
++ |
1802 |
++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
1803 |
++} |
1804 |
++ |
1805 |
+ /* |
1806 |
+ * Do minimum CPU detection early. |
1807 |
+ * Fields really needed: vendor, cpuid_level, family, model, mask, |
1808 |
+@@ -839,11 +953,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
1809 |
+ |
1810 |
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
1811 |
+ |
1812 |
+- if (c->x86_vendor != X86_VENDOR_AMD) |
1813 |
+- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
1814 |
+- |
1815 |
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1816 |
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1817 |
++ cpu_set_bug_bits(c); |
1818 |
+ |
1819 |
+ fpu__init_system(c); |
1820 |
+ |
1821 |
+@@ -1132,6 +1242,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) |
1822 |
+ enable_sep_cpu(); |
1823 |
+ #endif |
1824 |
+ mtrr_ap_init(); |
1825 |
++ x86_spec_ctrl_setup_ap(); |
1826 |
+ } |
1827 |
+ |
1828 |
+ struct msr_range { |
1829 |
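cpu_set_bug_bits() above consults MSR_IA32_ARCH_CAPABILITIES (0x10a) so that future silicon can declare itself immune via ARCH_CAP_RDCL_NO (bit 0) and ARCH_CAP_SSB_NO (bit 4). The following is a hedged sketch of inspecting the same MSR from userspace through the msr driver; it assumes root, a loaded msr module, x86 hardware, and a CPU that actually implements the MSR (reads of unimplemented MSRs fail with EIO).

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_ARCH_CAPABILITIES 0x10a

int main(void)
{
        uint64_t cap = 0;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        /* EIO here usually means the CPU lacks the MSR entirely. */
        if (fd < 0 || pread(fd, &cap, sizeof(cap),
                            MSR_IA32_ARCH_CAPABILITIES) != sizeof(cap)) {
                perror("rdmsr");
                return 1;
        }
        printf("RDCL_NO=%d SSB_NO=%d\n",
               !!(cap & (1ULL << 0)), !!(cap & (1ULL << 4)));
        close(fd);
        return 0;
}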
+diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h |
1830 |
+index 2584265d4745..3b19d82f7932 100644 |
1831 |
+--- a/arch/x86/kernel/cpu/cpu.h |
1832 |
++++ b/arch/x86/kernel/cpu/cpu.h |
1833 |
+@@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], |
1834 |
+ |
1835 |
+ extern void get_cpu_cap(struct cpuinfo_x86 *c); |
1836 |
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
1837 |
++ |
1838 |
++extern void x86_spec_ctrl_setup_ap(void); |
1839 |
++ |
1840 |
+ #endif /* ARCH_X86_CPU_H */ |
1841 |
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c |
1842 |
+index 9299e3bdfad6..4dce22d3cb06 100644 |
1843 |
+--- a/arch/x86/kernel/cpu/intel.c |
1844 |
++++ b/arch/x86/kernel/cpu/intel.c |
1845 |
+@@ -13,6 +13,7 @@ |
1846 |
+ #include <asm/msr.h> |
1847 |
+ #include <asm/bugs.h> |
1848 |
+ #include <asm/cpu.h> |
1849 |
++#include <asm/intel-family.h> |
1850 |
+ |
1851 |
+ #ifdef CONFIG_X86_64 |
1852 |
+ #include <linux/topology.h> |
1853 |
+@@ -25,6 +26,62 @@ |
1854 |
+ #include <asm/apic.h> |
1855 |
+ #endif |
1856 |
+ |
1857 |
++/* |
1858 |
++ * Early microcode releases for the Spectre v2 mitigation were broken. |
1859 |
++ * Information taken from: |
1860 |
++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf |
1861 |
++ * - https://kb.vmware.com/s/article/52345 |
1862 |
++ * - Microcode revisions observed in the wild |
1863 |
++ * - Release note from 20180108 microcode release |
1864 |
++ */ |
1865 |
++struct sku_microcode { |
1866 |
++ u8 model; |
1867 |
++ u8 stepping; |
1868 |
++ u32 microcode; |
1869 |
++}; |
1870 |
++static const struct sku_microcode spectre_bad_microcodes[] = { |
1871 |
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, |
1872 |
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, |
1873 |
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, |
1874 |
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, |
1875 |
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, |
1876 |
++ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, |
1877 |
++ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, |
1878 |
++ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, |
1879 |
++ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, |
1880 |
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, |
1881 |
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, |
1882 |
++ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, |
1883 |
++ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, |
1884 |
++ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, |
1885 |
++ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, |
1886 |
++ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, |
1887 |
++ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, |
1888 |
++ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, |
1889 |
++ /* Observed in the wild */ |
1890 |
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, |
1891 |
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, |
1892 |
++}; |
1893 |
++ |
1894 |
++static bool bad_spectre_microcode(struct cpuinfo_x86 *c) |
1895 |
++{ |
1896 |
++ int i; |
1897 |
++ |
1898 |
++ /* |
1899 |
++ * We know that the hypervisor lie to us on the microcode version so |
1900 |
++ * we may as well hope that it is running the correct version. |
1901 |
++ */ |
1902 |
++ if (cpu_has(c, X86_FEATURE_HYPERVISOR)) |
1903 |
++ return false; |
1904 |
++ |
1905 |
++ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { |
1906 |
++ if (c->x86_model == spectre_bad_microcodes[i].model && |
1907 |
++ c->x86_mask == spectre_bad_microcodes[i].stepping) |
1908 |
++ return (c->microcode <= spectre_bad_microcodes[i].microcode); |
1909 |
++ } |
1910 |
++ return false; |
1911 |
++} |
1912 |
++ |
1913 |
+ static void early_init_intel(struct cpuinfo_x86 *c) |
1914 |
+ { |
1915 |
+ u64 misc_enable; |
1916 |
+@@ -51,6 +108,22 @@ static void early_init_intel(struct cpuinfo_x86 *c) |
1917 |
+ rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); |
1918 |
+ } |
1919 |
+ |
1920 |
++ /* Now if any of them are set, check the blacklist and clear the lot */ |
1921 |
++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || |
1922 |
++ cpu_has(c, X86_FEATURE_INTEL_STIBP) || |
1923 |
++ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || |
1924 |
++ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { |
1925 |
++ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); |
1926 |
++ setup_clear_cpu_cap(X86_FEATURE_IBRS); |
1927 |
++ setup_clear_cpu_cap(X86_FEATURE_IBPB); |
1928 |
++ setup_clear_cpu_cap(X86_FEATURE_STIBP); |
1929 |
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); |
1930 |
++ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); |
1931 |
++ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); |
1932 |
++ setup_clear_cpu_cap(X86_FEATURE_SSBD); |
1933 |
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); |
1934 |
++ } |
1935 |
++ |
1936 |
+ /* |
1937 |
+ * Atom erratum AAE44/AAF40/AAG38/AAH41: |
1938 |
+ * |
1939 |
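bad_spectre_microcode() matches on the model/stepping/revision triple the kernel reads from CPUID and MSR_IA32_UCODE_REV. The same three fields appear in /proc/cpuinfo, so the blacklist above can be cross-checked from userspace; a deliberately naive sketch:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/cpuinfo", "r");

        if (!f)
                return 1;
        /* Print model, stepping and microcode for the first CPU only. */
        while (fgets(line, sizeof(line), f)) {
                if (!strncmp(line, "model\t", 6) ||
                    !strncmp(line, "stepping", 8) ||
                    !strncmp(line, "microcode", 9))
                        fputs(line, stdout);
                if (line[0] == '\n')
                        break;
        }
        fclose(f);
        return 0;
}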
+diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c |
1940 |
+index ddc9b8125918..7b8c8c838191 100644 |
1941 |
+--- a/arch/x86/kernel/cpu/mcheck/mce.c |
1942 |
++++ b/arch/x86/kernel/cpu/mcheck/mce.c |
1943 |
+@@ -2294,9 +2294,6 @@ static ssize_t store_int_with_restart(struct device *s, |
1944 |
+ if (check_interval == old_check_interval) |
1945 |
+ return ret; |
1946 |
+ |
1947 |
+- if (check_interval < 1) |
1948 |
+- check_interval = 1; |
1949 |
+- |
1950 |
+ mutex_lock(&mce_sysfs_mutex); |
1951 |
+ mce_restart(); |
1952 |
+ mutex_unlock(&mce_sysfs_mutex); |
1953 |
+diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S |
1954 |
+new file mode 100644 |
1955 |
+index 000000000000..3817eb748eb4 |
1956 |
+--- /dev/null |
1957 |
++++ b/arch/x86/kernel/irqflags.S |
1958 |
+@@ -0,0 +1,26 @@ |
1959 |
++/* SPDX-License-Identifier: GPL-2.0 */ |
1960 |
++ |
1961 |
++#include <asm/asm.h> |
1962 |
++#include <asm-generic/export.h> |
1963 |
++#include <linux/linkage.h> |
1964 |
++ |
1965 |
++/* |
1966 |
++ * unsigned long native_save_fl(void) |
1967 |
++ */ |
1968 |
++ENTRY(native_save_fl) |
1969 |
++ pushf |
1970 |
++ pop %_ASM_AX |
1971 |
++ ret |
1972 |
++ENDPROC(native_save_fl) |
1973 |
++EXPORT_SYMBOL(native_save_fl) |
1974 |
++ |
1975 |
++/* |
1976 |
++ * void native_restore_fl(unsigned long flags) |
1977 |
++ * %eax/%rdi: flags |
1978 |
++ */ |
1979 |
++ENTRY(native_restore_fl) |
1980 |
++ push %_ASM_ARG1 |
1981 |
++ popf |
1982 |
++ ret |
1983 |
++ENDPROC(native_restore_fl) |
1984 |
++EXPORT_SYMBOL(native_restore_fl) |
1985 |
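The new irqflags.S provides native_save_fl()/native_restore_fl() as real out-of-line symbols, which keeps builds linking when the compiler declines to inline the old extern inline versions (the apparent motivation for this backport). A runnable userspace analogue of the read side, shown only to illustrate what ENTRY(native_save_fl) does; it assumes x86, and the popf half is omitted because userspace cannot toggle the interrupt flag:

#include <stdio.h>

/* Mirrors native_save_fl: pushf saves EFLAGS, pop retrieves it. */
static unsigned long save_fl(void)
{
        unsigned long flags;

        __asm__ __volatile__("pushf ; pop %0" : "=r"(flags));
        return flags;
}

int main(void)
{
        printf("eflags = %#lx\n", save_fl());
        return 0;
}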
+diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c |
1986 |
+index bc429365b72a..8bc68cfc0d33 100644 |
1987 |
+--- a/arch/x86/kernel/ldt.c |
1988 |
++++ b/arch/x86/kernel/ldt.c |
1989 |
+@@ -119,7 +119,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) |
1990 |
+ * we do not have to muck with descriptors here, that is |
1991 |
+ * done in switch_mm() as needed. |
1992 |
+ */ |
1993 |
+-int init_new_context(struct task_struct *tsk, struct mm_struct *mm) |
1994 |
++int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) |
1995 |
+ { |
1996 |
+ struct ldt_struct *new_ldt; |
1997 |
+ struct mm_struct *old_mm; |
1998 |
+@@ -160,7 +160,7 @@ out_unlock: |
1999 |
+ * |
2000 |
+ * 64bit: Don't touch the LDT register - we're already in the next thread. |
2001 |
+ */ |
2002 |
+-void destroy_context(struct mm_struct *mm) |
2003 |
++void destroy_context_ldt(struct mm_struct *mm) |
2004 |
+ { |
2005 |
+ free_ldt_struct(mm->context.ldt); |
2006 |
+ mm->context.ldt = NULL; |
2007 |
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c |
2008 |
+index 7c5c5dc90ffa..e18c8798c3a2 100644 |
2009 |
+--- a/arch/x86/kernel/process.c |
2010 |
++++ b/arch/x86/kernel/process.c |
2011 |
+@@ -31,6 +31,7 @@ |
2012 |
+ #include <asm/tlbflush.h> |
2013 |
+ #include <asm/mce.h> |
2014 |
+ #include <asm/vm86.h> |
2015 |
++#include <asm/spec-ctrl.h> |
2016 |
+ |
2017 |
+ /* |
2018 |
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
2019 |
+@@ -130,11 +131,6 @@ void flush_thread(void) |
2020 |
+ fpu__clear(&tsk->thread.fpu); |
2021 |
+ } |
2022 |
+ |
2023 |
+-static void hard_disable_TSC(void) |
2024 |
+-{ |
2025 |
+- cr4_set_bits(X86_CR4_TSD); |
2026 |
+-} |
2027 |
+- |
2028 |
+ void disable_TSC(void) |
2029 |
+ { |
2030 |
+ preempt_disable(); |
2031 |
+@@ -143,15 +139,10 @@ void disable_TSC(void) |
2032 |
+ * Must flip the CPU state synchronously with |
2033 |
+ * TIF_NOTSC in the current running context. |
2034 |
+ */ |
2035 |
+- hard_disable_TSC(); |
2036 |
++ cr4_set_bits(X86_CR4_TSD); |
2037 |
+ preempt_enable(); |
2038 |
+ } |
2039 |
+ |
2040 |
+-static void hard_enable_TSC(void) |
2041 |
+-{ |
2042 |
+- cr4_clear_bits(X86_CR4_TSD); |
2043 |
+-} |
2044 |
+- |
2045 |
+ static void enable_TSC(void) |
2046 |
+ { |
2047 |
+ preempt_disable(); |
2048 |
+@@ -160,7 +151,7 @@ static void enable_TSC(void) |
2049 |
+ * Must flip the CPU state synchronously with |
2050 |
+ * TIF_NOTSC in the current running context. |
2051 |
+ */ |
2052 |
+- hard_enable_TSC(); |
2053 |
++ cr4_clear_bits(X86_CR4_TSD); |
2054 |
+ preempt_enable(); |
2055 |
+ } |
2056 |
+ |
2057 |
+@@ -188,48 +179,199 @@ int set_tsc_mode(unsigned int val) |
2058 |
+ return 0; |
2059 |
+ } |
2060 |
+ |
2061 |
+-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
2062 |
+- struct tss_struct *tss) |
2063 |
++static inline void switch_to_bitmap(struct tss_struct *tss, |
2064 |
++ struct thread_struct *prev, |
2065 |
++ struct thread_struct *next, |
2066 |
++ unsigned long tifp, unsigned long tifn) |
2067 |
+ { |
2068 |
+- struct thread_struct *prev, *next; |
2069 |
+- |
2070 |
+- prev = &prev_p->thread; |
2071 |
+- next = &next_p->thread; |
2072 |
+- |
2073 |
+- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ |
2074 |
+- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { |
2075 |
+- unsigned long debugctl = get_debugctlmsr(); |
2076 |
+- |
2077 |
+- debugctl &= ~DEBUGCTLMSR_BTF; |
2078 |
+- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) |
2079 |
+- debugctl |= DEBUGCTLMSR_BTF; |
2080 |
+- |
2081 |
+- update_debugctlmsr(debugctl); |
2082 |
+- } |
2083 |
+- |
2084 |
+- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
2085 |
+- test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
2086 |
+- /* prev and next are different */ |
2087 |
+- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) |
2088 |
+- hard_disable_TSC(); |
2089 |
+- else |
2090 |
+- hard_enable_TSC(); |
2091 |
+- } |
2092 |
+- |
2093 |
+- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
2094 |
++ if (tifn & _TIF_IO_BITMAP) { |
2095 |
+ /* |
2096 |
+ * Copy the relevant range of the IO bitmap. |
2097 |
+ * Normally this is 128 bytes or less: |
2098 |
+ */ |
2099 |
+ memcpy(tss->io_bitmap, next->io_bitmap_ptr, |
2100 |
+ max(prev->io_bitmap_max, next->io_bitmap_max)); |
2101 |
+- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { |
2102 |
++ } else if (tifp & _TIF_IO_BITMAP) { |
2103 |
+ /* |
2104 |
+ * Clear any possible leftover bits: |
2105 |
+ */ |
2106 |
+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
2107 |
+ } |
2108 |
++} |
2109 |
++ |
2110 |
++#ifdef CONFIG_SMP |
2111 |
++ |
2112 |
++struct ssb_state { |
2113 |
++ struct ssb_state *shared_state; |
2114 |
++ raw_spinlock_t lock; |
2115 |
++ unsigned int disable_state; |
2116 |
++ unsigned long local_state; |
2117 |
++}; |
2118 |
++ |
2119 |
++#define LSTATE_SSB 0 |
2120 |
++ |
2121 |
++static DEFINE_PER_CPU(struct ssb_state, ssb_state); |
2122 |
++ |
2123 |
++void speculative_store_bypass_ht_init(void) |
2124 |
++{ |
2125 |
++ struct ssb_state *st = this_cpu_ptr(&ssb_state); |
2126 |
++ unsigned int this_cpu = smp_processor_id(); |
2127 |
++ unsigned int cpu; |
2128 |
++ |
2129 |
++ st->local_state = 0; |
2130 |
++ |
2131 |
++ /* |
2132 |
++ * Shared state setup happens once on the first bringup |
2133 |
++ * of the CPU. It's not destroyed on CPU hotunplug. |
2134 |
++ */ |
2135 |
++ if (st->shared_state) |
2136 |
++ return; |
2137 |
++ |
2138 |
++ raw_spin_lock_init(&st->lock); |
2139 |
++ |
2140 |
++ /* |
2141 |
++ * Go over HT siblings and check whether one of them has set up the |
2142 |
++ * shared state pointer already. |
2143 |
++ */ |
2144 |
++ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { |
2145 |
++ if (cpu == this_cpu) |
2146 |
++ continue; |
2147 |
++ |
2148 |
++ if (!per_cpu(ssb_state, cpu).shared_state) |
2149 |
++ continue; |
2150 |
++ |
2151 |
++ /* Link it to the state of the sibling: */ |
2152 |
++ st->shared_state = per_cpu(ssb_state, cpu).shared_state; |
2153 |
++ return; |
2154 |
++ } |
2155 |
++ |
2156 |
++ /* |
2157 |
++ * First HT sibling to come up on the core. Link shared state of |
2158 |
++ * the first HT sibling to itself. The siblings on the same core |
2159 |
++ * which come up later will see the shared state pointer and link |
2160 |
++ * themselves to the state of this CPU. |
2161 |
++ */ |
2162 |
++ st->shared_state = st; |
2163 |
++} |
2164 |
++ |
2165 |
++/* |
2166 |
++ * Logic is: First HT sibling enables SSBD for both siblings in the core |
2167 |
++ * and the last sibling to disable it disables it for the whole core. This is how |
2168 |
++ * MSR_SPEC_CTRL works in "hardware": |
2169 |
++ * |
2170 |
++ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL |
2171 |
++ */ |
2172 |
++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) |
2173 |
++{ |
2174 |
++ struct ssb_state *st = this_cpu_ptr(&ssb_state); |
2175 |
++ u64 msr = x86_amd_ls_cfg_base; |
2176 |
++ |
2177 |
++ if (!static_cpu_has(X86_FEATURE_ZEN)) { |
2178 |
++ msr |= ssbd_tif_to_amd_ls_cfg(tifn); |
2179 |
++ wrmsrl(MSR_AMD64_LS_CFG, msr); |
2180 |
++ return; |
2181 |
++ } |
2182 |
++ |
2183 |
++ if (tifn & _TIF_SSBD) { |
2184 |
++ /* |
2185 |
++ * Since this can race with prctl(), block reentry on the |
2186 |
++ * same CPU. |
2187 |
++ */ |
2188 |
++ if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) |
2189 |
++ return; |
2190 |
++ |
2191 |
++ msr |= x86_amd_ls_cfg_ssbd_mask; |
2192 |
++ |
2193 |
++ raw_spin_lock(&st->shared_state->lock); |
2194 |
++ /* First sibling enables SSBD: */ |
2195 |
++ if (!st->shared_state->disable_state) |
2196 |
++ wrmsrl(MSR_AMD64_LS_CFG, msr); |
2197 |
++ st->shared_state->disable_state++; |
2198 |
++ raw_spin_unlock(&st->shared_state->lock); |
2199 |
++ } else { |
2200 |
++ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) |
2201 |
++ return; |
2202 |
++ |
2203 |
++ raw_spin_lock(&st->shared_state->lock); |
2204 |
++ st->shared_state->disable_state--; |
2205 |
++ if (!st->shared_state->disable_state) |
2206 |
++ wrmsrl(MSR_AMD64_LS_CFG, msr); |
2207 |
++ raw_spin_unlock(&st->shared_state->lock); |
2208 |
++ } |
2209 |
++} |
2210 |
++#else |
2211 |
++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) |
2212 |
++{ |
2213 |
++ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); |
2214 |
++ |
2215 |
++ wrmsrl(MSR_AMD64_LS_CFG, msr); |
2216 |
++} |
2217 |
++#endif |
2218 |
++ |
2219 |
++static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) |
2220 |
++{ |
2221 |
++ /* |
2222 |
++ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, |
2223 |
++ * so ssbd_tif_to_spec_ctrl() just works. |
2224 |
++ */ |
2225 |
++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); |
2226 |
++} |
2227 |
++ |
2228 |
++static __always_inline void intel_set_ssb_state(unsigned long tifn) |
2229 |
++{ |
2230 |
++ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); |
2231 |
++ |
2232 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); |
2233 |
++} |
2234 |
++ |
2235 |
++static __always_inline void __speculative_store_bypass_update(unsigned long tifn) |
2236 |
++{ |
2237 |
++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) |
2238 |
++ amd_set_ssb_virt_state(tifn); |
2239 |
++ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) |
2240 |
++ amd_set_core_ssb_state(tifn); |
2241 |
++ else |
2242 |
++ intel_set_ssb_state(tifn); |
2243 |
++} |
2244 |
++ |
2245 |
++void speculative_store_bypass_update(unsigned long tif) |
2246 |
++{ |
2247 |
++ preempt_disable(); |
2248 |
++ __speculative_store_bypass_update(tif); |
2249 |
++ preempt_enable(); |
2250 |
++} |
2251 |
++ |
2252 |
++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
2253 |
++ struct tss_struct *tss) |
2254 |
++{ |
2255 |
++ struct thread_struct *prev, *next; |
2256 |
++ unsigned long tifp, tifn; |
2257 |
++ |
2258 |
++ prev = &prev_p->thread; |
2259 |
++ next = &next_p->thread; |
2260 |
++ |
2261 |
++ tifn = READ_ONCE(task_thread_info(next_p)->flags); |
2262 |
++ tifp = READ_ONCE(task_thread_info(prev_p)->flags); |
2263 |
++ switch_to_bitmap(tss, prev, next, tifp, tifn); |
2264 |
++ |
2265 |
+ propagate_user_return_notify(prev_p, next_p); |
2266 |
++ |
2267 |
++ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && |
2268 |
++ arch_has_block_step()) { |
2269 |
++ unsigned long debugctl, msk; |
2270 |
++ |
2271 |
++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); |
2272 |
++ debugctl &= ~DEBUGCTLMSR_BTF; |
2273 |
++ msk = tifn & _TIF_BLOCKSTEP; |
2274 |
++ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; |
2275 |
++ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); |
2276 |
++ } |
2277 |
++ |
2278 |
++ if ((tifp ^ tifn) & _TIF_NOTSC) |
2279 |
++ cr4_toggle_bits(X86_CR4_TSD); |
2280 |
++ |
2281 |
++ if ((tifp ^ tifn) & _TIF_SSBD) |
2282 |
++ __speculative_store_bypass_update(tifn); |
2283 |
+ } |
2284 |
+ |
2285 |
+ /* |
2286 |
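intel_set_ssb_state() and amd_set_ssb_virt_state() above lean on ssbd_tif_to_spec_ctrl(), which turns the TIF_SSBD thread flag into the MSR's SSBD bit with a single mask-and-shift so the context-switch path stays branchless. A standalone sketch of the trick; the bit positions here are illustrative stand-ins, not the kernel's exact values:

#include <stdint.h>
#include <stdio.h>

#define TIF_SSBD_BIT        5   /* illustrative thread-info flag bit   */
#define SPEC_CTRL_SSBD_BIT  2   /* illustrative MSR_SPEC_CTRL SSBD bit */

/*
 * Branchless relocation of one flag bit: mask the TIF bit, then shift
 * it down into the MSR bit position. No compare or branch needed on
 * the hot path.
 */
static inline uint64_t ssbd_tif_to_spec_ctrl(unsigned long tifn)
{
        return (tifn & (1UL << TIF_SSBD_BIT)) >>
               (TIF_SSBD_BIT - SPEC_CTRL_SSBD_BIT);
}

int main(void)
{
        /* Prints 0x4 then 0. */
        printf("%#llx\n", (unsigned long long)
               ssbd_tif_to_spec_ctrl(1UL << TIF_SSBD_BIT));
        printf("%#llx\n", (unsigned long long)ssbd_tif_to_spec_ctrl(0));
        return 0;
}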
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c |
2287 |
+index 1f7aefc7b0b4..c017f1c71560 100644 |
2288 |
+--- a/arch/x86/kernel/smpboot.c |
2289 |
++++ b/arch/x86/kernel/smpboot.c |
2290 |
+@@ -75,6 +75,7 @@ |
2291 |
+ #include <asm/i8259.h> |
2292 |
+ #include <asm/realmode.h> |
2293 |
+ #include <asm/misc.h> |
2294 |
++#include <asm/spec-ctrl.h> |
2295 |
+ |
2296 |
+ /* Number of siblings per CPU package */ |
2297 |
+ int smp_num_siblings = 1; |
2298 |
+@@ -217,6 +218,8 @@ static void notrace start_secondary(void *unused) |
2299 |
+ */ |
2300 |
+ check_tsc_sync_target(); |
2301 |
+ |
2302 |
++ speculative_store_bypass_ht_init(); |
2303 |
++ |
2304 |
+ /* |
2305 |
+ * Lock vector_lock and initialize the vectors on this cpu |
2306 |
+ * before setting the cpu online. We must set it online with |
2307 |
+@@ -1209,6 +1212,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) |
2308 |
+ set_mtrr_aps_delayed_init(); |
2309 |
+ |
2310 |
+ smp_quirk_init_udelay(); |
2311 |
++ |
2312 |
++ speculative_store_bypass_ht_init(); |
2313 |
+ } |
2314 |
+ |
2315 |
+ void arch_enable_nonboot_cpus_begin(void) |
2316 |
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c |
2317 |
+index 42654375b73f..df7827a981dd 100644 |
2318 |
+--- a/arch/x86/kvm/svm.c |
2319 |
++++ b/arch/x86/kvm/svm.c |
2320 |
+@@ -37,7 +37,7 @@ |
2321 |
+ #include <asm/desc.h> |
2322 |
+ #include <asm/debugreg.h> |
2323 |
+ #include <asm/kvm_para.h> |
2324 |
+-#include <asm/nospec-branch.h> |
2325 |
++#include <asm/spec-ctrl.h> |
2326 |
+ |
2327 |
+ #include <asm/virtext.h> |
2328 |
+ #include "trace.h" |
2329 |
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c |
2330 |
+index 63c44a9bf6bb..18143886b186 100644 |
2331 |
+--- a/arch/x86/kvm/vmx.c |
2332 |
++++ b/arch/x86/kvm/vmx.c |
2333 |
+@@ -48,7 +48,7 @@ |
2334 |
+ #include <asm/kexec.h> |
2335 |
+ #include <asm/apic.h> |
2336 |
+ #include <asm/irq_remapping.h> |
2337 |
+-#include <asm/nospec-branch.h> |
2338 |
++#include <asm/spec-ctrl.h> |
2339 |
+ |
2340 |
+ #include "trace.h" |
2341 |
+ #include "pmu.h" |
2342 |
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c |
2343 |
+index 7cad01af6dcd..6d683bbb3502 100644 |
2344 |
+--- a/arch/x86/mm/tlb.c |
2345 |
++++ b/arch/x86/mm/tlb.c |
2346 |
+@@ -10,6 +10,7 @@ |
2347 |
+ |
2348 |
+ #include <asm/tlbflush.h> |
2349 |
+ #include <asm/mmu_context.h> |
2350 |
++#include <asm/nospec-branch.h> |
2351 |
+ #include <asm/cache.h> |
2352 |
+ #include <asm/apic.h> |
2353 |
+ #include <asm/uv/uv.h> |
2354 |
+@@ -29,6 +30,8 @@ |
2355 |
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi |
2356 |
+ */ |
2357 |
+ |
2358 |
++atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); |
2359 |
++ |
2360 |
+ struct flush_tlb_info { |
2361 |
+ struct mm_struct *flush_mm; |
2362 |
+ unsigned long flush_start; |
2363 |
+@@ -104,6 +107,36 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
2364 |
+ unsigned cpu = smp_processor_id(); |
2365 |
+ |
2366 |
+ if (likely(prev != next)) { |
2367 |
++ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); |
2368 |
++ |
2369 |
++ /* |
2370 |
++ * Avoid user/user BTB poisoning by flushing the branch |
2371 |
++ * predictor when switching between processes. This stops |
2372 |
++ * one process from doing Spectre-v2 attacks on another. |
2373 |
++ * |
2374 |
++ * As an optimization, flush indirect branches only when |
2375 |
++ * switching into processes that disable dumping. This |
2376 |
++ * protects high value processes like gpg, without having |
2377 |
++ * too high performance overhead. IBPB is *expensive*! |
2378 |
++ * |
2379 |
++ * This will not flush branches when switching into kernel |
2380 |
++ * threads. It will also not flush if we switch to idle |
2381 |
++ * thread and back to the same process. It will flush if we |
2382 |
++ * switch to a different non-dumpable process. |
2383 |
++ */ |
2384 |
++ if (tsk && tsk->mm && |
2385 |
++ tsk->mm->context.ctx_id != last_ctx_id && |
2386 |
++ get_dumpable(tsk->mm) != SUID_DUMP_USER) |
2387 |
++ indirect_branch_prediction_barrier(); |
2388 |
++ |
2389 |
++ /* |
2390 |
++ * Record last user mm's context id, so we can avoid |
2391 |
++ * flushing branch buffer with IBPB if we switch back |
2392 |
++ * to the same user. |
2393 |
++ */ |
2394 |
++ if (next != &init_mm) |
2395 |
++ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); |
2396 |
++ |
2397 |
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
2398 |
+ this_cpu_write(cpu_tlbstate.active_mm, next); |
2399 |
+ cpumask_set_cpu(cpu, mm_cpumask(next)); |
2400 |
+diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c |
2401 |
+index a0ac0f9c307f..f5a8cd96bae4 100644 |
2402 |
+--- a/arch/x86/platform/efi/efi_64.c |
2403 |
++++ b/arch/x86/platform/efi/efi_64.c |
2404 |
+@@ -40,6 +40,7 @@ |
2405 |
+ #include <asm/fixmap.h> |
2406 |
+ #include <asm/realmode.h> |
2407 |
+ #include <asm/time.h> |
2408 |
++#include <asm/nospec-branch.h> |
2409 |
+ |
2410 |
+ /* |
2411 |
+ * We allocate runtime services regions bottom-up, starting from -4G, i.e. |
2412 |
+@@ -347,6 +348,7 @@ extern efi_status_t efi64_thunk(u32, ...); |
2413 |
+ \ |
2414 |
+ efi_sync_low_kernel_mappings(); \ |
2415 |
+ local_irq_save(flags); \ |
2416 |
++ firmware_restrict_branch_speculation_start(); \ |
2417 |
+ \ |
2418 |
+ efi_scratch.prev_cr3 = read_cr3(); \ |
2419 |
+ write_cr3((unsigned long)efi_scratch.efi_pgt); \ |
2420 |
+@@ -357,6 +359,7 @@ extern efi_status_t efi64_thunk(u32, ...); |
2421 |
+ \ |
2422 |
+ write_cr3(efi_scratch.prev_cr3); \ |
2423 |
+ __flush_tlb_all(); \ |
2424 |
++ firmware_restrict_branch_speculation_end(); \ |
2425 |
+ local_irq_restore(flags); \ |
2426 |
+ \ |
2427 |
+ __s; \ |
2428 |
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c |
2429 |
+index cbef64b508e1..82fd84d5e1aa 100644 |
2430 |
+--- a/arch/x86/xen/enlighten.c |
2431 |
++++ b/arch/x86/xen/enlighten.c |
2432 |
+@@ -460,6 +460,12 @@ static void __init xen_init_cpuid_mask(void) |
2433 |
+ cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); |
2434 |
+ } |
2435 |
+ |
2436 |
++static void __init xen_init_capabilities(void) |
2437 |
++{ |
2438 |
++ if (xen_pv_domain()) |
2439 |
++ setup_force_cpu_cap(X86_FEATURE_XENPV); |
2440 |
++} |
2441 |
++ |
2442 |
+ static void xen_set_debugreg(int reg, unsigned long val) |
2443 |
+ { |
2444 |
+ HYPERVISOR_set_debugreg(reg, val); |
2445 |
+@@ -1587,6 +1593,7 @@ asmlinkage __visible void __init xen_start_kernel(void) |
2446 |
+ |
2447 |
+ xen_init_irq_ops(); |
2448 |
+ xen_init_cpuid_mask(); |
2449 |
++ xen_init_capabilities(); |
2450 |
+ |
2451 |
+ #ifdef CONFIG_X86_LOCAL_APIC |
2452 |
+ /* |
2453 |
+@@ -1883,14 +1890,6 @@ bool xen_hvm_need_lapic(void) |
2454 |
+ } |
2455 |
+ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); |
2456 |
+ |
2457 |
+-static void xen_set_cpu_features(struct cpuinfo_x86 *c) |
2458 |
+-{ |
2459 |
+- if (xen_pv_domain()) { |
2460 |
+- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); |
2461 |
+- set_cpu_cap(c, X86_FEATURE_XENPV); |
2462 |
+- } |
2463 |
+-} |
2464 |
+- |
2465 |
+ const struct hypervisor_x86 x86_hyper_xen = { |
2466 |
+ .name = "Xen", |
2467 |
+ .detect = xen_platform, |
2468 |
+@@ -1898,7 +1897,6 @@ const struct hypervisor_x86 x86_hyper_xen = { |
2469 |
+ .init_platform = xen_hvm_guest_init, |
2470 |
+ #endif |
2471 |
+ .x2apic_available = xen_x2apic_para_available, |
2472 |
+- .set_cpu_features = xen_set_cpu_features, |
2473 |
+ }; |
2474 |
+ EXPORT_SYMBOL(x86_hyper_xen); |
2475 |
+ |
2476 |
+diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c |
2477 |
+index 3f4ebf0261f2..29e50d1229bc 100644 |
2478 |
+--- a/arch/x86/xen/smp.c |
2479 |
++++ b/arch/x86/xen/smp.c |
2480 |
+@@ -28,6 +28,7 @@ |
2481 |
+ #include <xen/interface/vcpu.h> |
2482 |
+ #include <xen/interface/xenpmu.h> |
2483 |
+ |
2484 |
++#include <asm/spec-ctrl.h> |
2485 |
+ #include <asm/xen/interface.h> |
2486 |
+ #include <asm/xen/hypercall.h> |
2487 |
+ |
2488 |
+@@ -87,6 +88,8 @@ static void cpu_bringup(void) |
2489 |
+ cpu_data(cpu).x86_max_cores = 1; |
2490 |
+ set_cpu_sibling_map(cpu); |
2491 |
+ |
2492 |
++ speculative_store_bypass_ht_init(); |
2493 |
++ |
2494 |
+ xen_setup_cpu_clockevents(); |
2495 |
+ |
2496 |
+ notify_cpu_starting(cpu); |
2497 |
+@@ -357,6 +360,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
2498 |
+ } |
2499 |
+ set_cpu_sibling_map(0); |
2500 |
+ |
2501 |
++ speculative_store_bypass_ht_init(); |
2502 |
++ |
2503 |
+ xen_pmu_init(0); |
2504 |
+ |
2505 |
+ if (xen_smp_intr_init(0)) |
2506 |
+diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c |
2507 |
+index 7f664c416faf..4ecd0de08557 100644 |
2508 |
+--- a/arch/x86/xen/suspend.c |
2509 |
++++ b/arch/x86/xen/suspend.c |
2510 |
+@@ -1,11 +1,14 @@ |
2511 |
+ #include <linux/types.h> |
2512 |
+ #include <linux/tick.h> |
2513 |
++#include <linux/percpu-defs.h> |
2514 |
+ |
2515 |
+ #include <xen/xen.h> |
2516 |
+ #include <xen/interface/xen.h> |
2517 |
+ #include <xen/grant_table.h> |
2518 |
+ #include <xen/events.h> |
2519 |
+ |
2520 |
++#include <asm/cpufeatures.h> |
2521 |
++#include <asm/msr-index.h> |
2522 |
+ #include <asm/xen/hypercall.h> |
2523 |
+ #include <asm/xen/page.h> |
2524 |
+ #include <asm/fixmap.h> |
2525 |
+@@ -68,6 +71,8 @@ static void xen_pv_post_suspend(int suspend_cancelled) |
2526 |
+ xen_mm_unpin_all(); |
2527 |
+ } |
2528 |
+ |
2529 |
++static DEFINE_PER_CPU(u64, spec_ctrl); |
2530 |
++ |
2531 |
+ void xen_arch_pre_suspend(void) |
2532 |
+ { |
2533 |
+ if (xen_pv_domain()) |
2534 |
+@@ -84,6 +89,9 @@ void xen_arch_post_suspend(int cancelled) |
2535 |
+ |
2536 |
+ static void xen_vcpu_notify_restore(void *data) |
2537 |
+ { |
2538 |
++ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) |
2539 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); |
2540 |
++ |
2541 |
+ /* Boot processor notified via generic timekeeping_resume() */ |
2542 |
+ if (smp_processor_id() == 0) |
2543 |
+ return; |
2544 |
+@@ -93,7 +101,15 @@ static void xen_vcpu_notify_restore(void *data) |
2545 |
+ |
2546 |
+ static void xen_vcpu_notify_suspend(void *data) |
2547 |
+ { |
2548 |
++ u64 tmp; |
2549 |
++ |
2550 |
+ tick_suspend_local(); |
2551 |
++ |
2552 |
++ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { |
2553 |
++ rdmsrl(MSR_IA32_SPEC_CTRL, tmp); |
2554 |
++ this_cpu_write(spec_ctrl, tmp); |
2555 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
2556 |
++ } |
2557 |
+ } |
2558 |
+ |
2559 |
+ void xen_arch_resume(void) |
2560 |
+diff --git a/block/blk-core.c b/block/blk-core.c |
2561 |
+index f5f1a55703ae..50d77c90070d 100644 |
2562 |
+--- a/block/blk-core.c |
2563 |
++++ b/block/blk-core.c |
2564 |
+@@ -651,21 +651,17 @@ EXPORT_SYMBOL(blk_alloc_queue); |
2565 |
+ int blk_queue_enter(struct request_queue *q, gfp_t gfp) |
2566 |
+ { |
2567 |
+ while (true) { |
2568 |
+- int ret; |
2569 |
+- |
2570 |
+ if (percpu_ref_tryget_live(&q->q_usage_counter)) |
2571 |
+ return 0; |
2572 |
+ |
2573 |
+ if (!gfpflags_allow_blocking(gfp)) |
2574 |
+ return -EBUSY; |
2575 |
+ |
2576 |
+- ret = wait_event_interruptible(q->mq_freeze_wq, |
2577 |
+- !atomic_read(&q->mq_freeze_depth) || |
2578 |
+- blk_queue_dying(q)); |
2579 |
++ wait_event(q->mq_freeze_wq, |
2580 |
++ !atomic_read(&q->mq_freeze_depth) || |
2581 |
++ blk_queue_dying(q)); |
2582 |
+ if (blk_queue_dying(q)) |
2583 |
+ return -ENODEV; |
2584 |
+- if (ret) |
2585 |
+- return ret; |
2586 |
+ } |
2587 |
+ } |
2588 |
+ |
2589 |
+diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c |
2590 |
+index 3db71afbba93..143edea1076f 100644 |
2591 |
+--- a/drivers/base/cpu.c |
2592 |
++++ b/drivers/base/cpu.c |
2593 |
+@@ -518,14 +518,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev, |
2594 |
+ return sprintf(buf, "Not affected\n"); |
2595 |
+ } |
2596 |
+ |
2597 |
++ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, |
2598 |
++ struct device_attribute *attr, char *buf) |
2599 |
++{ |
2600 |
++ return sprintf(buf, "Not affected\n"); |
2601 |
++} |
2602 |
++ |
2603 |
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); |
2604 |
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); |
2605 |
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); |
2606 |
++static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); |
2607 |
+ |
2608 |
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = { |
2609 |
+ &dev_attr_meltdown.attr, |
2610 |
+ &dev_attr_spectre_v1.attr, |
2611 |
+ &dev_attr_spectre_v2.attr, |
2612 |
++ &dev_attr_spec_store_bypass.attr, |
2613 |
+ NULL |
2614 |
+ }; |
2615 |
+ |
2616 |
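With the weak fallback above, every architecture now exposes /sys/devices/system/cpu/vulnerabilities/spec_store_bypass; x86 overrides it with cpu_show_spec_store_bypass() from bugs.c, which prints one of the ssb_strings[] entries. A trivial reader:

#include <stdio.h>

int main(void)
{
        char buf[128];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/"
                        "spec_store_bypass", "r");

        if (!f) {
                /* Kernels without this patch set lack the file. */
                perror("spec_store_bypass");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("SSB status: %s", buf);
        fclose(f);
        return 0;
}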
+diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c |
2617 |
+index 8c41c6fcb9ee..acf83569f86f 100644 |
2618 |
+--- a/drivers/clk/tegra/clk-tegra30.c |
2619 |
++++ b/drivers/clk/tegra/clk-tegra30.c |
2620 |
+@@ -333,11 +333,11 @@ static struct pdiv_map pllu_p[] = { |
2621 |
+ }; |
2622 |
+ |
2623 |
+ static struct tegra_clk_pll_freq_table pll_u_freq_table[] = { |
2624 |
+- { 12000000, 480000000, 960, 12, 0, 12}, |
2625 |
+- { 13000000, 480000000, 960, 13, 0, 12}, |
2626 |
+- { 16800000, 480000000, 400, 7, 0, 5}, |
2627 |
+- { 19200000, 480000000, 200, 4, 0, 3}, |
2628 |
+- { 26000000, 480000000, 960, 26, 0, 12}, |
2629 |
++ { 12000000, 480000000, 960, 12, 2, 12 }, |
2630 |
++ { 13000000, 480000000, 960, 13, 2, 12 }, |
2631 |
++ { 16800000, 480000000, 400, 7, 2, 5 }, |
2632 |
++ { 19200000, 480000000, 200, 4, 2, 3 }, |
2633 |
++ { 26000000, 480000000, 960, 26, 2, 12 }, |
2634 |
+ { 0, 0, 0, 0, 0, 0 }, |
2635 |
+ }; |
2636 |
+ |
2637 |
+@@ -1372,6 +1372,7 @@ static struct tegra_clk_init_table init_table[] __initdata = { |
2638 |
+ {TEGRA30_CLK_GR2D, TEGRA30_CLK_PLL_C, 300000000, 0}, |
2639 |
+ {TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 300000000, 0}, |
2640 |
+ {TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 300000000, 0}, |
2641 |
++ { TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 480000000, 0 }, |
2642 |
+ {TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0}, /* This MUST be the last entry. */ |
2643 |
+ }; |
2644 |
+ |
2645 |
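The Tegra30 PLL_U rows above gain an explicit post-divider of 2 (previously 0), plus an init_table entry pinning the PLL at 480 MHz. Assuming the usual PLL arithmetic, out = in * n / (m * p), each corrected row now really computes to 480 MHz, which this sketch verifies:

#include <stdio.h>

/* Rows of pll_u_freq_table after the fix: { input, output, n, m, p }. */
static const struct { long long in, out; int n, m, p; } rows[] = {
        { 12000000, 480000000, 960, 12, 2 },
        { 13000000, 480000000, 960, 13, 2 },
        { 16800000, 480000000, 400,  7, 2 },
        { 19200000, 480000000, 200,  4, 2 },
        { 26000000, 480000000, 960, 26, 2 },
};

int main(void)
{
        for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
                printf("%9lld Hz in -> %lld Hz out\n", rows[i].in,
                       rows[i].in * rows[i].n / (rows[i].m * rows[i].p));
        return 0;
}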
+diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c |
2646 |
+index c1aaf0336cf2..5cde3ad1665e 100644 |
2647 |
+--- a/drivers/mtd/ubi/attach.c |
2648 |
++++ b/drivers/mtd/ubi/attach.c |
2649 |
+@@ -174,6 +174,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec) |
2650 |
+ return 0; |
2651 |
+ } |
2652 |
+ |
2653 |
++/** |
2654 |
++ * add_fastmap - add a Fastmap related physical eraseblock. |
2655 |
++ * @ai: attaching information |
2656 |
++ * @pnum: physical eraseblock number the VID header came from |
2657 |
++ * @vid_hdr: the volume identifier header |
2658 |
++ * @ec: erase counter of the physical eraseblock |
2659 |
++ * |
2660 |
++ * This function allocates a 'struct ubi_ainf_peb' object for a Fastmap |
2661 |
++ * physical eraseblock @pnum and adds it to the 'fastmap' list. |
2662 |
++ * Such blocks can be Fastmap super and data blocks, either from the |
2663 |
++ * most recent Fastmap we're attaching from or from old Fastmaps which |
2664 |
++ * will be erased. |
2665 |
++ */ |
2666 |
++static int add_fastmap(struct ubi_attach_info *ai, int pnum, |
2667 |
++ struct ubi_vid_hdr *vid_hdr, int ec) |
2668 |
++{ |
2669 |
++ struct ubi_ainf_peb *aeb; |
2670 |
++ |
2671 |
++ aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); |
2672 |
++ if (!aeb) |
2673 |
++ return -ENOMEM; |
2674 |
++ |
2675 |
++ aeb->pnum = pnum; |
2676 |
++ aeb->vol_id = be32_to_cpu(vidh->vol_id); |
2677 |
++ aeb->sqnum = be64_to_cpu(vidh->sqnum); |
2678 |
++ aeb->ec = ec; |
2679 |
++ list_add(&aeb->u.list, &ai->fastmap); |
2680 |
++ |
2681 |
++ dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum, |
2682 |
++ aeb->vol_id, aeb->sqnum); |
2683 |
++ |
2684 |
++ return 0; |
2685 |
++} |
2686 |
++ |
2687 |
+ /** |
2688 |
+ * validate_vid_hdr - check volume identifier header. |
2689 |
+ * @ubi: UBI device description object |
2690 |
+@@ -803,13 +837,26 @@ out_unlock: |
2691 |
+ return err; |
2692 |
+ } |
2693 |
+ |
2694 |
++static bool vol_ignored(int vol_id) |
2695 |
++{ |
2696 |
++ switch (vol_id) { |
2697 |
++ case UBI_LAYOUT_VOLUME_ID: |
2698 |
++ return true; |
2699 |
++ } |
2700 |
++ |
2701 |
++#ifdef CONFIG_MTD_UBI_FASTMAP |
2702 |
++ return ubi_is_fm_vol(vol_id); |
2703 |
++#else |
2704 |
++ return false; |
2705 |
++#endif |
2706 |
++} |
2707 |
++ |
2708 |
+ /** |
2709 |
+ * scan_peb - scan and process UBI headers of a PEB. |
2710 |
+ * @ubi: UBI device description object |
2711 |
+ * @ai: attaching information |
2712 |
+ * @pnum: the physical eraseblock number |
2713 |
+- * @vid: The volume ID of the found volume will be stored in this pointer |
2714 |
+- * @sqnum: The sqnum of the found volume will be stored in this pointer |
2715 |
++ * @fast: true if we're scanning for a Fastmap |
2716 |
+ * |
2717 |
+ * This function reads UBI headers of PEB @pnum, checks them, and adds |
2718 |
+ * information about this PEB to the corresponding list or RB-tree in the |
2719 |
+@@ -817,9 +864,9 @@ out_unlock: |
2720 |
+ * successfully handled and a negative error code in case of failure. |
2721 |
+ */ |
2722 |
+ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, |
2723 |
+- int pnum, int *vid, unsigned long long *sqnum) |
2724 |
++ int pnum, bool fast) |
2725 |
+ { |
2726 |
+- long long uninitialized_var(ec); |
2727 |
++ long long ec; |
2728 |
+ int err, bitflips = 0, vol_id = -1, ec_err = 0; |
2729 |
+ |
2730 |
+ dbg_bld("scan PEB %d", pnum); |
2731 |
+@@ -935,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, |
2732 |
+ */ |
2733 |
+ ai->maybe_bad_peb_count += 1; |
2734 |
+ case UBI_IO_BAD_HDR: |
2735 |
++ /* |
2736 |
++ * If we're facing a bad VID header we have to drop *all* |
2737 |
++ * Fastmap data structures we find. The most recent Fastmap |
2738 |
++ * could be bad and therefore there is a chance that we attach |
2739 |
++		 * from an old one. On a sound MTD stack a PEB should not go |
2740 |
++ * bad all of a sudden, but the reality is different. |
2741 |
++ * So, let's be paranoid and help finding the root cause by |
2742 |
++ * falling back to scanning mode instead of attaching with a |
2743 |
++ * bad EBA table and cause data corruption which is hard to |
2744 |
++ * analyze. |
2745 |
++ */ |
2746 |
++ if (fast) |
2747 |
++ ai->force_full_scan = 1; |
2748 |
++ |
2749 |
+ if (ec_err) |
2750 |
+ /* |
2751 |
+ * Both headers are corrupted. There is a possibility |
2752 |
+@@ -991,21 +1052,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, |
2753 |
+ } |
2754 |
+ |
2755 |
+ vol_id = be32_to_cpu(vidh->vol_id); |
2756 |
+- if (vid) |
2757 |
+- *vid = vol_id; |
2758 |
+- if (sqnum) |
2759 |
+- *sqnum = be64_to_cpu(vidh->sqnum); |
2760 |
+- if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { |
2761 |
++ if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) { |
2762 |
+ int lnum = be32_to_cpu(vidh->lnum); |
2763 |
+ |
2764 |
+ /* Unsupported internal volume */ |
2765 |
+ switch (vidh->compat) { |
2766 |
+ case UBI_COMPAT_DELETE: |
2767 |
+- if (vol_id != UBI_FM_SB_VOLUME_ID |
2768 |
+- && vol_id != UBI_FM_DATA_VOLUME_ID) { |
2769 |
+- ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", |
2770 |
+- vol_id, lnum); |
2771 |
+- } |
2772 |
++ ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", |
2773 |
++ vol_id, lnum); |
2774 |
++ |
2775 |
+ err = add_to_list(ai, pnum, vol_id, lnum, |
2776 |
+ ec, 1, &ai->erase); |
2777 |
+ if (err) |
2778 |
+@@ -1037,7 +1092,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, |
2779 |
+ if (ec_err) |
2780 |
+ ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d", |
2781 |
+ pnum); |
2782 |
+- err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); |
2783 |
++ |
2784 |
++ if (ubi_is_fm_vol(vol_id)) |
2785 |
++ err = add_fastmap(ai, pnum, vidh, ec); |
2786 |
++ else |
2787 |
++ err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); |
2788 |
++ |
2789 |
+ if (err) |
2790 |
+ return err; |
2791 |
+ |
2792 |
+@@ -1186,6 +1246,10 @@ static void destroy_ai(struct ubi_attach_info *ai) |
2793 |
+ list_del(&aeb->u.list); |
2794 |
+ kmem_cache_free(ai->aeb_slab_cache, aeb); |
2795 |
+ } |
2796 |
++ list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) { |
2797 |
++ list_del(&aeb->u.list); |
2798 |
++ kmem_cache_free(ai->aeb_slab_cache, aeb); |
2799 |
++ } |
2800 |
+ |
2801 |
+ /* Destroy the volume RB-tree */ |
2802 |
+ rb = ai->volumes.rb_node; |
2803 |
+@@ -1245,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, |
2804 |
+ cond_resched(); |
2805 |
+ |
2806 |
+ dbg_gen("process PEB %d", pnum); |
2807 |
+- err = scan_peb(ubi, ai, pnum, NULL, NULL); |
2808 |
++ err = scan_peb(ubi, ai, pnum, false); |
2809 |
+ if (err < 0) |
2810 |
+ goto out_vidh; |
2811 |
+ } |
2812 |
+@@ -1311,6 +1375,7 @@ static struct ubi_attach_info *alloc_ai(void) |
2813 |
+ INIT_LIST_HEAD(&ai->free); |
2814 |
+ INIT_LIST_HEAD(&ai->erase); |
2815 |
+ INIT_LIST_HEAD(&ai->alien); |
2816 |
++ INIT_LIST_HEAD(&ai->fastmap); |
2817 |
+ ai->volumes = RB_ROOT; |
2818 |
+ ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", |
2819 |
+ sizeof(struct ubi_ainf_peb), |
2820 |
+@@ -1337,52 +1402,58 @@ static struct ubi_attach_info *alloc_ai(void) |
2821 |
+ */ |
2822 |
+ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) |
2823 |
+ { |
2824 |
+- int err, pnum, fm_anchor = -1; |
2825 |
+- unsigned long long max_sqnum = 0; |
2826 |
++ int err, pnum; |
2827 |
++ struct ubi_attach_info *scan_ai; |
2828 |
+ |
2829 |
+ err = -ENOMEM; |
2830 |
+ |
2831 |
++ scan_ai = alloc_ai(); |
2832 |
++ if (!scan_ai) |
2833 |
++ goto out; |
2834 |
++ |
2835 |
+ ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); |
2836 |
+ if (!ech) |
2837 |
+- goto out; |
2838 |
++ goto out_ai; |
2839 |
+ |
2840 |
+ vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); |
2841 |
+ if (!vidh) |
2842 |
+ goto out_ech; |
2843 |
+ |
2844 |
+ for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) { |
2845 |
+- int vol_id = -1; |
2846 |
+- unsigned long long sqnum = -1; |
2847 |
+ cond_resched(); |
2848 |
+ |
2849 |
+ dbg_gen("process PEB %d", pnum); |
2850 |
+- err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum); |
2851 |
++ err = scan_peb(ubi, scan_ai, pnum, true); |
2852 |
+ if (err < 0) |
2853 |
+ goto out_vidh; |
2854 |
+- |
2855 |
+- if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) { |
2856 |
+- max_sqnum = sqnum; |
2857 |
+- fm_anchor = pnum; |
2858 |
+- } |
2859 |
+ } |
2860 |
+ |
2861 |
+ ubi_free_vid_hdr(ubi, vidh); |
2862 |
+ kfree(ech); |
2863 |
+ |
2864 |
+- if (fm_anchor < 0) |
2865 |
+- return UBI_NO_FASTMAP; |
2866 |
++ if (scan_ai->force_full_scan) |
2867 |
++ err = UBI_NO_FASTMAP; |
2868 |
++ else |
2869 |
++ err = ubi_scan_fastmap(ubi, *ai, scan_ai); |
2870 |
+ |
2871 |
+- destroy_ai(*ai); |
2872 |
+- *ai = alloc_ai(); |
2873 |
+- if (!*ai) |
2874 |
+- return -ENOMEM; |
2875 |
++ if (err) { |
2876 |
++ /* |
2877 |
++ * Didn't attach via fastmap, do a full scan but reuse what |
2878 |
++ * we've aready scanned. |
2879 |
++ */ |
2880 |
++ destroy_ai(*ai); |
2881 |
++ *ai = scan_ai; |
2882 |
++ } else |
2883 |
++ destroy_ai(scan_ai); |
2884 |
+ |
2885 |
+- return ubi_scan_fastmap(ubi, *ai, fm_anchor); |
2886 |
++ return err; |
2887 |
+ |
2888 |
+ out_vidh: |
2889 |
+ ubi_free_vid_hdr(ubi, vidh); |
2890 |
+ out_ech: |
2891 |
+ kfree(ech); |
2892 |
++out_ai: |
2893 |
++ destroy_ai(scan_ai); |
2894 |
+ out: |
2895 |
+ return err; |
2896 |
+ } |
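
Taken together, the attach.c hunks above change scan_fast() from "find the anchor PEB inline" to "collect all Fastmap PEBs into a throwaway attach structure, then decide". A compressed sketch of the resulting flow (paraphrased from the patch for orientation, not verbatim kernel code):

    scan_fast(ubi, &ai):
        scan_ai = alloc_ai();
        for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++)
            scan_peb(ubi, scan_ai, pnum, true);   /* Fastmap PEBs land on scan_ai->fastmap */
        if (scan_ai->force_full_scan)             /* a bad VID header was seen */
            err = UBI_NO_FASTMAP;
        else
            err = ubi_scan_fastmap(ubi, *ai, scan_ai);
        if (err) {                                /* fall back to a full scan, */
            destroy_ai(*ai);                      /* reusing what was already scanned */
            *ai = scan_ai;
        } else
            destroy_ai(scan_ai);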
2897 |
+diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c |
2898 |
+index c4a25c858c07..03cf0553ec1b 100644 |
2899 |
+--- a/drivers/mtd/ubi/eba.c |
2900 |
++++ b/drivers/mtd/ubi/eba.c |
2901 |
+@@ -1178,6 +1178,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, |
2902 |
+ struct ubi_volume *vol; |
2903 |
+ uint32_t crc; |
2904 |
+ |
2905 |
++ ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem)); |
2906 |
++ |
2907 |
+ vol_id = be32_to_cpu(vid_hdr->vol_id); |
2908 |
+ lnum = be32_to_cpu(vid_hdr->lnum); |
2909 |
+ |
2910 |
+@@ -1346,9 +1348,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, |
2911 |
+ } |
2912 |
+ |
2913 |
+ ubi_assert(vol->eba_tbl[lnum] == from); |
2914 |
+- down_read(&ubi->fm_eba_sem); |
2915 |
+ vol->eba_tbl[lnum] = to; |
2916 |
+- up_read(&ubi->fm_eba_sem); |
2917 |
+ |
2918 |
+ out_unlock_buf: |
2919 |
+ mutex_unlock(&ubi->buf_mutex); |
2920 |
+diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c |
2921 |
+index ed62f1efe6eb..69dd21679a30 100644 |
2922 |
+--- a/drivers/mtd/ubi/fastmap-wl.c |
2923 |
++++ b/drivers/mtd/ubi/fastmap-wl.c |
2924 |
+@@ -262,6 +262,8 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) |
2925 |
+ struct ubi_fm_pool *pool = &ubi->fm_wl_pool; |
2926 |
+ int pnum; |
2927 |
+ |
2928 |
++ ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem)); |
2929 |
++ |
2930 |
+ if (pool->used == pool->size) { |
2931 |
+ /* We cannot update the fastmap here because this |
2932 |
+ * function is called in atomic context. |
2933 |
+@@ -303,7 +305,7 @@ int ubi_ensure_anchor_pebs(struct ubi_device *ubi) |
2934 |
+ |
2935 |
+ wrk->anchor = 1; |
2936 |
+ wrk->func = &wear_leveling_worker; |
2937 |
+- schedule_ubi_work(ubi, wrk); |
2938 |
++ __schedule_ubi_work(ubi, wrk); |
2939 |
+ return 0; |
2940 |
+ } |
2941 |
+ |
2942 |
+@@ -344,7 +346,7 @@ int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e, |
2943 |
+ spin_unlock(&ubi->wl_lock); |
2944 |
+ |
2945 |
+ vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID; |
2946 |
+- return schedule_erase(ubi, e, vol_id, lnum, torture); |
2947 |
++ return schedule_erase(ubi, e, vol_id, lnum, torture, true); |
2948 |
+ } |
2949 |
+ |
2950 |
+ /** |
2951 |
+diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c |
2952 |
+index bba7dd1b5ebf..72e89b352034 100644 |
2953 |
+--- a/drivers/mtd/ubi/fastmap.c |
2954 |
++++ b/drivers/mtd/ubi/fastmap.c |
2955 |
+@@ -326,6 +326,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai, |
2956 |
+ aeb->pnum = new_aeb->pnum; |
2957 |
+ aeb->copy_flag = new_vh->copy_flag; |
2958 |
+ aeb->scrub = new_aeb->scrub; |
2959 |
++ aeb->sqnum = new_aeb->sqnum; |
2960 |
+ kmem_cache_free(ai->aeb_slab_cache, new_aeb); |
2961 |
+ |
2962 |
+ /* new_aeb is older */ |
2963 |
+@@ -850,28 +851,58 @@ fail: |
2964 |
+ return ret; |
2965 |
+ } |
2966 |
+ |
2967 |
++/** |
2968 |
++ * find_fm_anchor - find the most recent Fastmap superblock (anchor) |
2969 |
++ * @ai: UBI attach info to be filled |
2970 |
++ */ |
2971 |
++static int find_fm_anchor(struct ubi_attach_info *ai) |
2972 |
++{ |
2973 |
++ int ret = -1; |
2974 |
++ struct ubi_ainf_peb *aeb; |
2975 |
++ unsigned long long max_sqnum = 0; |
2976 |
++ |
2977 |
++ list_for_each_entry(aeb, &ai->fastmap, u.list) { |
2978 |
++ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) { |
2979 |
++ max_sqnum = aeb->sqnum; |
2980 |
++ ret = aeb->pnum; |
2981 |
++ } |
2982 |
++ } |
2983 |
++ |
2984 |
++ return ret; |
2985 |
++} |
2986 |
++ |
2987 |
+ /** |
2988 |
+ * ubi_scan_fastmap - scan the fastmap. |
2989 |
+ * @ubi: UBI device object |
2990 |
+ * @ai: UBI attach info to be filled |
2991 |
+- * @fm_anchor: The fastmap starts at this PEB |
2992 |
++ * @scan_ai: UBI attach info from the first 64 PEBs, |
2993 |
++ * used to find the most recent Fastmap data structure |
2994 |
+ * |
2995 |
+ * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found, |
2996 |
+ * UBI_BAD_FASTMAP if one was found but is not usable. |
2997 |
+ * < 0 indicates an internal error. |
2998 |
+ */ |
2999 |
+ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, |
3000 |
+- int fm_anchor) |
3001 |
++ struct ubi_attach_info *scan_ai) |
3002 |
+ { |
3003 |
+ struct ubi_fm_sb *fmsb, *fmsb2; |
3004 |
+ struct ubi_vid_hdr *vh; |
3005 |
+ struct ubi_ec_hdr *ech; |
3006 |
+ struct ubi_fastmap_layout *fm; |
3007 |
+- int i, used_blocks, pnum, ret = 0; |
3008 |
++ struct ubi_ainf_peb *tmp_aeb, *aeb; |
3009 |
++ int i, used_blocks, pnum, fm_anchor, ret = 0; |
3010 |
+ size_t fm_size; |
3011 |
+ __be32 crc, tmp_crc; |
3012 |
+ unsigned long long sqnum = 0; |
3013 |
+ |
3014 |
++ fm_anchor = find_fm_anchor(scan_ai); |
3015 |
++ if (fm_anchor < 0) |
3016 |
++ return UBI_NO_FASTMAP; |
3017 |
++ |
3018 |
++ /* Move all (possible) fastmap blocks into our new attach structure. */ |
3019 |
++ list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list) |
3020 |
++ list_move_tail(&aeb->u.list, &ai->fastmap); |
3021 |
++ |
3022 |
+ down_write(&ubi->fm_protect); |
3023 |
+ memset(ubi->fm_buf, 0, ubi->fm_size); |
3024 |
+ |
3025 |
+@@ -1484,22 +1515,30 @@ int ubi_update_fastmap(struct ubi_device *ubi) |
3026 |
+ struct ubi_wl_entry *tmp_e; |
3027 |
+ |
3028 |
+ down_write(&ubi->fm_protect); |
3029 |
++ down_write(&ubi->work_sem); |
3030 |
++ down_write(&ubi->fm_eba_sem); |
3031 |
+ |
3032 |
+ ubi_refill_pools(ubi); |
3033 |
+ |
3034 |
+ if (ubi->ro_mode || ubi->fm_disabled) { |
3035 |
++ up_write(&ubi->fm_eba_sem); |
3036 |
++ up_write(&ubi->work_sem); |
3037 |
+ up_write(&ubi->fm_protect); |
3038 |
+ return 0; |
3039 |
+ } |
3040 |
+ |
3041 |
+ ret = ubi_ensure_anchor_pebs(ubi); |
3042 |
+ if (ret) { |
3043 |
++ up_write(&ubi->fm_eba_sem); |
3044 |
++ up_write(&ubi->work_sem); |
3045 |
+ up_write(&ubi->fm_protect); |
3046 |
+ return ret; |
3047 |
+ } |
3048 |
+ |
3049 |
+ new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL); |
3050 |
+ if (!new_fm) { |
3051 |
++ up_write(&ubi->fm_eba_sem); |
3052 |
++ up_write(&ubi->work_sem); |
3053 |
+ up_write(&ubi->fm_protect); |
3054 |
+ return -ENOMEM; |
3055 |
+ } |
3056 |
+@@ -1608,16 +1647,14 @@ int ubi_update_fastmap(struct ubi_device *ubi) |
3057 |
+ new_fm->e[0] = tmp_e; |
3058 |
+ } |
3059 |
+ |
3060 |
+- down_write(&ubi->work_sem); |
3061 |
+- down_write(&ubi->fm_eba_sem); |
3062 |
+ ret = ubi_write_fastmap(ubi, new_fm); |
3063 |
+- up_write(&ubi->fm_eba_sem); |
3064 |
+- up_write(&ubi->work_sem); |
3065 |
+ |
3066 |
+ if (ret) |
3067 |
+ goto err; |
3068 |
+ |
3069 |
+ out_unlock: |
3070 |
++ up_write(&ubi->fm_eba_sem); |
3071 |
++ up_write(&ubi->work_sem); |
3072 |
+ up_write(&ubi->fm_protect); |
3073 |
+ kfree(old_fm); |
3074 |
+ return ret; |
3075 |
+diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h |
3076 |
+index de1ea2e4c37d..05d9ec66437c 100644 |
3077 |
+--- a/drivers/mtd/ubi/ubi.h |
3078 |
++++ b/drivers/mtd/ubi/ubi.h |
3079 |
+@@ -699,6 +699,8 @@ struct ubi_ainf_volume { |
3080 |
+ * @erase: list of physical eraseblocks which have to be erased |
3081 |
+ * @alien: list of physical eraseblocks which should not be used by UBI (e.g., |
3082 |
+ * those belonging to "preserve"-compatible internal volumes) |
3083 |
++ * @fastmap: list of physical eraseblocks which relate to fastmap (e.g., |
3084 |
++ * eraseblocks of the current and not yet erased old fastmap blocks) |
3085 |
+ * @corr_peb_count: count of PEBs in the @corr list |
3086 |
+ * @empty_peb_count: count of PEBs which are presumably empty (contain only |
3087 |
+ * 0xFF bytes) |
3088 |
+@@ -709,6 +711,8 @@ struct ubi_ainf_volume { |
3089 |
+ * @vols_found: number of volumes found |
3090 |
+ * @highest_vol_id: highest volume ID |
3091 |
+ * @is_empty: flag indicating whether the MTD device is empty or not |
3092 |
++ * @force_full_scan: flag indicating whether we need to do a full scan and drop |
3093 |
++ all existing Fastmap data structures |
3094 |
+ * @min_ec: lowest erase counter value |
3095 |
+ * @max_ec: highest erase counter value |
3096 |
+ * @max_sqnum: highest sequence number value |
3097 |
+@@ -727,6 +731,7 @@ struct ubi_attach_info { |
3098 |
+ struct list_head free; |
3099 |
+ struct list_head erase; |
3100 |
+ struct list_head alien; |
3101 |
++ struct list_head fastmap; |
3102 |
+ int corr_peb_count; |
3103 |
+ int empty_peb_count; |
3104 |
+ int alien_peb_count; |
3105 |
+@@ -735,6 +740,7 @@ struct ubi_attach_info { |
3106 |
+ int vols_found; |
3107 |
+ int highest_vol_id; |
3108 |
+ int is_empty; |
3109 |
++ int force_full_scan; |
3110 |
+ int min_ec; |
3111 |
+ int max_ec; |
3112 |
+ unsigned long long max_sqnum; |
3113 |
+@@ -907,7 +913,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, |
3114 |
+ size_t ubi_calc_fm_size(struct ubi_device *ubi); |
3115 |
+ int ubi_update_fastmap(struct ubi_device *ubi); |
3116 |
+ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, |
3117 |
+- int fm_anchor); |
3118 |
++ struct ubi_attach_info *scan_ai); |
3119 |
+ #else |
3120 |
+ static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } |
3121 |
+ #endif |
3122 |
+@@ -1101,4 +1107,42 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx) |
3123 |
+ return idx; |
3124 |
+ } |
3125 |
+ |
3126 |
++/** |
3127 |
++ * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume. |
3128 |
++ * @vol_id: volume ID |
3129 |
++ */ |
3130 |
++static inline bool ubi_is_fm_vol(int vol_id) |
3131 |
++{ |
3132 |
++ switch (vol_id) { |
3133 |
++ case UBI_FM_SB_VOLUME_ID: |
3134 |
++ case UBI_FM_DATA_VOLUME_ID: |
3135 |
++ return true; |
3136 |
++ } |
3137 |
++ |
3138 |
++ return false; |
3139 |
++} |
3140 |
++ |
3141 |
++/** |
3142 |
++ * ubi_find_fm_block - check whether a PEB is part of the current Fastmap. |
3143 |
++ * @ubi: UBI device description object |
3144 |
++ * @pnum: physical eraseblock to look for |
3145 |
++ * |
3146 |
++ * This function returns a wear leveling object if @pnum relates to the current |
3147 |
++ * fastmap, @NULL otherwise. |
3148 |
++ */ |
3149 |
++static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi, |
3150 |
++ int pnum) |
3151 |
++{ |
3152 |
++ int i; |
3153 |
++ |
3154 |
++ if (ubi->fm) { |
3155 |
++ for (i = 0; i < ubi->fm->used_blocks; i++) { |
3156 |
++ if (ubi->fm->e[i]->pnum == pnum) |
3157 |
++ return ubi->fm->e[i]; |
3158 |
++ } |
3159 |
++ } |
3160 |
++ |
3161 |
++ return NULL; |
3162 |
++} |
3163 |
++ |
3164 |
+ #endif /* !__UBI_UBI_H__ */ |
3165 |
+diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c |
3166 |
+index ca9746f41ff1..b3c1b8106a68 100644 |
3167 |
+--- a/drivers/mtd/ubi/wl.c |
3168 |
++++ b/drivers/mtd/ubi/wl.c |
3169 |
+@@ -580,7 +580,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, |
3170 |
+ * failure. |
3171 |
+ */ |
3172 |
+ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, |
3173 |
+- int vol_id, int lnum, int torture) |
3174 |
++ int vol_id, int lnum, int torture, bool nested) |
3175 |
+ { |
3176 |
+ struct ubi_work *wl_wrk; |
3177 |
+ |
3178 |
+@@ -599,7 +599,10 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, |
3179 |
+ wl_wrk->lnum = lnum; |
3180 |
+ wl_wrk->torture = torture; |
3181 |
+ |
3182 |
+- schedule_ubi_work(ubi, wl_wrk); |
3183 |
++ if (nested) |
3184 |
++ __schedule_ubi_work(ubi, wl_wrk); |
3185 |
++ else |
3186 |
++ schedule_ubi_work(ubi, wl_wrk); |
3187 |
+ return 0; |
3188 |
+ } |
3189 |
+ |
3190 |
+@@ -658,6 +661,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, |
3191 |
+ if (!vid_hdr) |
3192 |
+ return -ENOMEM; |
3193 |
+ |
3194 |
++ down_read(&ubi->fm_eba_sem); |
3195 |
+ mutex_lock(&ubi->move_mutex); |
3196 |
+ spin_lock(&ubi->wl_lock); |
3197 |
+ ubi_assert(!ubi->move_from && !ubi->move_to); |
3198 |
+@@ -884,6 +888,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, |
3199 |
+ |
3200 |
+ dbg_wl("done"); |
3201 |
+ mutex_unlock(&ubi->move_mutex); |
3202 |
++ up_read(&ubi->fm_eba_sem); |
3203 |
+ return 0; |
3204 |
+ |
3205 |
+ /* |
3206 |
+@@ -925,6 +930,7 @@ out_not_moved: |
3207 |
+ } |
3208 |
+ |
3209 |
+ mutex_unlock(&ubi->move_mutex); |
3210 |
++ up_read(&ubi->fm_eba_sem); |
3211 |
+ return 0; |
3212 |
+ |
3213 |
+ out_error: |
3214 |
+@@ -946,6 +952,7 @@ out_error: |
3215 |
+ out_ro: |
3216 |
+ ubi_ro_mode(ubi); |
3217 |
+ mutex_unlock(&ubi->move_mutex); |
3218 |
++ up_read(&ubi->fm_eba_sem); |
3219 |
+ ubi_assert(err != 0); |
3220 |
+ return err < 0 ? err : -EIO; |
3221 |
+ |
3222 |
+@@ -953,6 +960,7 @@ out_cancel: |
3223 |
+ ubi->wl_scheduled = 0; |
3224 |
+ spin_unlock(&ubi->wl_lock); |
3225 |
+ mutex_unlock(&ubi->move_mutex); |
3226 |
++ up_read(&ubi->fm_eba_sem); |
3227 |
+ ubi_free_vid_hdr(ubi, vid_hdr); |
3228 |
+ return 0; |
3229 |
+ } |
3230 |
+@@ -1075,7 +1083,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) |
3231 |
+ int err1; |
3232 |
+ |
3233 |
+ /* Re-schedule the LEB for erasure */ |
3234 |
+- err1 = schedule_erase(ubi, e, vol_id, lnum, 0); |
3235 |
++ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false); |
3236 |
+ if (err1) { |
3237 |
+ wl_entry_destroy(ubi, e); |
3238 |
+ err = err1; |
3239 |
+@@ -1256,7 +1264,7 @@ retry: |
3240 |
+ } |
3241 |
+ spin_unlock(&ubi->wl_lock); |
3242 |
+ |
3243 |
+- err = schedule_erase(ubi, e, vol_id, lnum, torture); |
3244 |
++ err = schedule_erase(ubi, e, vol_id, lnum, torture, false); |
3245 |
+ if (err) { |
3246 |
+ spin_lock(&ubi->wl_lock); |
3247 |
+ wl_tree_add(e, &ubi->used); |
3248 |
+@@ -1500,6 +1508,46 @@ static void shutdown_work(struct ubi_device *ubi) |
3249 |
+ } |
3250 |
+ } |
3251 |
+ |
3252 |
++/** |
3253 |
++ * erase_aeb - erase a PEB given in UBI attach info PEB |
3254 |
++ * @ubi: UBI device description object |
3255 |
++ * @aeb: UBI attach info PEB |
3256 |
++ * @sync: If true, erase synchronously. Otherwise schedule for erasure |
3257 |
++ */ |
3258 |
++static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) |
3259 |
++{ |
3260 |
++ struct ubi_wl_entry *e; |
3261 |
++ int err; |
3262 |
++ |
3263 |
++ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); |
3264 |
++ if (!e) |
3265 |
++ return -ENOMEM; |
3266 |
++ |
3267 |
++ e->pnum = aeb->pnum; |
3268 |
++ e->ec = aeb->ec; |
3269 |
++ ubi->lookuptbl[e->pnum] = e; |
3270 |
++ |
3271 |
++ if (sync) { |
3272 |
++ err = sync_erase(ubi, e, false); |
3273 |
++ if (err) |
3274 |
++ goto out_free; |
3275 |
++ |
3276 |
++ wl_tree_add(e, &ubi->free); |
3277 |
++ ubi->free_count++; |
3278 |
++ } else { |
3279 |
++ err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); |
3280 |
++ if (err) |
3281 |
++ goto out_free; |
3282 |
++ } |
3283 |
++ |
3284 |
++ return 0; |
3285 |
++ |
3286 |
++out_free: |
3287 |
++ wl_entry_destroy(ubi, e); |
3288 |
++ |
3289 |
++ return err; |
3290 |
++} |
3291 |
++ |
3292 |
+ /** |
3293 |
+ * ubi_wl_init - initialize the WL sub-system using attaching information. |
3294 |
+ * @ubi: UBI device description object |
3295 |
+@@ -1537,17 +1585,9 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) |
3296 |
+ list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { |
3297 |
+ cond_resched(); |
3298 |
+ |
3299 |
+- e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); |
3300 |
+- if (!e) |
3301 |
+- goto out_free; |
3302 |
+- |
3303 |
+- e->pnum = aeb->pnum; |
3304 |
+- e->ec = aeb->ec; |
3305 |
+- ubi->lookuptbl[e->pnum] = e; |
3306 |
+- if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { |
3307 |
+- wl_entry_destroy(ubi, e); |
3308 |
++ err = erase_aeb(ubi, aeb, false); |
3309 |
++ if (err) |
3310 |
+ goto out_free; |
3311 |
+- } |
3312 |
+ |
3313 |
+ found_pebs++; |
3314 |
+ } |
3315 |
+@@ -1598,19 +1638,49 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) |
3316 |
+ } |
3317 |
+ } |
3318 |
+ |
3319 |
+- dbg_wl("found %i PEBs", found_pebs); |
3320 |
++ list_for_each_entry(aeb, &ai->fastmap, u.list) { |
3321 |
++ cond_resched(); |
3322 |
+ |
3323 |
+- if (ubi->fm) { |
3324 |
+- ubi_assert(ubi->good_peb_count == |
3325 |
+- found_pebs + ubi->fm->used_blocks); |
3326 |
++ e = ubi_find_fm_block(ubi, aeb->pnum); |
3327 |
+ |
3328 |
+- for (i = 0; i < ubi->fm->used_blocks; i++) { |
3329 |
+- e = ubi->fm->e[i]; |
3330 |
++ if (e) { |
3331 |
++ ubi_assert(!ubi->lookuptbl[e->pnum]); |
3332 |
+ ubi->lookuptbl[e->pnum] = e; |
3333 |
++ } else { |
3334 |
++ bool sync = false; |
3335 |
++ |
3336 |
++ /* |
3337 |
++ * Usually old Fastmap PEBs are scheduled for erasure |
3338 |
++ * and we don't have to care about them but if we face |
3339 |
++ * an power cut before scheduling them we need to |
3340 |
++ * take care of them here. |
3341 |
++ */ |
3342 |
++ if (ubi->lookuptbl[aeb->pnum]) |
3343 |
++ continue; |
3344 |
++ |
3345 |
++ /* |
3346 |
++ * The fastmap update code might not find a free PEB for |
3347 |
++ * writing the fastmap anchor to and then reuses the |
3348 |
++ * current fastmap anchor PEB. When this PEB gets erased |
3349 |
++ * and a power cut happens before it is written again we |
3350 |
++ * must make sure that the fastmap attach code doesn't |
3351 |
++ * find any outdated fastmap anchors, hence we erase the |
3352 |
++ * outdated fastmap anchor PEBs synchronously here. |
3353 |
++ */ |
3354 |
++ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) |
3355 |
++ sync = true; |
3356 |
++ |
3357 |
++ err = erase_aeb(ubi, aeb, sync); |
3358 |
++ if (err) |
3359 |
++ goto out_free; |
3360 |
+ } |
3361 |
++ |
3362 |
++ found_pebs++; |
3363 |
+ } |
3364 |
+- else |
3365 |
+- ubi_assert(ubi->good_peb_count == found_pebs); |
3366 |
++ |
3367 |
++ dbg_wl("found %i PEBs", found_pebs); |
3368 |
++ |
3369 |
++ ubi_assert(ubi->good_peb_count == found_pebs); |
3370 |
+ |
3371 |
+ reserved_pebs = WL_RESERVED_PEBS; |
3372 |
+ ubi_fastmap_init(ubi, &reserved_pebs); |
3373 |
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c |
3374 |
+index 1325825d5225..ce3a56bea6e6 100644 |
3375 |
+--- a/drivers/net/ethernet/broadcom/tg3.c |
3376 |
++++ b/drivers/net/ethernet/broadcom/tg3.c |
3377 |
+@@ -9278,6 +9278,15 @@ static int tg3_chip_reset(struct tg3 *tp) |
3378 |
+ |
3379 |
+ tg3_restore_clk(tp); |
3380 |
+ |
3381 |
++ /* Increase the core clock speed to fix tx timeout issue for 5762 |
3382 |
++ * with 100Mbps link speed. |
3383 |
++ */ |
3384 |
++ if (tg3_asic_rev(tp) == ASIC_REV_5762) { |
3385 |
++ val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE); |
3386 |
++ tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val | |
3387 |
++ TG3_CPMU_MAC_ORIDE_ENABLE); |
3388 |
++ } |
3389 |
++ |
3390 |
+ /* Reprobe ASF enable state. */ |
3391 |
+ tg3_flag_clear(tp, ENABLE_ASF); |
3392 |
+ tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK | |
3393 |
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c |
3394 |
+index 8179727d3423..1f2f25a71d18 100644 |
3395 |
+--- a/drivers/net/phy/phy_device.c |
3396 |
++++ b/drivers/net/phy/phy_device.c |
3397 |
+@@ -1265,11 +1265,8 @@ static int gen10g_resume(struct phy_device *phydev) |
3398 |
+ |
3399 |
+ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) |
3400 |
+ { |
3401 |
+- /* The default values for phydev->supported are provided by the PHY |
3402 |
+- * driver "features" member, we want to reset to sane defaults first |
3403 |
+- * before supporting higher speeds. |
3404 |
+- */ |
3405 |
+- phydev->supported &= PHY_DEFAULT_FEATURES; |
3406 |
++ phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | |
3407 |
++ PHY_10BT_FEATURES); |
3408 |
+ |
3409 |
+ switch (max_speed) { |
3410 |
+ default: |
3411 |
+diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c |
3412 |
+index da7bae991552..d877ff124365 100644 |
3413 |
+--- a/drivers/ptp/ptp_chardev.c |
3414 |
++++ b/drivers/ptp/ptp_chardev.c |
3415 |
+@@ -88,6 +88,7 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, |
3416 |
+ case PTP_PF_PHYSYNC: |
3417 |
+ if (chan != 0) |
3418 |
+ return -EINVAL; |
3419 |
++ break; |
3420 |
+ default: |
3421 |
+ return -EINVAL; |
3422 |
+ } |
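
The one-line ptp_chardev.c fix above is a classic missing-break bug: PTP_PF_PHYSYNC with a valid channel fell through into the default arm, so the function rejected every request with -EINVAL. A reduced, self-contained illustration (hypothetical values, not the driver code):

    #include <stdio.h>

    static int check(int func, int chan)
    {
            switch (func) {
            case 1:                 /* stands in for PTP_PF_PHYSYNC */
                    if (chan != 0)
                            return -1;
                    break;          /* the fix: without this, fall through to default */
            default:
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            /* With the break: 0. Without it: -1 even for the valid channel. */
            printf("%d\n", check(1, 0));
            return 0;
    }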
3423 |
+diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c |
3424 |
+index f2e9f59c90d6..2d837b6bd495 100644 |
3425 |
+--- a/drivers/usb/host/xhci.c |
3426 |
++++ b/drivers/usb/host/xhci.c |
3427 |
+@@ -887,6 +887,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) |
3428 |
+ spin_unlock_irqrestore(&xhci->lock, flags); |
3429 |
+ } |
3430 |
+ |
3431 |
++static bool xhci_pending_portevent(struct xhci_hcd *xhci) |
3432 |
++{ |
3433 |
++ __le32 __iomem **port_array; |
3434 |
++ int port_index; |
3435 |
++ u32 status; |
3436 |
++ u32 portsc; |
3437 |
++ |
3438 |
++ status = readl(&xhci->op_regs->status); |
3439 |
++ if (status & STS_EINT) |
3440 |
++ return true; |
3441 |
++ /* |
3442 |
++ * Checking STS_EINT is not enough as there is a lag between a change |
3443 |
++ * bit being set and the Port Status Change Event that it generated |
3444 |
++ * being written to the Event Ring. See note in xhci 1.1 section 4.19.2. |
3445 |
++ */ |
3446 |
++ |
3447 |
++ port_index = xhci->num_usb2_ports; |
3448 |
++ port_array = xhci->usb2_ports; |
3449 |
++ while (port_index--) { |
3450 |
++ portsc = readl(port_array[port_index]); |
3451 |
++ if (portsc & PORT_CHANGE_MASK || |
3452 |
++ (portsc & PORT_PLS_MASK) == XDEV_RESUME) |
3453 |
++ return true; |
3454 |
++ } |
3455 |
++ port_index = xhci->num_usb3_ports; |
3456 |
++ port_array = xhci->usb3_ports; |
3457 |
++ while (port_index--) { |
3458 |
++ portsc = readl(port_array[port_index]); |
3459 |
++ if (portsc & PORT_CHANGE_MASK || |
3460 |
++ (portsc & PORT_PLS_MASK) == XDEV_RESUME) |
3461 |
++ return true; |
3462 |
++ } |
3463 |
++ return false; |
3464 |
++} |
3465 |
++ |
3466 |
+ /* |
3467 |
+ * Stop HC (not bus-specific) |
3468 |
+ * |
3469 |
+@@ -983,7 +1018,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend); |
3470 |
+ */ |
3471 |
+ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) |
3472 |
+ { |
3473 |
+- u32 command, temp = 0, status; |
3474 |
++ u32 command, temp = 0; |
3475 |
+ struct usb_hcd *hcd = xhci_to_hcd(xhci); |
3476 |
+ struct usb_hcd *secondary_hcd; |
3477 |
+ int retval = 0; |
3478 |
+@@ -1105,8 +1140,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) |
3479 |
+ done: |
3480 |
+ if (retval == 0) { |
3481 |
+ /* Resume root hubs only when have pending events. */ |
3482 |
+- status = readl(&xhci->op_regs->status); |
3483 |
+- if (status & STS_EINT) { |
3484 |
++ if (xhci_pending_portevent(xhci)) { |
3485 |
+ usb_hcd_resume_root_hub(xhci->shared_hcd); |
3486 |
+ usb_hcd_resume_root_hub(hcd); |
3487 |
+ } |
3488 |
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h |
3489 |
+index 1715705acc59..84d8871755b7 100644 |
3490 |
+--- a/drivers/usb/host/xhci.h |
3491 |
++++ b/drivers/usb/host/xhci.h |
3492 |
+@@ -382,6 +382,10 @@ struct xhci_op_regs { |
3493 |
+ #define PORT_PLC (1 << 22) |
3494 |
+ /* port configure error change - port failed to configure its link partner */ |
3495 |
+ #define PORT_CEC (1 << 23) |
3496 |
++#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ |
3497 |
++ PORT_RC | PORT_PLC | PORT_CEC) |
3498 |
++ |
3499 |
++ |
3500 |
+ /* Cold Attach Status - xHC can set this bit to report device attached during |
3501 |
+ * Sx state. Warm port reset should be perfomed to clear this bit and move port |
3502 |
+ * to connected state. |
3503 |
+diff --git a/fs/fat/inode.c b/fs/fat/inode.c |
3504 |
+index cf644d52c0cf..c81cfb79a339 100644 |
3505 |
+--- a/fs/fat/inode.c |
3506 |
++++ b/fs/fat/inode.c |
3507 |
+@@ -613,13 +613,21 @@ static void fat_set_state(struct super_block *sb, |
3508 |
+ brelse(bh); |
3509 |
+ } |
3510 |
+ |
3511 |
++static void fat_reset_iocharset(struct fat_mount_options *opts) |
3512 |
++{ |
3513 |
++ if (opts->iocharset != fat_default_iocharset) { |
3514 |
++ /* Note: opts->iocharset can be NULL here */ |
3515 |
++ kfree(opts->iocharset); |
3516 |
++ opts->iocharset = fat_default_iocharset; |
3517 |
++ } |
3518 |
++} |
3519 |
++ |
3520 |
+ static void delayed_free(struct rcu_head *p) |
3521 |
+ { |
3522 |
+ struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); |
3523 |
+ unload_nls(sbi->nls_disk); |
3524 |
+ unload_nls(sbi->nls_io); |
3525 |
+- if (sbi->options.iocharset != fat_default_iocharset) |
3526 |
+- kfree(sbi->options.iocharset); |
3527 |
++ fat_reset_iocharset(&sbi->options); |
3528 |
+ kfree(sbi); |
3529 |
+ } |
3530 |
+ |
3531 |
+@@ -1034,7 +1042,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, |
3532 |
+ opts->fs_fmask = opts->fs_dmask = current_umask(); |
3533 |
+ opts->allow_utime = -1; |
3534 |
+ opts->codepage = fat_default_codepage; |
3535 |
+- opts->iocharset = fat_default_iocharset; |
3536 |
++ fat_reset_iocharset(opts); |
3537 |
+ if (is_vfat) { |
3538 |
+ opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; |
3539 |
+ opts->rodir = 0; |
3540 |
+@@ -1184,8 +1192,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, |
3541 |
+ |
3542 |
+ /* vfat specific */ |
3543 |
+ case Opt_charset: |
3544 |
+- if (opts->iocharset != fat_default_iocharset) |
3545 |
+- kfree(opts->iocharset); |
3546 |
++ fat_reset_iocharset(opts); |
3547 |
+ iocharset = match_strdup(&args[0]); |
3548 |
+ if (!iocharset) |
3549 |
+ return -ENOMEM; |
3550 |
+@@ -1776,8 +1783,7 @@ out_fail: |
3551 |
+ iput(fat_inode); |
3552 |
+ unload_nls(sbi->nls_io); |
3553 |
+ unload_nls(sbi->nls_disk); |
3554 |
+- if (sbi->options.iocharset != fat_default_iocharset) |
3555 |
+- kfree(sbi->options.iocharset); |
3556 |
++ fat_reset_iocharset(&sbi->options); |
3557 |
+ sb->s_fs_info = NULL; |
3558 |
+ kfree(sbi); |
3559 |
+ return error; |
3560 |
+diff --git a/fs/proc/array.c b/fs/proc/array.c |
3561 |
+index b6c00ce0e29e..cb71cbae606d 100644 |
3562 |
+--- a/fs/proc/array.c |
3563 |
++++ b/fs/proc/array.c |
3564 |
+@@ -79,6 +79,7 @@ |
3565 |
+ #include <linux/delayacct.h> |
3566 |
+ #include <linux/seq_file.h> |
3567 |
+ #include <linux/pid_namespace.h> |
3568 |
++#include <linux/prctl.h> |
3569 |
+ #include <linux/ptrace.h> |
3570 |
+ #include <linux/tracehook.h> |
3571 |
+ #include <linux/string_helpers.h> |
3572 |
+@@ -332,6 +333,31 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) |
3573 |
+ #ifdef CONFIG_SECCOMP |
3574 |
+ seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); |
3575 |
+ #endif |
3576 |
++ seq_printf(m, "\nSpeculation_Store_Bypass:\t"); |
3577 |
++ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { |
3578 |
++ case -EINVAL: |
3579 |
++ seq_printf(m, "unknown"); |
3580 |
++ break; |
3581 |
++ case PR_SPEC_NOT_AFFECTED: |
3582 |
++ seq_printf(m, "not vulnerable"); |
3583 |
++ break; |
3584 |
++ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: |
3585 |
++ seq_printf(m, "thread force mitigated"); |
3586 |
++ break; |
3587 |
++ case PR_SPEC_PRCTL | PR_SPEC_DISABLE: |
3588 |
++ seq_printf(m, "thread mitigated"); |
3589 |
++ break; |
3590 |
++ case PR_SPEC_PRCTL | PR_SPEC_ENABLE: |
3591 |
++ seq_printf(m, "thread vulnerable"); |
3592 |
++ break; |
3593 |
++ case PR_SPEC_DISABLE: |
3594 |
++ seq_printf(m, "globally mitigated"); |
3595 |
++ break; |
3596 |
++ default: |
3597 |
++ seq_printf(m, "vulnerable"); |
3598 |
++ break; |
3599 |
++ } |
3600 |
++ seq_putc(m, '\n'); |
3601 |
+ } |
3602 |
+ |
3603 |
+ static inline void task_context_switch_counts(struct seq_file *m, |
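
The fs/proc/array.c hunk makes the per-task SSB state visible as a Speculation_Store_Bypass line in /proc/<pid>/status, using exactly the strings shown above. A minimal reader sketch (userspace C, error handling trimmed):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/self/status", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (!strncmp(line, "Speculation_Store_Bypass:", 25))
                            fputs(line, stdout); /* e.g. "... thread vulnerable" */
            fclose(f);
            return 0;
    }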
+diff --git a/include/linux/cpu.h b/include/linux/cpu.h
+index 7e04bcd9af8e..2f9d12022100 100644
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -46,6 +46,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
++ struct device_attribute *attr, char *buf);
+ 
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
+diff --git a/include/linux/nospec.h b/include/linux/nospec.h
+index e791ebc65c9c..0c5ef54fd416 100644
+--- a/include/linux/nospec.h
++++ b/include/linux/nospec.h
+@@ -7,6 +7,8 @@
+ #define _LINUX_NOSPEC_H
+ #include <asm/barrier.h>
+ 
++struct task_struct;
++
+ /**
+ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
+ * @index: array element index
+@@ -55,4 +57,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
+ \
+ (typeof(_i)) (_i & _mask); \
+ })
++
++/* Speculation control prctl */
++int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
++int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
++ unsigned long ctrl);
++/* Speculation control for seccomp enforced mitigation */
++void arch_seccomp_spec_mitigate(struct task_struct *task);
++
+ #endif /* _LINUX_NOSPEC_H */
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 90bea398e5e0..725498cc5d30 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2167,6 +2167,8 @@ static inline void memalloc_noio_restore(unsigned int flags)
+ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
+ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
+ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
++#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */
++#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/
+ 
+ 
+ #define TASK_PFA_TEST(name, func) \
+@@ -2190,6 +2192,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
+ TASK_PFA_SET(SPREAD_SLAB, spread_slab)
+ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+ 
++TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
++TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
++TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
++
++TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
++TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
++
+ /*
+ * task->jobctl flags
+ */
+diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
+index 2296e6b2f690..5a53d34bba26 100644
+--- a/include/linux/seccomp.h
++++ b/include/linux/seccomp.h
+@@ -3,7 +3,8 @@
+ 
+ #include <uapi/linux/seccomp.h>
+ 
+-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
++#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
++ SECCOMP_FILTER_FLAG_SPEC_ALLOW)
+ 
+ #ifdef CONFIG_SECCOMP
+ 
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index a6da214d0584..c28bd8be290a 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -514,6 +514,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
+ * @hash: the packet hash
+ * @queue_mapping: Queue mapping for multiqueue devices
+ * @xmit_more: More SKBs are pending for this queue
++ * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
+ * @ndisc_nodetype: router type (from link layer)
+ * @ooo_okay: allow the mapping of a socket to a queue to be changed
+ * @l4_hash: indicate hash is a canonical 4-tuple hash over transport
+@@ -594,8 +595,8 @@ struct sk_buff {
+ fclone:2,
+ peeked:1,
+ head_frag:1,
+- xmit_more:1;
+- /* one bit hole */
++ xmit_more:1,
++ pfmemalloc:1;
+ kmemcheck_bitfield_end(flags1);
+ 
+ /* fields enclosed in headers_start/headers_end are copied
+@@ -615,19 +616,18 @@ struct sk_buff {
+ 
+ __u8 __pkt_type_offset[0];
+ __u8 pkt_type:3;
+- __u8 pfmemalloc:1;
+ __u8 ignore_df:1;
+ __u8 nfctinfo:3;
+-
+ __u8 nf_trace:1;
++
+ __u8 ip_summed:2;
+ __u8 ooo_okay:1;
+ __u8 l4_hash:1;
+ __u8 sw_hash:1;
+ __u8 wifi_acked_valid:1;
+ __u8 wifi_acked:1;
+-
+ __u8 no_fcs:1;
++
+ /* Indicates the inner headers are valid in the skbuff. */
+ __u8 encapsulation:1;
+ __u8 encap_hdr_csum:1;
+@@ -635,11 +635,11 @@ struct sk_buff {
+ __u8 csum_complete_sw:1;
+ __u8 csum_level:2;
+ __u8 csum_bad:1;
+-
+ #ifdef CONFIG_IPV6_NDISC_NODETYPE
+ __u8 ndisc_nodetype:2;
+ #endif
+ __u8 ipvs_property:1;
++
+ __u8 inner_protocol_type:1;
+ __u8 remcsum_offload:1;
+ /* 3 or 5 bit hole */
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index 84f0d0602433..0e01d570fa22 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -762,7 +762,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
+ * to minimize possbility that any useful information to an
+ * attacker is leaked. Only lower 20 bits are relevant.
+ */
+- rol32(hash, 16);
++ hash = rol32(hash, 16);
+ 
+ flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+ 
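
The ipv6.h change is a one-word fix that is easy to misread: rol32() is a pure function with no side effects, so the bare call threw its result away and the flow label was computed from the unrotated hash. For reference, rol32() is defined in include/linux/bitops.h roughly as:

    /* pure rotate-left: returns the rotated value, touches nothing */
    static inline __u32 rol32(__u32 word, unsigned int shift)
    {
            return (word << shift) | (word >> (32 - shift));
    }

so only the assigned form, hash = rol32(hash, 16), actually rotates the value.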
3755 |
+diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h |
3756 |
+index a8d0759a9e40..64776b72e1eb 100644 |
3757 |
+--- a/include/uapi/linux/prctl.h |
3758 |
++++ b/include/uapi/linux/prctl.h |
3759 |
+@@ -197,4 +197,16 @@ struct prctl_mm_map { |
3760 |
+ # define PR_CAP_AMBIENT_LOWER 3 |
3761 |
+ # define PR_CAP_AMBIENT_CLEAR_ALL 4 |
3762 |
+ |
3763 |
++/* Per task speculation control */ |
3764 |
++#define PR_GET_SPECULATION_CTRL 52 |
3765 |
++#define PR_SET_SPECULATION_CTRL 53 |
3766 |
++/* Speculation control variants */ |
3767 |
++# define PR_SPEC_STORE_BYPASS 0 |
3768 |
++/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ |
3769 |
++# define PR_SPEC_NOT_AFFECTED 0 |
3770 |
++# define PR_SPEC_PRCTL (1UL << 0) |
3771 |
++# define PR_SPEC_ENABLE (1UL << 1) |
3772 |
++# define PR_SPEC_DISABLE (1UL << 2) |
3773 |
++# define PR_SPEC_FORCE_DISABLE (1UL << 3) |
3774 |
++ |
3775 |
+ #endif /* _LINUX_PRCTL_H */ |
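
The new constants above are driven through the ordinary prctl(2) syscall. A hedged userspace sketch (assumes a kernel carrying this series; the fallback defines mirror the hunk above for older headers):

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_SPECULATION_CTRL
    #define PR_GET_SPECULATION_CTRL 52
    #define PR_SET_SPECULATION_CTRL 53
    #define PR_SPEC_STORE_BYPASS    0
    #define PR_SPEC_PRCTL           (1UL << 0)
    #define PR_SPEC_DISABLE         (1UL << 2)
    #endif

    int main(void)
    {
            /* Query this task's Speculative Store Bypass state. */
            int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

            printf("SSB state: 0x%x\n", state);

            /* If per-task control is offered, opt into the mitigation. */
            if (state > 0 && (state & PR_SPEC_PRCTL))
                    prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                          PR_SPEC_DISABLE, 0, 0);
            return 0;
    }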
+diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
+index 0f238a43ff1e..e4acb615792b 100644
+--- a/include/uapi/linux/seccomp.h
++++ b/include/uapi/linux/seccomp.h
+@@ -15,7 +15,9 @@
+ #define SECCOMP_SET_MODE_FILTER 1
+ 
+ /* Valid flags for SECCOMP_SET_MODE_FILTER */
+-#define SECCOMP_FILTER_FLAG_TSYNC 1
++#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
++/* In v4.14+ SECCOMP_FILTER_FLAG_LOG is (1UL << 1) */
++#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+ 
+ /*
+ * All BPF programs must return a 32-bit value.
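
The selftest added near the end of this patch documents the probing convention for these flags: call seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL) and distinguish EFAULT (flag accepted, kernel then faulted on the NULL filter) from EINVAL (flag unknown). Extracted into a stand-alone helper (a sketch, not the selftest itself):

    #include <errno.h>
    #include <stddef.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef SECCOMP_SET_MODE_FILTER
    #define SECCOMP_SET_MODE_FILTER 1
    #endif
    #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
    #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
    #endif

    /* Returns 1 if the running kernel recognizes @flag, 0 otherwise. */
    static int seccomp_flag_supported(unsigned long flag)
    {
            errno = 0;
            syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flag, NULL);
            return errno == EFAULT;
    }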
3791 |
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c |
3792 |
+index efd384f3f852..9a9203b15cde 100644 |
3793 |
+--- a/kernel/seccomp.c |
3794 |
++++ b/kernel/seccomp.c |
3795 |
+@@ -16,6 +16,8 @@ |
3796 |
+ #include <linux/atomic.h> |
3797 |
+ #include <linux/audit.h> |
3798 |
+ #include <linux/compat.h> |
3799 |
++#include <linux/nospec.h> |
3800 |
++#include <linux/prctl.h> |
3801 |
+ #include <linux/sched.h> |
3802 |
+ #include <linux/seccomp.h> |
3803 |
+ #include <linux/slab.h> |
3804 |
+@@ -214,8 +216,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) |
3805 |
+ return true; |
3806 |
+ } |
3807 |
+ |
3808 |
++void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { } |
3809 |
++ |
3810 |
+ static inline void seccomp_assign_mode(struct task_struct *task, |
3811 |
+- unsigned long seccomp_mode) |
3812 |
++ unsigned long seccomp_mode, |
3813 |
++ unsigned long flags) |
3814 |
+ { |
3815 |
+ assert_spin_locked(&task->sighand->siglock); |
3816 |
+ |
3817 |
+@@ -225,6 +230,9 @@ static inline void seccomp_assign_mode(struct task_struct *task, |
3818 |
+ * filter) is set. |
3819 |
+ */ |
3820 |
+ smp_mb__before_atomic(); |
3821 |
++ /* Assume default seccomp processes want spec flaw mitigation. */ |
3822 |
++ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0) |
3823 |
++ arch_seccomp_spec_mitigate(task); |
3824 |
+ set_tsk_thread_flag(task, TIF_SECCOMP); |
3825 |
+ } |
3826 |
+ |
3827 |
+@@ -292,7 +300,7 @@ static inline pid_t seccomp_can_sync_threads(void) |
3828 |
+ * without dropping the locks. |
3829 |
+ * |
3830 |
+ */ |
3831 |
+-static inline void seccomp_sync_threads(void) |
3832 |
++static inline void seccomp_sync_threads(unsigned long flags) |
3833 |
+ { |
3834 |
+ struct task_struct *thread, *caller; |
3835 |
+ |
3836 |
+@@ -333,7 +341,8 @@ static inline void seccomp_sync_threads(void) |
3837 |
+ * allow one thread to transition the other. |
3838 |
+ */ |
3839 |
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) |
3840 |
+- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); |
3841 |
++ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER, |
3842 |
++ flags); |
3843 |
+ } |
3844 |
+ } |
3845 |
+ |
3846 |
+@@ -452,7 +461,7 @@ static long seccomp_attach_filter(unsigned int flags, |
3847 |
+ |
3848 |
+ /* Now that the new filter is in place, synchronize to all threads. */ |
3849 |
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC) |
3850 |
+- seccomp_sync_threads(); |
3851 |
++ seccomp_sync_threads(flags); |
3852 |
+ |
3853 |
+ return 0; |
3854 |
+ } |
3855 |
+@@ -747,7 +756,7 @@ static long seccomp_set_mode_strict(void) |
3856 |
+ #ifdef TIF_NOTSC |
3857 |
+ disable_TSC(); |
3858 |
+ #endif |
3859 |
+- seccomp_assign_mode(current, seccomp_mode); |
3860 |
++ seccomp_assign_mode(current, seccomp_mode, 0); |
3861 |
+ ret = 0; |
3862 |
+ |
3863 |
+ out: |
3864 |
+@@ -805,7 +814,7 @@ static long seccomp_set_mode_filter(unsigned int flags, |
3865 |
+ /* Do not free the successfully attached filter. */ |
3866 |
+ prepared = NULL; |
3867 |
+ |
3868 |
+- seccomp_assign_mode(current, seccomp_mode); |
3869 |
++ seccomp_assign_mode(current, seccomp_mode, flags); |
3870 |
+ out: |
3871 |
+ spin_unlock_irq(¤t->sighand->siglock); |
3872 |
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC) |
3873 |
+diff --git a/kernel/sys.c b/kernel/sys.c |
3874 |
+index 6624919ef0e7..f718742e55e6 100644 |
3875 |
+--- a/kernel/sys.c |
3876 |
++++ b/kernel/sys.c |
3877 |
+@@ -2075,6 +2075,17 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) |
3878 |
+ } |
3879 |
+ #endif |
3880 |
+ |
3881 |
++int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) |
3882 |
++{ |
3883 |
++ return -EINVAL; |
3884 |
++} |
3885 |
++ |
3886 |
++int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, |
3887 |
++ unsigned long ctrl) |
3888 |
++{ |
3889 |
++ return -EINVAL; |
3890 |
++} |
3891 |
++ |
3892 |
+ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, |
3893 |
+ unsigned long, arg4, unsigned long, arg5) |
3894 |
+ { |
3895 |
+@@ -2269,6 +2280,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, |
3896 |
+ case PR_GET_FP_MODE: |
3897 |
+ error = GET_FP_MODE(me); |
3898 |
+ break; |
3899 |
++ case PR_GET_SPECULATION_CTRL: |
3900 |
++ if (arg3 || arg4 || arg5) |
3901 |
++ return -EINVAL; |
3902 |
++ error = arch_prctl_spec_ctrl_get(me, arg2); |
3903 |
++ break; |
3904 |
++ case PR_SET_SPECULATION_CTRL: |
3905 |
++ if (arg4 || arg5) |
3906 |
++ return -EINVAL; |
3907 |
++ error = arch_prctl_spec_ctrl_set(me, arg2, arg3); |
3908 |
++ break; |
3909 |
+ default: |
3910 |
+ error = -EINVAL; |
3911 |
+ break; |
3912 |
+diff --git a/lib/rhashtable.c b/lib/rhashtable.c |
3913 |
+index 51282f579760..37ea94b636a3 100644 |
3914 |
+--- a/lib/rhashtable.c |
3915 |
++++ b/lib/rhashtable.c |
3916 |
+@@ -670,8 +670,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); |
3917 |
+ |
3918 |
+ static size_t rounded_hashtable_size(const struct rhashtable_params *params) |
3919 |
+ { |
3920 |
+- return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), |
3921 |
+- (unsigned long)params->min_size); |
3922 |
++ size_t retsize; |
3923 |
++ |
3924 |
++ if (params->nelem_hint) |
3925 |
++ retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), |
3926 |
++ (unsigned long)params->min_size); |
3927 |
++ else |
3928 |
++ retsize = max(HASH_DEFAULT_SIZE, |
3929 |
++ (unsigned long)params->min_size); |
3930 |
++ |
3931 |
++ return retsize; |
3932 |
+ } |
3933 |
+ |
3934 |
+ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) |
3935 |
+@@ -728,8 +736,6 @@ int rhashtable_init(struct rhashtable *ht, |
3936 |
+ struct bucket_table *tbl; |
3937 |
+ size_t size; |
3938 |
+ |
3939 |
+- size = HASH_DEFAULT_SIZE; |
3940 |
+- |
3941 |
+ if ((!params->key_len && !params->obj_hashfn) || |
3942 |
+ (params->obj_hashfn && !params->obj_cmpfn)) |
3943 |
+ return -EINVAL; |
3944 |
+@@ -756,8 +762,7 @@ int rhashtable_init(struct rhashtable *ht, |
3945 |
+ |
3946 |
+ ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); |
3947 |
+ |
3948 |
+- if (params->nelem_hint) |
3949 |
+- size = rounded_hashtable_size(&ht->p); |
3950 |
++ size = rounded_hashtable_size(&ht->p); |
3951 |
+ |
3952 |
+ /* The maximum (not average) chain length grows with the |
3953 |
+ * size of the hash table, at a rate of (log N)/(log log N). |
3954 |
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c |
3955 |
+index 55a9facb8e8d..9a8e688724b1 100644 |
3956 |
+--- a/mm/memcontrol.c |
3957 |
++++ b/mm/memcontrol.c |
3958 |
+@@ -996,7 +996,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) |
3959 |
+ int nid, zid; |
3960 |
+ int i; |
3961 |
+ |
3962 |
+- while ((memcg = parent_mem_cgroup(memcg))) { |
3963 |
++ for (; memcg; memcg = parent_mem_cgroup(memcg)) { |
3964 |
+ for_each_node(nid) { |
3965 |
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
3966 |
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; |
3967 |
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c |
3968 |
+index fa02c680eebc..55be076706e5 100644 |
3969 |
+--- a/net/core/skbuff.c |
3970 |
++++ b/net/core/skbuff.c |
3971 |
+@@ -828,6 +828,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) |
3972 |
+ n->cloned = 1; |
3973 |
+ n->nohdr = 0; |
3974 |
+ n->peeked = 0; |
3975 |
++ C(pfmemalloc); |
3976 |
+ n->destructor = NULL; |
3977 |
+ C(tail); |
3978 |
+ C(end); |
3979 |
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c |
3980 |
+index c9e68ff48a72..8f05816a8be2 100644 |
3981 |
+--- a/net/ipv4/fib_frontend.c |
3982 |
++++ b/net/ipv4/fib_frontend.c |
3983 |
+@@ -297,6 +297,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) |
3984 |
+ if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { |
3985 |
+ struct flowi4 fl4 = { |
3986 |
+ .flowi4_iif = LOOPBACK_IFINDEX, |
3987 |
++ .flowi4_oif = l3mdev_master_ifindex_rcu(dev), |
3988 |
+ .daddr = ip_hdr(skb)->saddr, |
3989 |
+ .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), |
3990 |
+ .flowi4_scope = scope, |
3991 |
+diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c |
3992 |
+index 75abf978ef30..da90c74d12ef 100644 |
3993 |
+--- a/net/ipv4/sysctl_net_ipv4.c |
3994 |
++++ b/net/ipv4/sysctl_net_ipv4.c |
3995 |
+@@ -141,8 +141,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, |
3996 |
+ if (write && ret == 0) { |
3997 |
+ low = make_kgid(user_ns, urange[0]); |
3998 |
+ high = make_kgid(user_ns, urange[1]); |
3999 |
+- if (!gid_valid(low) || !gid_valid(high) || |
4000 |
+- (urange[1] < urange[0]) || gid_lt(high, low)) { |
4001 |
++ if (!gid_valid(low) || !gid_valid(high)) |
4002 |
++ return -EINVAL; |
4003 |
++ if (urange[1] < urange[0] || gid_lt(high, low)) { |
4004 |
+ low = make_kgid(&init_user_ns, 1); |
4005 |
+ high = make_kgid(&init_user_ns, 0); |
4006 |
+ } |
4007 |
+diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c |
4008 |
+index 16f8124b1150..59111cadaec2 100644 |
4009 |
+--- a/sound/core/rawmidi.c |
4010 |
++++ b/sound/core/rawmidi.c |
4011 |
+@@ -635,7 +635,7 @@ static int snd_rawmidi_info_select_user(struct snd_card *card, |
4012 |
+ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, |
4013 |
+ struct snd_rawmidi_params * params) |
4014 |
+ { |
4015 |
+- char *newbuf; |
4016 |
++ char *newbuf, *oldbuf; |
4017 |
+ struct snd_rawmidi_runtime *runtime = substream->runtime; |
4018 |
+ |
4019 |
+ if (substream->append && substream->use_count > 1) |
4020 |
+@@ -648,13 +648,17 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, |
4021 |
+ return -EINVAL; |
4022 |
+ } |
4023 |
+ if (params->buffer_size != runtime->buffer_size) { |
4024 |
+- newbuf = krealloc(runtime->buffer, params->buffer_size, |
4025 |
+- GFP_KERNEL); |
4026 |
++ newbuf = kmalloc(params->buffer_size, GFP_KERNEL); |
4027 |
+ if (!newbuf) |
4028 |
+ return -ENOMEM; |
4029 |
++ spin_lock_irq(&runtime->lock); |
4030 |
++ oldbuf = runtime->buffer; |
4031 |
+ runtime->buffer = newbuf; |
4032 |
+ runtime->buffer_size = params->buffer_size; |
4033 |
+ runtime->avail = runtime->buffer_size; |
4034 |
++ runtime->appl_ptr = runtime->hw_ptr = 0; |
4035 |
++ spin_unlock_irq(&runtime->lock); |
4036 |
++ kfree(oldbuf); |
4037 |
+ } |
4038 |
+ runtime->avail_min = params->avail_min; |
4039 |
+ substream->active_sensing = !params->no_active_sensing; |
4040 |
+@@ -665,7 +669,7 @@ EXPORT_SYMBOL(snd_rawmidi_output_params); |
4041 |
+ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, |
4042 |
+ struct snd_rawmidi_params * params) |
4043 |
+ { |
4044 |
+- char *newbuf; |
4045 |
++ char *newbuf, *oldbuf; |
4046 |
+ struct snd_rawmidi_runtime *runtime = substream->runtime; |
4047 |
+ |
4048 |
+ snd_rawmidi_drain_input(substream); |
4049 |
+@@ -676,12 +680,16 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, |
4050 |
+ return -EINVAL; |
4051 |
+ } |
4052 |
+ if (params->buffer_size != runtime->buffer_size) { |
4053 |
+- newbuf = krealloc(runtime->buffer, params->buffer_size, |
4054 |
+- GFP_KERNEL); |
4055 |
++ newbuf = kmalloc(params->buffer_size, GFP_KERNEL); |
4056 |
+ if (!newbuf) |
4057 |
+ return -ENOMEM; |
4058 |
++ spin_lock_irq(&runtime->lock); |
4059 |
++ oldbuf = runtime->buffer; |
4060 |
+ runtime->buffer = newbuf; |
4061 |
+ runtime->buffer_size = params->buffer_size; |
4062 |
++ runtime->appl_ptr = runtime->hw_ptr = 0; |
4063 |
++ spin_unlock_irq(&runtime->lock); |
4064 |
++ kfree(oldbuf); |
4065 |
+ } |
4066 |
+ runtime->avail_min = params->avail_min; |
4067 |
+ return 0; |
4068 |
+diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c |
4069 |
+index 882fe83a3554..b3f345433ec7 100644 |
4070 |
+--- a/tools/testing/selftests/seccomp/seccomp_bpf.c |
4071 |
++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c |
4072 |
+@@ -1476,15 +1476,19 @@ TEST_F(TRACE_syscall, syscall_dropped) |
4073 |
+ #define SECCOMP_SET_MODE_FILTER 1 |
4074 |
+ #endif |
4075 |
+ |
4076 |
+-#ifndef SECCOMP_FLAG_FILTER_TSYNC |
4077 |
+-#define SECCOMP_FLAG_FILTER_TSYNC 1 |
4078 |
++#ifndef SECCOMP_FILTER_FLAG_TSYNC |
4079 |
++#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) |
4080 |
++#endif |
4081 |
++ |
4082 |
++#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW |
4083 |
++#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) |
4084 |
+ #endif |
4085 |
+ |
4086 |
+ #ifndef seccomp |
4087 |
+-int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) |
4088 |
++int seccomp(unsigned int op, unsigned int flags, void *args) |
4089 |
+ { |
4090 |
+ errno = 0; |
4091 |
+- return syscall(__NR_seccomp, op, flags, filter); |
4092 |
++ return syscall(__NR_seccomp, op, flags, args); |
4093 |
+ } |
4094 |
+ #endif |
4095 |
+ |
4096 |
+@@ -1576,6 +1580,78 @@ TEST(seccomp_syscall_mode_lock) |
4097 |
+ } |
4098 |
+ } |
4099 |
+ |
4100 |
++/* |
4101 |
++ * Test detection of known and unknown filter flags. Userspace needs to be able |
4102 |
++ * to check if a filter flag is supported by the current kernel and a good way |
4103 |
++ * of doing that is by attempting to enter filter mode, with the flag bit in |
4104 |
++ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates |
4105 |
++ * that the flag is valid and EINVAL indicates that the flag is invalid. |
4106 |
++ */ |
4107 |
++TEST(detect_seccomp_filter_flags) |
4108 |
++{ |
4109 |
++ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, |
4110 |
++ SECCOMP_FILTER_FLAG_SPEC_ALLOW }; |
4111 |
++ unsigned int flag, all_flags; |
4112 |
++ int i; |
4113 |
++ long ret; |
4114 |
++ |
4115 |
++ /* Test detection of known-good filter flags */ |
4116 |
++ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { |
4117 |
++ int bits = 0; |
4118 |
++ |
4119 |
++ flag = flags[i]; |
4120 |
++ /* Make sure the flag is a single bit! */ |
4121 |
++ while (flag) { |
4122 |
++ if (flag & 0x1) |
4123 |
++ bits ++; |
4124 |
++ flag >>= 1; |
4125 |
++ } |
4126 |
++ ASSERT_EQ(1, bits); |
4127 |
++ flag = flags[i]; |
4128 |
++ |
4129 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); |
4130 |
++ ASSERT_NE(ENOSYS, errno) { |
4131 |
++ TH_LOG("Kernel does not support seccomp syscall!"); |
4132 |
++ } |
4133 |
++ EXPECT_EQ(-1, ret); |
4134 |
++ EXPECT_EQ(EFAULT, errno) { |
4135 |
++ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", |
4136 |
++ flag); |
4137 |
++ } |
4138 |
++ |
4139 |
++ all_flags |= flag; |
4140 |
++ } |
4141 |
++ |
4142 |
++ /* Test detection of all known-good filter flags */ |
4143 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); |
4144 |
++ EXPECT_EQ(-1, ret); |
4145 |
++ EXPECT_EQ(EFAULT, errno) { |
4146 |
++ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", |
4147 |
++ all_flags); |
4148 |
++ } |
4149 |
++ |
4150 |
++ /* Test detection of an unknown filter flag */ |
4151 |
++ flag = -1; |
4152 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); |
4153 |
++ EXPECT_EQ(-1, ret); |
4154 |
++ EXPECT_EQ(EINVAL, errno) { |
4155 |
++ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", |
4156 |
++ flag); |
4157 |
++ } |
4158 |
++ |
4159 |
++ /* |
4160 |
++ * Test detection of an unknown filter flag that may simply need to be |
4161 |
++ * added to this test |
4162 |
++ */ |
4163 |
++ flag = flags[ARRAY_SIZE(flags) - 1] << 1; |
4164 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); |
4165 |
++ EXPECT_EQ(-1, ret); |
4166 |
++ EXPECT_EQ(EINVAL, errno) { |
4167 |
++ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", |
4168 |
++ flag); |
4169 |
++ } |
4170 |
++} |
4171 |
++ |
4172 |
+ TEST(TSYNC_first) |
4173 |
+ { |
4174 |
+ struct sock_filter filter[] = { |
4175 |
+@@ -1592,7 +1668,7 @@ TEST(TSYNC_first) |
4176 |
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); |
4177 |
+ } |
4178 |
+ |
4179 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4180 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4181 |
+ &prog); |
4182 |
+ ASSERT_NE(ENOSYS, errno) { |
4183 |
+ TH_LOG("Kernel does not support seccomp syscall!"); |
4184 |
+@@ -1810,7 +1886,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor) |
4185 |
+ self->sibling_count++; |
4186 |
+ } |
4187 |
+ |
4188 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4189 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4190 |
+ &self->apply_prog); |
4191 |
+ ASSERT_EQ(0, ret) { |
4192 |
+ TH_LOG("Could install filter on all threads!"); |
4193 |
+@@ -1871,7 +1947,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter) |
4194 |
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); |
4195 |
+ } |
4196 |
+ |
4197 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4198 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4199 |
+ &self->apply_prog); |
4200 |
+ ASSERT_NE(ENOSYS, errno) { |
4201 |
+ TH_LOG("Kernel does not support seccomp syscall!"); |
4202 |
+@@ -1919,7 +1995,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) |
4203 |
+ self->sibling_count++; |
4204 |
+ } |
4205 |
+ |
4206 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4207 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4208 |
+ &self->apply_prog); |
4209 |
+ ASSERT_EQ(self->sibling[0].system_tid, ret) { |
4210 |
+ TH_LOG("Did not fail on diverged sibling."); |
4211 |
+@@ -1971,7 +2047,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) |
4212 |
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); |
4213 |
+ } |
4214 |
+ |
4215 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4216 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4217 |
+ &self->apply_prog); |
4218 |
+ ASSERT_EQ(ret, self->sibling[0].system_tid) { |
4219 |
+ TH_LOG("Did not fail on diverged sibling."); |
4220 |
+@@ -2000,7 +2076,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) |
4221 |
+ /* Switch to the remaining sibling */ |
4222 |
+ sib = !sib; |
4223 |
+ |
4224 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4225 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4226 |
+ &self->apply_prog); |
4227 |
+ ASSERT_EQ(0, ret) { |
4228 |
+ TH_LOG("Expected the remaining sibling to sync"); |
4229 |
+@@ -2023,7 +2099,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) |
4230 |
+ while (!kill(self->sibling[sib].system_tid, 0)) |
4231 |
+ sleep(0.1); |
4232 |
+ |
4233 |
+- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, |
4234 |
++ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
4235 |
+ &self->apply_prog); |
4236 |
+ ASSERT_EQ(0, ret); /* just us chickens */ |
4237 |
+ } |
4238 |
+diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c |
4239 |
+index 49001fa84ead..1203829316b2 100644 |
4240 |
+--- a/virt/kvm/eventfd.c |
4241 |
++++ b/virt/kvm/eventfd.c |
4242 |
+@@ -119,8 +119,12 @@ irqfd_shutdown(struct work_struct *work) |
4243 |
+ { |
4244 |
+ struct kvm_kernel_irqfd *irqfd = |
4245 |
+ container_of(work, struct kvm_kernel_irqfd, shutdown); |
4246 |
++ struct kvm *kvm = irqfd->kvm; |
4247 |
+ u64 cnt; |
4248 |
+ |
4249 |
++ /* Make sure irqfd has been initialized in assign path. */ |
4250 |
++ synchronize_srcu(&kvm->irq_srcu); |
4251 |
++ |
4252 |
+ /* |
4253 |
+ * Synchronize with the wait-queue and unhook ourselves to prevent |
4254 |
+ * further events. |
4255 |
+@@ -387,7 +391,6 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) |
4256 |
+ |
4257 |
+ idx = srcu_read_lock(&kvm->irq_srcu); |
4258 |
+ irqfd_update(kvm, irqfd); |
4259 |
+- srcu_read_unlock(&kvm->irq_srcu, idx); |
4260 |
+ |
4261 |
+ list_add_tail(&irqfd->list, &kvm->irqfds.items); |
4262 |
+ |
4263 |
+@@ -419,6 +422,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) |
4264 |
+ irqfd->consumer.token, ret); |
4265 |
+ #endif |
4266 |
+ |
4267 |
++ srcu_read_unlock(&kvm->irq_srcu, idx); |
4268 |
+ return 0; |
4269 |
+ |
4270 |
+ fail: |
4271 |
|
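
The eventfd hunk above is an ordering fix: kvm_irqfd_assign() now keeps the irq_srcu read-side section open across the whole setup, and irqfd_shutdown() begins with synchronize_srcu(), so teardown can no longer observe a half-initialized irqfd. A rough userspace analogue of the pattern, with a pthread rwlock standing in for SRCU (illustrative only, not the kernel API):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_rwlock_t srcu = PTHREAD_RWLOCK_INITIALIZER;
	static int initialized;

	static void assign(void)		/* ~ kvm_irqfd_assign() */
	{
		pthread_rwlock_rdlock(&srcu);	/* ~ srcu_read_lock() */
		initialized = 1;		/* ...finish all setup... */
		pthread_rwlock_unlock(&srcu);	/* ~ srcu_read_unlock(), now last */
	}

	static void shutdown_path(void)		/* ~ irqfd_shutdown() */
	{
		/* ~ synchronize_srcu(): wait out any in-flight assign */
		pthread_rwlock_wrlock(&srcu);
		pthread_rwlock_unlock(&srcu);
		printf("safe to tear down, initialized=%d\n", initialized);
	}

	int main(void)
	{
		assign();
		shutdown_path();
		return 0;
	}
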
4272 |
diff --git a/1144_linux-4.4.145.patch b/1144_linux-4.4.145.patch |
4273 |
new file mode 100644 |
4274 |
index 0000000..f7b3f94 |
4275 |
--- /dev/null |
4276 |
+++ b/1144_linux-4.4.145.patch |
4277 |
@@ -0,0 +1,1006 @@ |
4278 |
+diff --git a/Makefile b/Makefile |
4279 |
+index 63f3e2438a26..be31491a2d67 100644 |
4280 |
+--- a/Makefile |
4281 |
++++ b/Makefile |
4282 |
+@@ -1,6 +1,6 @@ |
4283 |
+ VERSION = 4 |
4284 |
+ PATCHLEVEL = 4 |
4285 |
+-SUBLEVEL = 144 |
4286 |
++SUBLEVEL = 145 |
4287 |
+ EXTRAVERSION = |
4288 |
+ NAME = Blurry Fish Butt |
4289 |
+ |
4290 |
+@@ -624,6 +624,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) |
4291 |
+ KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) |
4292 |
+ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) |
4293 |
+ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) |
4294 |
++KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias) |
4295 |
+ |
4296 |
+ ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE |
4297 |
+ KBUILD_CFLAGS += -Os |
4298 |
+diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h |
4299 |
+index 35c9db857ebe..cd8b589111ba 100644 |
4300 |
+--- a/arch/arm/include/asm/uaccess.h |
4301 |
++++ b/arch/arm/include/asm/uaccess.h |
4302 |
+@@ -251,7 +251,7 @@ extern int __put_user_8(void *, unsigned long long); |
4303 |
+ ({ \ |
4304 |
+ unsigned long __limit = current_thread_info()->addr_limit - 1; \ |
4305 |
+ const typeof(*(p)) __user *__tmp_p = (p); \ |
4306 |
+- register const typeof(*(p)) __r2 asm("r2") = (x); \ |
4307 |
++ register typeof(*(p)) __r2 asm("r2") = (x); \ |
4308 |
+ register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ |
4309 |
+ register unsigned long __l asm("r1") = __limit; \ |
4310 |
+ register int __e asm("r0"); \ |
4311 |
+diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c |
4312 |
+index 8ae4067a5eda..40ecb6e700cd 100644 |
4313 |
+--- a/arch/mips/ath79/common.c |
4314 |
++++ b/arch/mips/ath79/common.c |
4315 |
+@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init); |
4316 |
+ |
4317 |
+ void ath79_ddr_wb_flush(u32 reg) |
4318 |
+ { |
4319 |
+- void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg; |
4320 |
++ void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4); |
4321 |
+ |
4322 |
+ /* Flush the DDR write buffer. */ |
4323 |
+ __raw_writel(0x1, flush_reg); |
4324 |
+diff --git a/drivers/base/dd.c b/drivers/base/dd.c |
4325 |
+index a641cf3ccad6..1dffb018a7fe 100644 |
4326 |
+--- a/drivers/base/dd.c |
4327 |
++++ b/drivers/base/dd.c |
4328 |
+@@ -304,14 +304,6 @@ static int really_probe(struct device *dev, struct device_driver *drv) |
4329 |
+ goto probe_failed; |
4330 |
+ } |
4331 |
+ |
4332 |
+- /* |
4333 |
+- * Ensure devices are listed in devices_kset in correct order |
4334 |
+- * It's important to move Dev to the end of devices_kset before |
4335 |
+- * calling .probe, because it could be recursive and parent Dev |
4336 |
+- * should always go first |
4337 |
+- */ |
4338 |
+- devices_kset_move_last(dev); |
4339 |
+- |
4340 |
+ if (dev->bus->probe) { |
4341 |
+ ret = dev->bus->probe(dev); |
4342 |
+ if (ret) |
4343 |
+diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c |
4344 |
+index 51670b322409..700b98d9c250 100644 |
4345 |
+--- a/drivers/net/can/xilinx_can.c |
4346 |
++++ b/drivers/net/can/xilinx_can.c |
4347 |
+@@ -2,6 +2,7 @@ |
4348 |
+ * |
4349 |
+ * Copyright (C) 2012 - 2014 Xilinx, Inc. |
4350 |
+ * Copyright (C) 2009 PetaLogix. All rights reserved. |
4351 |
++ * Copyright (C) 2017 Sandvik Mining and Construction Oy |
4352 |
+ * |
4353 |
+ * Description: |
4354 |
+ * This driver is developed for Axi CAN IP and for Zynq CANPS Controller. |
4355 |
+@@ -25,8 +26,10 @@ |
4356 |
+ #include <linux/module.h> |
4357 |
+ #include <linux/netdevice.h> |
4358 |
+ #include <linux/of.h> |
4359 |
++#include <linux/of_device.h> |
4360 |
+ #include <linux/platform_device.h> |
4361 |
+ #include <linux/skbuff.h> |
4362 |
++#include <linux/spinlock.h> |
4363 |
+ #include <linux/string.h> |
4364 |
+ #include <linux/types.h> |
4365 |
+ #include <linux/can/dev.h> |
4366 |
+@@ -100,7 +103,7 @@ enum xcan_reg { |
4367 |
+ #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ |
4368 |
+ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ |
4369 |
+ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ |
4370 |
+- XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK) |
4371 |
++ XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK) |
4372 |
+ |
4373 |
+ /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */ |
4374 |
+ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ |
4375 |
+@@ -117,6 +120,7 @@ enum xcan_reg { |
4376 |
+ /** |
4377 |
+ * struct xcan_priv - This definition defines the CAN driver instance |
4378 |
+ * @can: CAN private data structure. |
4379 |
++ * @tx_lock: Lock for synchronizing TX interrupt handling |
4380 |
+ * @tx_head: Tx CAN packets ready to send on the queue |
4381 |
+ * @tx_tail: Tx CAN packets successfully sent on the queue |
4382 |
+ * @tx_max: Maximum number of packets the driver can send |
4383 |
+@@ -131,6 +135,7 @@ enum xcan_reg { |
4384 |
+ */ |
4385 |
+ struct xcan_priv { |
4386 |
+ struct can_priv can; |
4387 |
++ spinlock_t tx_lock; |
4388 |
+ unsigned int tx_head; |
4389 |
+ unsigned int tx_tail; |
4390 |
+ unsigned int tx_max; |
4391 |
+@@ -158,6 +163,11 @@ static const struct can_bittiming_const xcan_bittiming_const = { |
4392 |
+ .brp_inc = 1, |
4393 |
+ }; |
4394 |
+ |
4395 |
++#define XCAN_CAP_WATERMARK 0x0001 |
4396 |
++struct xcan_devtype_data { |
4397 |
++ unsigned int caps; |
4398 |
++}; |
4399 |
++ |
4400 |
+ /** |
4401 |
+ * xcan_write_reg_le - Write a value to the device register little endian |
4402 |
+ * @priv: Driver private data structure |
4403 |
+@@ -237,6 +247,10 @@ static int set_reset_mode(struct net_device *ndev) |
4404 |
+ usleep_range(500, 10000); |
4405 |
+ } |
4406 |
+ |
4407 |
++ /* reset clears FIFOs */ |
4408 |
++ priv->tx_head = 0; |
4409 |
++ priv->tx_tail = 0; |
4410 |
++ |
4411 |
+ return 0; |
4412 |
+ } |
4413 |
+ |
4414 |
+@@ -391,6 +405,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) |
4415 |
+ struct net_device_stats *stats = &ndev->stats; |
4416 |
+ struct can_frame *cf = (struct can_frame *)skb->data; |
4417 |
+ u32 id, dlc, data[2] = {0, 0}; |
4418 |
++ unsigned long flags; |
4419 |
+ |
4420 |
+ if (can_dropped_invalid_skb(ndev, skb)) |
4421 |
+ return NETDEV_TX_OK; |
4422 |
+@@ -438,6 +453,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) |
4423 |
+ data[1] = be32_to_cpup((__be32 *)(cf->data + 4)); |
4424 |
+ |
4425 |
+ can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max); |
4426 |
++ |
4427 |
++ spin_lock_irqsave(&priv->tx_lock, flags); |
4428 |
++ |
4429 |
+ priv->tx_head++; |
4430 |
+ |
4431 |
+ /* Write the Frame to Xilinx CAN TX FIFO */ |
4432 |
+@@ -453,10 +471,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) |
4433 |
+ stats->tx_bytes += cf->can_dlc; |
4434 |
+ } |
4435 |
+ |
4436 |
++ /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */ |
4437 |
++ if (priv->tx_max > 1) |
4438 |
++ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK); |
4439 |
++ |
4440 |
+ /* Check if the TX buffer is full */ |
4441 |
+ if ((priv->tx_head - priv->tx_tail) == priv->tx_max) |
4442 |
+ netif_stop_queue(ndev); |
4443 |
+ |
4444 |
++ spin_unlock_irqrestore(&priv->tx_lock, flags); |
4445 |
++ |
4446 |
+ return NETDEV_TX_OK; |
4447 |
+ } |
4448 |
+ |
4449 |
+@@ -528,6 +552,123 @@ static int xcan_rx(struct net_device *ndev) |
4450 |
+ return 1; |
4451 |
+ } |
4452 |
+ |
4453 |
++/** |
4454 |
++ * xcan_current_error_state - Get current error state from HW |
4455 |
++ * @ndev: Pointer to net_device structure |
4456 |
++ * |
4457 |
++ * Checks the current CAN error state from the HW. Note that this |
4458 |
++ * only checks for ERROR_PASSIVE and ERROR_WARNING. |
4459 |
++ * |
4460 |
++ * Return: |
4461 |
++ * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE |
4462 |
++ * otherwise. |
4463 |
++ */ |
4464 |
++static enum can_state xcan_current_error_state(struct net_device *ndev) |
4465 |
++{ |
4466 |
++ struct xcan_priv *priv = netdev_priv(ndev); |
4467 |
++ u32 status = priv->read_reg(priv, XCAN_SR_OFFSET); |
4468 |
++ |
4469 |
++ if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) |
4470 |
++ return CAN_STATE_ERROR_PASSIVE; |
4471 |
++ else if (status & XCAN_SR_ERRWRN_MASK) |
4472 |
++ return CAN_STATE_ERROR_WARNING; |
4473 |
++ else |
4474 |
++ return CAN_STATE_ERROR_ACTIVE; |
4475 |
++} |
4476 |
++ |
4477 |
++/** |
4478 |
++ * xcan_set_error_state - Set new CAN error state |
4479 |
++ * @ndev: Pointer to net_device structure |
4480 |
++ * @new_state: The new CAN state to be set |
4481 |
++ * @cf: Error frame to be populated or NULL |
4482 |
++ * |
4483 |
++ * Set new CAN error state for the device, updating statistics and |
4484 |
++ * populating the error frame if given. |
4485 |
++ */ |
4486 |
++static void xcan_set_error_state(struct net_device *ndev, |
4487 |
++ enum can_state new_state, |
4488 |
++ struct can_frame *cf) |
4489 |
++{ |
4490 |
++ struct xcan_priv *priv = netdev_priv(ndev); |
4491 |
++ u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET); |
4492 |
++ u32 txerr = ecr & XCAN_ECR_TEC_MASK; |
4493 |
++ u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT; |
4494 |
++ |
4495 |
++ priv->can.state = new_state; |
4496 |
++ |
4497 |
++ if (cf) { |
4498 |
++ cf->can_id |= CAN_ERR_CRTL; |
4499 |
++ cf->data[6] = txerr; |
4500 |
++ cf->data[7] = rxerr; |
4501 |
++ } |
4502 |
++ |
4503 |
++ switch (new_state) { |
4504 |
++ case CAN_STATE_ERROR_PASSIVE: |
4505 |
++ priv->can.can_stats.error_passive++; |
4506 |
++ if (cf) |
4507 |
++ cf->data[1] = (rxerr > 127) ? |
4508 |
++ CAN_ERR_CRTL_RX_PASSIVE : |
4509 |
++ CAN_ERR_CRTL_TX_PASSIVE; |
4510 |
++ break; |
4511 |
++ case CAN_STATE_ERROR_WARNING: |
4512 |
++ priv->can.can_stats.error_warning++; |
4513 |
++ if (cf) |
4514 |
++ cf->data[1] |= (txerr > rxerr) ? |
4515 |
++ CAN_ERR_CRTL_TX_WARNING : |
4516 |
++ CAN_ERR_CRTL_RX_WARNING; |
4517 |
++ break; |
4518 |
++ case CAN_STATE_ERROR_ACTIVE: |
4519 |
++ if (cf) |
4520 |
++ cf->data[1] |= CAN_ERR_CRTL_ACTIVE; |
4521 |
++ break; |
4522 |
++ default: |
4523 |
++ /* non-ERROR states are handled elsewhere */ |
4524 |
++ WARN_ON(1); |
4525 |
++ break; |
4526 |
++ } |
4527 |
++} |
4528 |
++ |
4529 |
++/** |
4530 |
++ * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX |
4531 |
++ * @ndev: Pointer to net_device structure |
4532 |
++ * |
4533 |
++ * If the device is in an ERROR-WARNING or ERROR-PASSIVE state, check if |
4534 |
++ * the performed RX/TX has caused it to drop to a lesser state and set |
4535 |
++ * the interface state accordingly. |
4536 |
++ */ |
4537 |
++static void xcan_update_error_state_after_rxtx(struct net_device *ndev) |
4538 |
++{ |
4539 |
++ struct xcan_priv *priv = netdev_priv(ndev); |
4540 |
++ enum can_state old_state = priv->can.state; |
4541 |
++ enum can_state new_state; |
4542 |
++ |
4543 |
++ /* changing error state due to successful frame RX/TX can only |
4544 |
++ * occur from these states |
4545 |
++ */ |
4546 |
++ if (old_state != CAN_STATE_ERROR_WARNING && |
4547 |
++ old_state != CAN_STATE_ERROR_PASSIVE) |
4548 |
++ return; |
4549 |
++ |
4550 |
++ new_state = xcan_current_error_state(ndev); |
4551 |
++ |
4552 |
++ if (new_state != old_state) { |
4553 |
++ struct sk_buff *skb; |
4554 |
++ struct can_frame *cf; |
4555 |
++ |
4556 |
++ skb = alloc_can_err_skb(ndev, &cf); |
4557 |
++ |
4558 |
++ xcan_set_error_state(ndev, new_state, skb ? cf : NULL); |
4559 |
++ |
4560 |
++ if (skb) { |
4561 |
++ struct net_device_stats *stats = &ndev->stats; |
4562 |
++ |
4563 |
++ stats->rx_packets++; |
4564 |
++ stats->rx_bytes += cf->can_dlc; |
4565 |
++ netif_rx(skb); |
4566 |
++ } |
4567 |
++ } |
4568 |
++} |
4569 |
++ |
4570 |
+ /** |
4571 |
+ * xcan_err_interrupt - error frame Isr |
4572 |
+ * @ndev: net_device pointer |
4573 |
+@@ -543,16 +684,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) |
4574 |
+ struct net_device_stats *stats = &ndev->stats; |
4575 |
+ struct can_frame *cf; |
4576 |
+ struct sk_buff *skb; |
4577 |
+- u32 err_status, status, txerr = 0, rxerr = 0; |
4578 |
++ u32 err_status; |
4579 |
+ |
4580 |
+ skb = alloc_can_err_skb(ndev, &cf); |
4581 |
+ |
4582 |
+ err_status = priv->read_reg(priv, XCAN_ESR_OFFSET); |
4583 |
+ priv->write_reg(priv, XCAN_ESR_OFFSET, err_status); |
4584 |
+- txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK; |
4585 |
+- rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) & |
4586 |
+- XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT); |
4587 |
+- status = priv->read_reg(priv, XCAN_SR_OFFSET); |
4588 |
+ |
4589 |
+ if (isr & XCAN_IXR_BSOFF_MASK) { |
4590 |
+ priv->can.state = CAN_STATE_BUS_OFF; |
4591 |
+@@ -562,28 +699,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) |
4592 |
+ can_bus_off(ndev); |
4593 |
+ if (skb) |
4594 |
+ cf->can_id |= CAN_ERR_BUSOFF; |
4595 |
+- } else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) { |
4596 |
+- priv->can.state = CAN_STATE_ERROR_PASSIVE; |
4597 |
+- priv->can.can_stats.error_passive++; |
4598 |
+- if (skb) { |
4599 |
+- cf->can_id |= CAN_ERR_CRTL; |
4600 |
+- cf->data[1] = (rxerr > 127) ? |
4601 |
+- CAN_ERR_CRTL_RX_PASSIVE : |
4602 |
+- CAN_ERR_CRTL_TX_PASSIVE; |
4603 |
+- cf->data[6] = txerr; |
4604 |
+- cf->data[7] = rxerr; |
4605 |
+- } |
4606 |
+- } else if (status & XCAN_SR_ERRWRN_MASK) { |
4607 |
+- priv->can.state = CAN_STATE_ERROR_WARNING; |
4608 |
+- priv->can.can_stats.error_warning++; |
4609 |
+- if (skb) { |
4610 |
+- cf->can_id |= CAN_ERR_CRTL; |
4611 |
+- cf->data[1] |= (txerr > rxerr) ? |
4612 |
+- CAN_ERR_CRTL_TX_WARNING : |
4613 |
+- CAN_ERR_CRTL_RX_WARNING; |
4614 |
+- cf->data[6] = txerr; |
4615 |
+- cf->data[7] = rxerr; |
4616 |
+- } |
4617 |
++ } else { |
4618 |
++ enum can_state new_state = xcan_current_error_state(ndev); |
4619 |
++ |
4620 |
++ xcan_set_error_state(ndev, new_state, skb ? cf : NULL); |
4621 |
+ } |
4622 |
+ |
4623 |
+ /* Check for Arbitration lost interrupt */ |
4624 |
+@@ -599,7 +718,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) |
4625 |
+ if (isr & XCAN_IXR_RXOFLW_MASK) { |
4626 |
+ stats->rx_over_errors++; |
4627 |
+ stats->rx_errors++; |
4628 |
+- priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); |
4629 |
+ if (skb) { |
4630 |
+ cf->can_id |= CAN_ERR_CRTL; |
4631 |
+ cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; |
4632 |
+@@ -708,26 +826,20 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) |
4633 |
+ |
4634 |
+ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); |
4635 |
+ while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) { |
4636 |
+- if (isr & XCAN_IXR_RXOK_MASK) { |
4637 |
+- priv->write_reg(priv, XCAN_ICR_OFFSET, |
4638 |
+- XCAN_IXR_RXOK_MASK); |
4639 |
+- work_done += xcan_rx(ndev); |
4640 |
+- } else { |
4641 |
+- priv->write_reg(priv, XCAN_ICR_OFFSET, |
4642 |
+- XCAN_IXR_RXNEMP_MASK); |
4643 |
+- break; |
4644 |
+- } |
4645 |
++ work_done += xcan_rx(ndev); |
4646 |
+ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK); |
4647 |
+ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); |
4648 |
+ } |
4649 |
+ |
4650 |
+- if (work_done) |
4651 |
++ if (work_done) { |
4652 |
+ can_led_event(ndev, CAN_LED_EVENT_RX); |
4653 |
++ xcan_update_error_state_after_rxtx(ndev); |
4654 |
++ } |
4655 |
+ |
4656 |
+ if (work_done < quota) { |
4657 |
+ napi_complete(napi); |
4658 |
+ ier = priv->read_reg(priv, XCAN_IER_OFFSET); |
4659 |
+- ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK); |
4660 |
++ ier |= XCAN_IXR_RXNEMP_MASK; |
4661 |
+ priv->write_reg(priv, XCAN_IER_OFFSET, ier); |
4662 |
+ } |
4663 |
+ return work_done; |
4664 |
+@@ -742,18 +854,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) |
4665 |
+ { |
4666 |
+ struct xcan_priv *priv = netdev_priv(ndev); |
4667 |
+ struct net_device_stats *stats = &ndev->stats; |
4668 |
++ unsigned int frames_in_fifo; |
4669 |
++ int frames_sent = 1; /* TXOK => at least 1 frame was sent */ |
4670 |
++ unsigned long flags; |
4671 |
++ int retries = 0; |
4672 |
++ |
4673 |
++ /* Synchronize with xmit as we need to know the exact number |
4674 |
++ * of frames in the FIFO to stay in sync due to the TXFEMP |
4675 |
++ * handling. |
4676 |
++ * This also prevents a race between netif_wake_queue() and |
4677 |
++ * netif_stop_queue(). |
4678 |
++ */ |
4679 |
++ spin_lock_irqsave(&priv->tx_lock, flags); |
4680 |
+ |
4681 |
+- while ((priv->tx_head - priv->tx_tail > 0) && |
4682 |
+- (isr & XCAN_IXR_TXOK_MASK)) { |
4683 |
++ frames_in_fifo = priv->tx_head - priv->tx_tail; |
4684 |
++ |
4685 |
++ if (WARN_ON_ONCE(frames_in_fifo == 0)) { |
4686 |
++ /* clear TXOK anyway to avoid getting back here */ |
4687 |
+ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); |
4688 |
++ spin_unlock_irqrestore(&priv->tx_lock, flags); |
4689 |
++ return; |
4690 |
++ } |
4691 |
++ |
4692 |
++ /* Check if 2 frames were sent (TXOK only means that at least 1 |
4693 |
++ * frame was sent). |
4694 |
++ */ |
4695 |
++ if (frames_in_fifo > 1) { |
4696 |
++ WARN_ON(frames_in_fifo > priv->tx_max); |
4697 |
++ |
4698 |
++ /* Synchronize TXOK and isr so that after the loop: |
4699 |
++ * (1) isr variable is up-to-date at least up to TXOK clear |
4700 |
++ * time. This avoids us clearing a TXOK of a second frame |
4701 |
++ * but not noticing that the FIFO is now empty and thus |
4702 |
++ * marking only a single frame as sent. |
4703 |
++ * (2) No TXOK is left. Having one could mean leaving a |
4704 |
++ * stray TXOK as we might process the associated frame |
4705 |
++ * via TXFEMP handling as we read TXFEMP *after* TXOK |
4706 |
++ * clear to satisfy (1). |
4707 |
++ */ |
4708 |
++ while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) { |
4709 |
++ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); |
4710 |
++ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); |
4711 |
++ } |
4712 |
++ |
4713 |
++ if (isr & XCAN_IXR_TXFEMP_MASK) { |
4714 |
++ /* nothing in FIFO anymore */ |
4715 |
++ frames_sent = frames_in_fifo; |
4716 |
++ } |
4717 |
++ } else { |
4718 |
++ /* single frame in fifo, just clear TXOK */ |
4719 |
++ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); |
4720 |
++ } |
4721 |
++ |
4722 |
++ while (frames_sent--) { |
4723 |
+ can_get_echo_skb(ndev, priv->tx_tail % |
4724 |
+ priv->tx_max); |
4725 |
+ priv->tx_tail++; |
4726 |
+ stats->tx_packets++; |
4727 |
+- isr = priv->read_reg(priv, XCAN_ISR_OFFSET); |
4728 |
+ } |
4729 |
+- can_led_event(ndev, CAN_LED_EVENT_TX); |
4730 |
++ |
4731 |
+ netif_wake_queue(ndev); |
4732 |
++ |
4733 |
++ spin_unlock_irqrestore(&priv->tx_lock, flags); |
4734 |
++ |
4735 |
++ can_led_event(ndev, CAN_LED_EVENT_TX); |
4736 |
++ xcan_update_error_state_after_rxtx(ndev); |
4737 |
+ } |
4738 |
+ |
4739 |
+ /** |
4740 |
+@@ -772,6 +937,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) |
4741 |
+ struct net_device *ndev = (struct net_device *)dev_id; |
4742 |
+ struct xcan_priv *priv = netdev_priv(ndev); |
4743 |
+ u32 isr, ier; |
4744 |
++ u32 isr_errors; |
4745 |
+ |
4746 |
+ /* Get the interrupt status from Xilinx CAN */ |
4747 |
+ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); |
4748 |
+@@ -790,18 +956,17 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) |
4749 |
+ xcan_tx_interrupt(ndev, isr); |
4750 |
+ |
4751 |
+ /* Check for the type of error interrupt and Processing it */ |
4752 |
+- if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | |
4753 |
+- XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) { |
4754 |
+- priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK | |
4755 |
+- XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK | |
4756 |
+- XCAN_IXR_ARBLST_MASK)); |
4757 |
++ isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | |
4758 |
++ XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK); |
4759 |
++ if (isr_errors) { |
4760 |
++ priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors); |
4761 |
+ xcan_err_interrupt(ndev, isr); |
4762 |
+ } |
4763 |
+ |
4764 |
+ /* Check for the type of receive interrupt and Processing it */ |
4765 |
+- if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) { |
4766 |
++ if (isr & XCAN_IXR_RXNEMP_MASK) { |
4767 |
+ ier = priv->read_reg(priv, XCAN_IER_OFFSET); |
4768 |
+- ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK); |
4769 |
++ ier &= ~XCAN_IXR_RXNEMP_MASK; |
4770 |
+ priv->write_reg(priv, XCAN_IER_OFFSET, ier); |
4771 |
+ napi_schedule(&priv->napi); |
4772 |
+ } |
4773 |
+@@ -1030,6 +1195,18 @@ static int __maybe_unused xcan_resume(struct device *dev) |
4774 |
+ |
4775 |
+ static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume); |
4776 |
+ |
4777 |
++static const struct xcan_devtype_data xcan_zynq_data = { |
4778 |
++ .caps = XCAN_CAP_WATERMARK, |
4779 |
++}; |
4780 |
++ |
4781 |
++/* Match table for OF platform binding */ |
4782 |
++static const struct of_device_id xcan_of_match[] = { |
4783 |
++ { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data }, |
4784 |
++ { .compatible = "xlnx,axi-can-1.00.a", }, |
4785 |
++ { /* end of list */ }, |
4786 |
++}; |
4787 |
++MODULE_DEVICE_TABLE(of, xcan_of_match); |
4788 |
++ |
4789 |
+ /** |
4790 |
+ * xcan_probe - Platform registration call |
4791 |
+ * @pdev: Handle to the platform device structure |
4792 |
+@@ -1044,8 +1221,10 @@ static int xcan_probe(struct platform_device *pdev) |
4793 |
+ struct resource *res; /* IO mem resources */ |
4794 |
+ struct net_device *ndev; |
4795 |
+ struct xcan_priv *priv; |
4796 |
++ const struct of_device_id *of_id; |
4797 |
++ int caps = 0; |
4798 |
+ void __iomem *addr; |
4799 |
+- int ret, rx_max, tx_max; |
4800 |
++ int ret, rx_max, tx_max, tx_fifo_depth; |
4801 |
+ |
4802 |
+ /* Get the virtual base address for the device */ |
4803 |
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
4804 |
+@@ -1055,7 +1234,8 @@ static int xcan_probe(struct platform_device *pdev) |
4805 |
+ goto err; |
4806 |
+ } |
4807 |
+ |
4808 |
+- ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max); |
4809 |
++ ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", |
4810 |
++ &tx_fifo_depth); |
4811 |
+ if (ret < 0) |
4812 |
+ goto err; |
4813 |
+ |
4814 |
+@@ -1063,6 +1243,30 @@ static int xcan_probe(struct platform_device *pdev) |
4815 |
+ if (ret < 0) |
4816 |
+ goto err; |
4817 |
+ |
4818 |
++ of_id = of_match_device(xcan_of_match, &pdev->dev); |
4819 |
++ if (of_id) { |
4820 |
++ const struct xcan_devtype_data *devtype_data = of_id->data; |
4821 |
++ |
4822 |
++ if (devtype_data) |
4823 |
++ caps = devtype_data->caps; |
4824 |
++ } |
4825 |
++ |
4826 |
++ /* There is no way to directly figure out how many frames have been |
4827 |
++ * sent when the TXOK interrupt is processed. If watermark programming |
4828 |
++ * is supported, we can have 2 frames in the FIFO and use TXFEMP |
4829 |
++ * to determine if 1 or 2 frames have been sent. |
4830 |
++ * Theoretically we should be able to use TXFWMEMP to determine up |
4831 |
++ * to 3 frames, but it seems that after putting a second frame in the |
4832 |
++ * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less |
4833 |
++ * than 2 frames in FIFO) is set anyway with no TXOK (a frame was |
4834 |
++ * sent), which is not a sensible state - possibly TXFWMEMP is not |
4835 |
++ * completely synchronized with the rest of the bits? |
4836 |
++ */ |
4837 |
++ if (caps & XCAN_CAP_WATERMARK) |
4838 |
++ tx_max = min(tx_fifo_depth, 2); |
4839 |
++ else |
4840 |
++ tx_max = 1; |
4841 |
++ |
4842 |
+ /* Create a CAN device instance */ |
4843 |
+ ndev = alloc_candev(sizeof(struct xcan_priv), tx_max); |
4844 |
+ if (!ndev) |
4845 |
+@@ -1077,6 +1281,7 @@ static int xcan_probe(struct platform_device *pdev) |
4846 |
+ CAN_CTRLMODE_BERR_REPORTING; |
4847 |
+ priv->reg_base = addr; |
4848 |
+ priv->tx_max = tx_max; |
4849 |
++ spin_lock_init(&priv->tx_lock); |
4850 |
+ |
4851 |
+ /* Get IRQ for the device */ |
4852 |
+ ndev->irq = platform_get_irq(pdev, 0); |
4853 |
+@@ -1144,9 +1349,9 @@ static int xcan_probe(struct platform_device *pdev) |
4854 |
+ devm_can_led_init(ndev); |
4855 |
+ clk_disable_unprepare(priv->bus_clk); |
4856 |
+ clk_disable_unprepare(priv->can_clk); |
4857 |
+- netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n", |
4858 |
++ netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n", |
4859 |
+ priv->reg_base, ndev->irq, priv->can.clock.freq, |
4860 |
+- priv->tx_max); |
4861 |
++ tx_fifo_depth, priv->tx_max); |
4862 |
+ |
4863 |
+ return 0; |
4864 |
+ |
4865 |
+@@ -1182,14 +1387,6 @@ static int xcan_remove(struct platform_device *pdev) |
4866 |
+ return 0; |
4867 |
+ } |
4868 |
+ |
4869 |
+-/* Match table for OF platform binding */ |
4870 |
+-static const struct of_device_id xcan_of_match[] = { |
4871 |
+- { .compatible = "xlnx,zynq-can-1.0", }, |
4872 |
+- { .compatible = "xlnx,axi-can-1.00.a", }, |
4873 |
+- { /* end of list */ }, |
4874 |
+-}; |
4875 |
+-MODULE_DEVICE_TABLE(of, xcan_of_match); |
4876 |
+- |
4877 |
+ static struct platform_driver xcan_driver = { |
4878 |
+ .probe = xcan_probe, |
4879 |
+ .remove = xcan_remove, |
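
TXOK only reports that at least one frame completed, so with the 2-deep FIFO enabled by XCAN_CAP_WATERMARK the handler disambiguates via TXFEMP (TX FIFO empty). A small self-contained sketch of that inference, based on the flag semantics described in the driver comments above:

	#include <stdio.h>

	/* Frames completed, given FIFO occupancy and the two IRQ flags. */
	static unsigned int frames_sent(unsigned int in_fifo, int txok, int txfemp)
	{
		if (!txok || in_fifo == 0)
			return 0;
		/* TXOK alone: at least one frame; FIFO now empty: all of them. */
		return (in_fifo > 1 && txfemp) ? in_fifo : 1;
	}

	int main(void)
	{
		printf("%u\n", frames_sent(2, 1, 1));	/* 2: both frames left */
		printf("%u\n", frames_sent(2, 1, 0));	/* 1: second still queued */
		printf("%u\n", frames_sent(1, 1, 0));	/* 1: single frame, TXOK */
		return 0;
	}

This limitation is also why xcan_probe() caps tx_max at min(tx-fifo-depth, 2) when the watermark capability is present, and at 1 otherwise.
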
4880 |
+diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |
4881 |
+index e3080fbd9d00..7911dc3da98e 100644 |
4882 |
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |
4883 |
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |
4884 |
+@@ -2891,7 +2891,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, |
4885 |
+ u32 srqn = qp_get_srqn(qpc) & 0xffffff; |
4886 |
+ int use_srq = (qp_get_srqn(qpc) >> 24) & 1; |
4887 |
+ struct res_srq *srq; |
4888 |
+- int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; |
4889 |
++ int local_qpn = vhcr->in_modifier & 0xffffff; |
4890 |
+ |
4891 |
+ err = adjust_qp_sched_queue(dev, slave, qpc, inbox); |
4892 |
+ if (err) |
4893 |
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c |
4894 |
+index 7ed30d0b5273..a501f3ba6a3f 100644 |
4895 |
+--- a/drivers/usb/class/cdc-acm.c |
4896 |
++++ b/drivers/usb/class/cdc-acm.c |
4897 |
+@@ -1771,6 +1771,9 @@ static const struct usb_device_id acm_ids[] = { |
4898 |
+ { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ |
4899 |
+ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ |
4900 |
+ }, |
4901 |
++ { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */ |
4902 |
++ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ |
4903 |
++ }, |
4904 |
+ |
4905 |
+ { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ |
4906 |
+ .driver_info = CLEAR_HALT_CONDITIONS, |
4907 |
+diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c |
4908 |
+index 4d86da0df131..93756664592a 100644 |
4909 |
+--- a/drivers/usb/core/hub.c |
4910 |
++++ b/drivers/usb/core/hub.c |
4911 |
+@@ -1123,10 +1123,14 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) |
4912 |
+ |
4913 |
+ if (!udev || udev->state == USB_STATE_NOTATTACHED) { |
4914 |
+ /* Tell hub_wq to disconnect the device or |
4915 |
+- * check for a new connection |
4916 |
++ * check for a new connection or over current condition. |
4917 |
++ * Based on USB2.0 Spec Section 11.12.5, |
4918 |
++ * C_PORT_OVER_CURRENT could be set while |
4919 |
++ * PORT_OVER_CURRENT is not. So check for any of them. |
4920 |
+ */ |
4921 |
+ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || |
4922 |
+- (portstatus & USB_PORT_STAT_OVERCURRENT)) |
4923 |
++ (portstatus & USB_PORT_STAT_OVERCURRENT) || |
4924 |
++ (portchange & USB_PORT_STAT_C_OVERCURRENT)) |
4925 |
+ set_bit(port1, hub->change_bits); |
4926 |
+ |
4927 |
+ } else if (portstatus & USB_PORT_STAT_ENABLE) { |
4928 |
+diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c |
4929 |
+index 4191feb765b1..4800bb22cdd6 100644 |
4930 |
+--- a/drivers/usb/gadget/function/f_fs.c |
4931 |
++++ b/drivers/usb/gadget/function/f_fs.c |
4932 |
+@@ -3037,7 +3037,7 @@ static int ffs_func_setup(struct usb_function *f, |
4933 |
+ __ffs_event_add(ffs, FUNCTIONFS_SETUP); |
4934 |
+ spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags); |
4935 |
+ |
4936 |
+- return USB_GADGET_DELAYED_STATUS; |
4937 |
++ return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; |
4938 |
+ } |
4939 |
+ |
4940 |
+ static void ffs_func_suspend(struct usb_function *f) |
4941 |
+diff --git a/include/net/tcp.h b/include/net/tcp.h |
4942 |
+index a3696b778757..65babd8a682d 100644 |
4943 |
+--- a/include/net/tcp.h |
4944 |
++++ b/include/net/tcp.h |
4945 |
+@@ -376,6 +376,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, |
4946 |
+ struct pipe_inode_info *pipe, size_t len, |
4947 |
+ unsigned int flags); |
4948 |
+ |
4949 |
++void tcp_enter_quickack_mode(struct sock *sk); |
4950 |
+ static inline void tcp_dec_quickack_mode(struct sock *sk, |
4951 |
+ const unsigned int pkts) |
4952 |
+ { |
4953 |
+@@ -559,6 +560,7 @@ void tcp_send_fin(struct sock *sk); |
4954 |
+ void tcp_send_active_reset(struct sock *sk, gfp_t priority); |
4955 |
+ int tcp_send_synack(struct sock *); |
4956 |
+ void tcp_push_one(struct sock *, unsigned int mss_now); |
4957 |
++void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); |
4958 |
+ void tcp_send_ack(struct sock *sk); |
4959 |
+ void tcp_send_delayed_ack(struct sock *sk); |
4960 |
+ void tcp_send_loss_probe(struct sock *sk); |
4961 |
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c |
4962 |
+index 2017ffa5197a..96c9c0f0905a 100644 |
4963 |
+--- a/net/core/rtnetlink.c |
4964 |
++++ b/net/core/rtnetlink.c |
4965 |
+@@ -2087,9 +2087,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) |
4966 |
+ return err; |
4967 |
+ } |
4968 |
+ |
4969 |
+- dev->rtnl_link_state = RTNL_LINK_INITIALIZED; |
4970 |
+- |
4971 |
+- __dev_notify_flags(dev, old_flags, ~0U); |
4972 |
++ if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { |
4973 |
++ __dev_notify_flags(dev, old_flags, 0U); |
4974 |
++ } else { |
4975 |
++ dev->rtnl_link_state = RTNL_LINK_INITIALIZED; |
4976 |
++ __dev_notify_flags(dev, old_flags, ~0U); |
4977 |
++ } |
4978 |
+ return 0; |
4979 |
+ } |
4980 |
+ EXPORT_SYMBOL(rtnl_configure_link); |
4981 |
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c |
4982 |
+index 10286432f684..c11bb6d2d00a 100644 |
4983 |
+--- a/net/ipv4/ip_output.c |
4984 |
++++ b/net/ipv4/ip_output.c |
4985 |
+@@ -480,6 +480,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) |
4986 |
+ to->dev = from->dev; |
4987 |
+ to->mark = from->mark; |
4988 |
+ |
4989 |
++ skb_copy_hash(to, from); |
4990 |
++ |
4991 |
+ /* Copy the flags to each fragment. */ |
4992 |
+ IPCB(to)->flags = IPCB(from)->flags; |
4993 |
+ |
4994 |
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c |
4995 |
+index ce9a7fbb7c5f..88426a6a7a85 100644 |
4996 |
+--- a/net/ipv4/ip_sockglue.c |
4997 |
++++ b/net/ipv4/ip_sockglue.c |
4998 |
+@@ -135,15 +135,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) |
4999 |
+ { |
5000 |
+ struct sockaddr_in sin; |
5001 |
+ const struct iphdr *iph = ip_hdr(skb); |
5002 |
+- __be16 *ports = (__be16 *)skb_transport_header(skb); |
5003 |
++ __be16 *ports; |
5004 |
++ int end; |
5005 |
+ |
5006 |
+- if (skb_transport_offset(skb) + 4 > skb->len) |
5007 |
++ end = skb_transport_offset(skb) + 4; |
5008 |
++ if (end > 0 && !pskb_may_pull(skb, end)) |
5009 |
+ return; |
5010 |
+ |
5011 |
+ /* All current transport protocols have the port numbers in the |
5012 |
+ * first four bytes of the transport header and this function is |
5013 |
+ * written with this assumption in mind. |
5014 |
+ */ |
5015 |
++ ports = (__be16 *)skb_transport_header(skb); |
5016 |
+ |
5017 |
+ sin.sin_family = AF_INET; |
5018 |
+ sin.sin_addr.s_addr = iph->daddr; |
5019 |
+diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c |
5020 |
+index 55d7da1d2ce9..e63b764e55ea 100644 |
5021 |
+--- a/net/ipv4/tcp_dctcp.c |
5022 |
++++ b/net/ipv4/tcp_dctcp.c |
5023 |
+@@ -131,23 +131,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) |
5024 |
+ struct dctcp *ca = inet_csk_ca(sk); |
5025 |
+ struct tcp_sock *tp = tcp_sk(sk); |
5026 |
+ |
5027 |
+- /* State has changed from CE=0 to CE=1 and delayed |
5028 |
+- * ACK has not sent yet. |
5029 |
+- */ |
5030 |
+- if (!ca->ce_state && ca->delayed_ack_reserved) { |
5031 |
+- u32 tmp_rcv_nxt; |
5032 |
+- |
5033 |
+- /* Save current rcv_nxt. */ |
5034 |
+- tmp_rcv_nxt = tp->rcv_nxt; |
5035 |
+- |
5036 |
+- /* Generate previous ack with CE=0. */ |
5037 |
+- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; |
5038 |
+- tp->rcv_nxt = ca->prior_rcv_nxt; |
5039 |
+- |
5040 |
+- tcp_send_ack(sk); |
5041 |
+- |
5042 |
+- /* Recover current rcv_nxt. */ |
5043 |
+- tp->rcv_nxt = tmp_rcv_nxt; |
5044 |
++ if (!ca->ce_state) { |
5045 |
++ /* State has changed from CE=0 to CE=1, force an immediate |
5046 |
++ * ACK to reflect the new CE state. If an ACK was delayed, |
5047 |
++ * send that first to reflect the prior CE state. |
5048 |
++ */ |
5049 |
++ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) |
5050 |
++ __tcp_send_ack(sk, ca->prior_rcv_nxt); |
5051 |
++ tcp_enter_quickack_mode(sk); |
5052 |
+ } |
5053 |
+ |
5054 |
+ ca->prior_rcv_nxt = tp->rcv_nxt; |
5055 |
+@@ -161,23 +152,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) |
5056 |
+ struct dctcp *ca = inet_csk_ca(sk); |
5057 |
+ struct tcp_sock *tp = tcp_sk(sk); |
5058 |
+ |
5059 |
+- /* State has changed from CE=1 to CE=0 and delayed |
5060 |
+- * ACK has not sent yet. |
5061 |
+- */ |
5062 |
+- if (ca->ce_state && ca->delayed_ack_reserved) { |
5063 |
+- u32 tmp_rcv_nxt; |
5064 |
+- |
5065 |
+- /* Save current rcv_nxt. */ |
5066 |
+- tmp_rcv_nxt = tp->rcv_nxt; |
5067 |
+- |
5068 |
+- /* Generate previous ack with CE=1. */ |
5069 |
+- tp->ecn_flags |= TCP_ECN_DEMAND_CWR; |
5070 |
+- tp->rcv_nxt = ca->prior_rcv_nxt; |
5071 |
+- |
5072 |
+- tcp_send_ack(sk); |
5073 |
+- |
5074 |
+- /* Recover current rcv_nxt. */ |
5075 |
+- tp->rcv_nxt = tmp_rcv_nxt; |
5076 |
++ if (ca->ce_state) { |
5077 |
++ /* State has changed from CE=1 to CE=0, force an immediate |
5078 |
++ * ACK to reflect the new CE state. If an ACK was delayed, |
5079 |
++ * send that first to reflect the prior CE state. |
5080 |
++ */ |
5081 |
++ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) |
5082 |
++ __tcp_send_ack(sk, ca->prior_rcv_nxt); |
5083 |
++ tcp_enter_quickack_mode(sk); |
5084 |
+ } |
5085 |
+ |
5086 |
+ ca->prior_rcv_nxt = tp->rcv_nxt; |
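
The rewritten DCTCP handlers replace the old save/restore of rcv_nxt with a two-step sequence: if a delayed ACK is outstanding when CE flips, first emit an ACK as of prior_rcv_nxt (still carrying the old CE state, via __tcp_send_ack()), then enter quickack mode so the new state is signalled immediately. A toy model of that decision logic follows; the printfs stand in for the kernel calls and the rcv_nxt values are made up:

	#include <stdio.h>

	/* Toy model: CE flips while a delayed ACK may be outstanding. */
	static void on_ce_change(int *ce_state, int new_ce,
				 int delayed_ack_pending, unsigned int prior_rcv_nxt)
	{
		if (*ce_state == new_ce)
			return;
		if (delayed_ack_pending)	/* ~ icsk_ack.pending & ICSK_ACK_TIMER */
			printf("ACK @%u (old CE=%d)\n", prior_rcv_nxt, *ce_state);
		printf("quickack: next ACK reports CE=%d immediately\n", new_ce);
		*ce_state = new_ce;
	}

	int main(void)
	{
		int ce = 0;
		on_ce_change(&ce, 1, 1, 1000);	/* flush delayed ACK, then quickack */
		on_ce_change(&ce, 0, 0, 2000);	/* no delayed ACK: just quickack */
		return 0;
	}
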
5087 |
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c |
5088 |
+index 4350ee058441..5c645069a09a 100644 |
5089 |
+--- a/net/ipv4/tcp_input.c |
5090 |
++++ b/net/ipv4/tcp_input.c |
5091 |
+@@ -187,13 +187,14 @@ static void tcp_incr_quickack(struct sock *sk) |
5092 |
+ icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); |
5093 |
+ } |
5094 |
+ |
5095 |
+-static void tcp_enter_quickack_mode(struct sock *sk) |
5096 |
++void tcp_enter_quickack_mode(struct sock *sk) |
5097 |
+ { |
5098 |
+ struct inet_connection_sock *icsk = inet_csk(sk); |
5099 |
+ tcp_incr_quickack(sk); |
5100 |
+ icsk->icsk_ack.pingpong = 0; |
5101 |
+ icsk->icsk_ack.ato = TCP_ATO_MIN; |
5102 |
+ } |
5103 |
++EXPORT_SYMBOL(tcp_enter_quickack_mode); |
5104 |
+ |
5105 |
+ /* Send ACKs quickly, if "quick" count is not exhausted |
5106 |
+ * and the session is not interactive. |
5107 |
+@@ -4788,6 +4789,7 @@ restart: |
5108 |
+ static void tcp_collapse_ofo_queue(struct sock *sk) |
5109 |
+ { |
5110 |
+ struct tcp_sock *tp = tcp_sk(sk); |
5111 |
++ u32 range_truesize, sum_tiny = 0; |
5112 |
+ struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); |
5113 |
+ struct sk_buff *head; |
5114 |
+ u32 start, end; |
5115 |
+@@ -4797,6 +4799,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk) |
5116 |
+ |
5117 |
+ start = TCP_SKB_CB(skb)->seq; |
5118 |
+ end = TCP_SKB_CB(skb)->end_seq; |
5119 |
++ range_truesize = skb->truesize; |
5120 |
+ head = skb; |
5121 |
+ |
5122 |
+ for (;;) { |
5123 |
+@@ -4811,14 +4814,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk) |
5124 |
+ if (!skb || |
5125 |
+ after(TCP_SKB_CB(skb)->seq, end) || |
5126 |
+ before(TCP_SKB_CB(skb)->end_seq, start)) { |
5127 |
+- tcp_collapse(sk, &tp->out_of_order_queue, |
5128 |
+- head, skb, start, end); |
5129 |
++ /* Do not attempt collapsing tiny skbs */ |
5130 |
++ if (range_truesize != head->truesize || |
5131 |
++ end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { |
5132 |
++ tcp_collapse(sk, &tp->out_of_order_queue, |
5133 |
++ head, skb, start, end); |
5134 |
++ } else { |
5135 |
++ sum_tiny += range_truesize; |
5136 |
++ if (sum_tiny > sk->sk_rcvbuf >> 3) |
5137 |
++ return; |
5138 |
++ } |
5139 |
++ |
5140 |
+ head = skb; |
5141 |
+ if (!skb) |
5142 |
+ break; |
5143 |
+ /* Start new segment */ |
5144 |
+ start = TCP_SKB_CB(skb)->seq; |
5145 |
+ end = TCP_SKB_CB(skb)->end_seq; |
5146 |
++ range_truesize = skb->truesize; |
5147 |
+ } else { |
5148 |
+ if (before(TCP_SKB_CB(skb)->seq, start)) |
5149 |
+ start = TCP_SKB_CB(skb)->seq; |
5150 |
+@@ -4874,6 +4887,9 @@ static int tcp_prune_queue(struct sock *sk) |
5151 |
+ else if (tcp_under_memory_pressure(sk)) |
5152 |
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
5153 |
+ |
5154 |
++ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) |
5155 |
++ return 0; |
5156 |
++ |
5157 |
+ tcp_collapse_ofo_queue(sk); |
5158 |
+ if (!skb_queue_empty(&sk->sk_receive_queue)) |
5159 |
+ tcp_collapse(sk, &sk->sk_receive_queue, |
5160 |
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c |
5161 |
+index 2854db094864..6fa749ce231f 100644 |
5162 |
+--- a/net/ipv4/tcp_output.c |
5163 |
++++ b/net/ipv4/tcp_output.c |
5164 |
+@@ -177,8 +177,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, |
5165 |
+ } |
5166 |
+ |
5167 |
+ /* Account for an ACK we sent. */ |
5168 |
+-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) |
5169 |
++static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, |
5170 |
++ u32 rcv_nxt) |
5171 |
+ { |
5172 |
++ struct tcp_sock *tp = tcp_sk(sk); |
5173 |
++ |
5174 |
++ if (unlikely(rcv_nxt != tp->rcv_nxt)) |
5175 |
++ return; /* Special ACK sent by DCTCP to reflect ECN */ |
5176 |
+ tcp_dec_quickack_mode(sk, pkts); |
5177 |
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); |
5178 |
+ } |
5179 |
+@@ -901,8 +906,8 @@ out: |
5180 |
+ * We are working here with either a clone of the original |
5181 |
+ * SKB, or a fresh unique copy made by the retransmit engine. |
5182 |
+ */ |
5183 |
+-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, |
5184 |
+- gfp_t gfp_mask) |
5185 |
++static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, |
5186 |
++ int clone_it, gfp_t gfp_mask, u32 rcv_nxt) |
5187 |
+ { |
5188 |
+ const struct inet_connection_sock *icsk = inet_csk(sk); |
5189 |
+ struct inet_sock *inet; |
5190 |
+@@ -962,7 +967,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, |
5191 |
+ th->source = inet->inet_sport; |
5192 |
+ th->dest = inet->inet_dport; |
5193 |
+ th->seq = htonl(tcb->seq); |
5194 |
+- th->ack_seq = htonl(tp->rcv_nxt); |
5195 |
++ th->ack_seq = htonl(rcv_nxt); |
5196 |
+ *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | |
5197 |
+ tcb->tcp_flags); |
5198 |
+ |
5199 |
+@@ -1005,7 +1010,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, |
5200 |
+ icsk->icsk_af_ops->send_check(sk, skb); |
5201 |
+ |
5202 |
+ if (likely(tcb->tcp_flags & TCPHDR_ACK)) |
5203 |
+- tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); |
5204 |
++ tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); |
5205 |
+ |
5206 |
+ if (skb->len != tcp_header_size) |
5207 |
+ tcp_event_data_sent(tp, sk); |
5208 |
+@@ -1036,6 +1041,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, |
5209 |
+ return net_xmit_eval(err); |
5210 |
+ } |
5211 |
+ |
5212 |
++static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, |
5213 |
++ gfp_t gfp_mask) |
5214 |
++{ |
5215 |
++ return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, |
5216 |
++ tcp_sk(sk)->rcv_nxt); |
5217 |
++} |
5218 |
++ |
5219 |
+ /* This routine just queues the buffer for sending. |
5220 |
+ * |
5221 |
+ * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, |
5222 |
+@@ -3354,7 +3366,7 @@ void tcp_send_delayed_ack(struct sock *sk) |
5223 |
+ } |
5224 |
+ |
5225 |
+ /* This routine sends an ack and also updates the window. */ |
5226 |
+-void tcp_send_ack(struct sock *sk) |
5227 |
++void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) |
5228 |
+ { |
5229 |
+ struct sk_buff *buff; |
5230 |
+ |
5231 |
+@@ -3391,9 +3403,14 @@ void tcp_send_ack(struct sock *sk) |
5232 |
+ |
5233 |
+ /* Send it off, this clears delayed acks for us. */ |
5234 |
+ skb_mstamp_get(&buff->skb_mstamp); |
5235 |
+- tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); |
5236 |
++ __tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt); |
5237 |
++} |
5238 |
++EXPORT_SYMBOL_GPL(__tcp_send_ack); |
5239 |
++ |
5240 |
++void tcp_send_ack(struct sock *sk) |
5241 |
++{ |
5242 |
++ __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); |
5243 |
+ } |
5244 |
+-EXPORT_SYMBOL_GPL(tcp_send_ack); |
5245 |
+ |
5246 |
+ /* This routine sends a packet with an out of date sequence |
5247 |
+ * number. It assumes the other end will try to ack it. |
5248 |
+diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c |
5249 |
+index cae37bfd12ab..9f6e57ded338 100644 |
5250 |
+--- a/net/ipv6/datagram.c |
5251 |
++++ b/net/ipv6/datagram.c |
5252 |
+@@ -657,13 +657,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, |
5253 |
+ } |
5254 |
+ if (np->rxopt.bits.rxorigdstaddr) { |
5255 |
+ struct sockaddr_in6 sin6; |
5256 |
+- __be16 *ports = (__be16 *) skb_transport_header(skb); |
5257 |
++ __be16 *ports; |
5258 |
++ int end; |
5259 |
+ |
5260 |
+- if (skb_transport_offset(skb) + 4 <= skb->len) { |
5261 |
++ end = skb_transport_offset(skb) + 4; |
5262 |
++ if (end <= 0 || pskb_may_pull(skb, end)) { |
5263 |
+ /* All current transport protocols have the port numbers in the |
5264 |
+ * first four bytes of the transport header and this function is |
5265 |
+ * written with this assumption in mind. |
5266 |
+ */ |
5267 |
++ ports = (__be16 *)skb_transport_header(skb); |
5268 |
+ |
5269 |
+ sin6.sin6_family = AF_INET6; |
5270 |
+ sin6.sin6_addr = ipv6_hdr(skb)->daddr; |
5271 |
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c |
5272 |
+index 74786783834b..0feede45bd28 100644 |
5273 |
+--- a/net/ipv6/ip6_output.c |
5274 |
++++ b/net/ipv6/ip6_output.c |
5275 |
+@@ -559,6 +559,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) |
5276 |
+ to->dev = from->dev; |
5277 |
+ to->mark = from->mark; |
5278 |
+ |
5279 |
++ skb_copy_hash(to, from); |
5280 |
++ |
5281 |
+ #ifdef CONFIG_NET_SCHED |
5282 |
+ to->tc_index = from->tc_index; |
5283 |
+ #endif |