1 |
commit: 8e2a2ed3d7a6fc577c4e7954c2e9968ed574aa46 |
2 |
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org> |
3 |
AuthorDate: Thu Feb 8 00:40:27 2018 +0000 |
4 |
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org> |
5 |
CommitDate: Thu Feb 8 00:40:56 2018 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=8e2a2ed3 |
7 |
|
8 |
Linux patch 4.14.18 |
9 |
|
10 |
1017_linux-4.14.18.patch | 3790 ++++++++++++++++++++++++++++++++++++++++++++++ |
11 |
1 file changed, 3790 insertions(+) |
12 |
|
13 |
diff --git a/1017_linux-4.14.18.patch b/1017_linux-4.14.18.patch |
14 |
new file mode 100644 |
15 |
index 0000000..07fbf45 |
16 |
--- /dev/null |
17 |
+++ b/1017_linux-4.14.18.patch |
18 |
@@ -0,0 +1,3790 @@ |
19 |
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt |
20 |
+index 8122b5f98ea1..c76afdcafbef 100644 |
21 |
+--- a/Documentation/admin-guide/kernel-parameters.txt |
22 |
++++ b/Documentation/admin-guide/kernel-parameters.txt |
23 |
+@@ -2718,8 +2718,6 @@ |
24 |
+ norandmaps Don't use address space randomization. Equivalent to |
25 |
+ echo 0 > /proc/sys/kernel/randomize_va_space |
26 |
+ |
27 |
+- noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops |
28 |
+- |
29 |
+ noreplace-smp [X86-32,SMP] Don't replace SMP instructions |
30 |
+ with UP alternatives |
31 |
+ |
32 |
+diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt |
33 |
+new file mode 100644 |
34 |
+index 000000000000..e9e6cbae2841 |
35 |
+--- /dev/null |
36 |
++++ b/Documentation/speculation.txt |
37 |
+@@ -0,0 +1,90 @@ |
38 |
++This document explains potential effects of speculation, and how undesirable |
39 |
++effects can be mitigated portably using common APIs. |
40 |
++ |
41 |
++=========== |
42 |
++Speculation |
43 |
++=========== |
44 |
++ |
45 |
++To improve performance and minimize average latencies, many contemporary CPUs |
46 |
++employ speculative execution techniques such as branch prediction, performing |
47 |
++work which may be discarded at a later stage. |
48 |
++ |
49 |
++Typically speculative execution cannot be observed from architectural state, |
50 |
++such as the contents of registers. However, in some cases it is possible to |
51 |
++observe its impact on microarchitectural state, such as the presence or |
52 |
++absence of data in caches. Such state may form side-channels which can be |
53 |
++observed to extract secret information. |
54 |
++ |
55 |
++For example, in the presence of branch prediction, it is possible for bounds |
56 |
++checks to be ignored by code which is speculatively executed. Consider the |
57 |
++following code: |
58 |
++ |
59 |
++ int load_array(int *array, unsigned int index) |
60 |
++ { |
61 |
++ if (index >= MAX_ARRAY_ELEMS) |
62 |
++ return 0; |
63 |
++ else |
64 |
++ return array[index]; |
65 |
++ } |
66 |
++ |
67 |
++Which, on arm64, may be compiled to an assembly sequence such as: |
68 |
++ |
69 |
++ CMP <index>, #MAX_ARRAY_ELEMS |
70 |
++ B.LO less |
71 |
++ MOV <returnval>, #0 |
72 |
++ RET |
73 |
++ less: |
74 |
++ LDR <returnval>, [<array>, <index>] |
75 |
++ RET |
76 |
++ |
77 |
++It is possible that a CPU mis-predicts the conditional branch, and |
78 |
++speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This |
79 |
++value will subsequently be discarded, but the speculated load may affect |
80 |
++microarchitectural state which can be subsequently measured. |
81 |
++ |
82 |
++More complex sequences involving multiple dependent memory accesses may |
83 |
++result in sensitive information being leaked. Consider the following |
84 |
++code, building on the prior example: |
85 |
++ |
86 |
++ int load_dependent_arrays(int *arr1, int *arr2, int index) |
87 |
++ { |
88 |
++ int val1, val2; |
89 |
++ |
90 |
++ val1 = load_array(arr1, index); |
91 |
++ val2 = load_array(arr2, val1); |
92 |
++ |
93 |
++ return val2; |
94 |
++ } |
95 |
++ |
96 |
++Under speculation, the first call to load_array() may return the value |
97 |
++of an out-of-bounds address, while the second call will influence |
98 |
++microarchitectural state dependent on this value. This may provide an |
99 |
++arbitrary read primitive. |
100 |
++ |
101 |
++==================================== |
102 |
++Mitigating speculation side-channels |
103 |
++==================================== |
104 |
++ |
105 |
++The kernel provides a generic API to ensure that bounds checks are |
106 |
++respected even under speculation. Architectures which are affected by |
107 |
++speculation-based side-channels are expected to implement these |
108 |
++primitives. |
109 |
++ |
110 |
++The array_index_nospec() helper in <linux/nospec.h> can be used to |
111 |
++prevent information from being leaked via side-channels. |
112 |
++ |
113 |
++A call to array_index_nospec(index, size) returns a sanitized index |
114 |
++value that is bounded to [0, size) even under CPU speculation |
115 |
++conditions. |
116 |
++ |
117 |
++This can be used to protect the earlier load_array() example: |
118 |
++ |
119 |
++ int load_array(int *array, unsigned int index) |
120 |
++ { |
121 |
++ if (index >= MAX_ARRAY_ELEMS) |
122 |
++ return 0; |
123 |
++ else { |
124 |
++ index = array_index_nospec(index, MAX_ARRAY_ELEMS); |
125 |
++ return array[index]; |
126 |
++ } |
127 |
++ } |
128 |
+diff --git a/Makefile b/Makefile |
129 |
+index 7ed993896dd5..a69e5da9ed86 100644 |
130 |
+--- a/Makefile |
131 |
++++ b/Makefile |
132 |
+@@ -1,7 +1,7 @@ |
133 |
+ # SPDX-License-Identifier: GPL-2.0 |
134 |
+ VERSION = 4 |
135 |
+ PATCHLEVEL = 14 |
136 |
+-SUBLEVEL = 17 |
137 |
++SUBLEVEL = 18 |
138 |
+ EXTRAVERSION = |
139 |
+ NAME = Petit Gorille |
140 |
+ |
141 |
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig |
142 |
+index cb782ac1c35d..fe418226df7f 100644 |
143 |
+--- a/arch/powerpc/Kconfig |
144 |
++++ b/arch/powerpc/Kconfig |
145 |
+@@ -164,6 +164,7 @@ config PPC |
146 |
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP |
147 |
+ select GENERIC_CMOS_UPDATE |
148 |
+ select GENERIC_CPU_AUTOPROBE |
149 |
++ select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 |
150 |
+ select GENERIC_IRQ_SHOW |
151 |
+ select GENERIC_IRQ_SHOW_LEVEL |
152 |
+ select GENERIC_SMP_IDLE_THREAD |
153 |
+diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c |
154 |
+index 935059cb9e40..9527a4c6cbc2 100644 |
155 |
+--- a/arch/powerpc/kernel/setup_64.c |
156 |
++++ b/arch/powerpc/kernel/setup_64.c |
157 |
+@@ -38,6 +38,7 @@ |
158 |
+ #include <linux/memory.h> |
159 |
+ #include <linux/nmi.h> |
160 |
+ |
161 |
++#include <asm/debugfs.h> |
162 |
+ #include <asm/io.h> |
163 |
+ #include <asm/kdump.h> |
164 |
+ #include <asm/prom.h> |
165 |
+@@ -884,4 +885,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) |
166 |
+ if (!no_rfi_flush) |
167 |
+ rfi_flush_enable(enable); |
168 |
+ } |
169 |
++ |
170 |
++#ifdef CONFIG_DEBUG_FS |
171 |
++static int rfi_flush_set(void *data, u64 val) |
172 |
++{ |
173 |
++ if (val == 1) |
174 |
++ rfi_flush_enable(true); |
175 |
++ else if (val == 0) |
176 |
++ rfi_flush_enable(false); |
177 |
++ else |
178 |
++ return -EINVAL; |
179 |
++ |
180 |
++ return 0; |
181 |
++} |
182 |
++ |
183 |
++static int rfi_flush_get(void *data, u64 *val) |
184 |
++{ |
185 |
++ *val = rfi_flush ? 1 : 0; |
186 |
++ return 0; |
187 |
++} |
188 |
++ |
189 |
++DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); |
190 |
++ |
191 |
++static __init int rfi_flush_debugfs_init(void) |
192 |
++{ |
193 |
++ debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); |
194 |
++ return 0; |
195 |
++} |
196 |
++device_initcall(rfi_flush_debugfs_init); |
197 |
++#endif |
198 |
++ |
199 |
++ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) |
200 |
++{ |
201 |
++ if (rfi_flush) |
202 |
++ return sprintf(buf, "Mitigation: RFI Flush\n"); |
203 |
++ |
204 |
++ return sprintf(buf, "Vulnerable\n"); |
205 |
++} |
206 |
+ #endif /* CONFIG_PPC_BOOK3S_64 */ |
207 |
+diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c |
208 |
+index 03505ffbe1b6..60e21ccfb6d6 100644 |
209 |
+--- a/arch/x86/entry/common.c |
210 |
++++ b/arch/x86/entry/common.c |
211 |
+@@ -21,6 +21,7 @@ |
212 |
+ #include <linux/export.h> |
213 |
+ #include <linux/context_tracking.h> |
214 |
+ #include <linux/user-return-notifier.h> |
215 |
++#include <linux/nospec.h> |
216 |
+ #include <linux/uprobes.h> |
217 |
+ #include <linux/livepatch.h> |
218 |
+ #include <linux/syscalls.h> |
219 |
+@@ -208,7 +209,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) |
220 |
+ * special case only applies after poking regs and before the |
221 |
+ * very next return to user mode. |
222 |
+ */ |
223 |
+- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
224 |
++ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
225 |
+ #endif |
226 |
+ |
227 |
+ user_enter_irqoff(); |
228 |
+@@ -284,7 +285,8 @@ __visible void do_syscall_64(struct pt_regs *regs) |
229 |
+ * regs->orig_ax, which changes the behavior of some syscalls. |
230 |
+ */ |
231 |
+ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { |
232 |
+- regs->ax = sys_call_table[nr & __SYSCALL_MASK]( |
233 |
++ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); |
234 |
++ regs->ax = sys_call_table[nr]( |
235 |
+ regs->di, regs->si, regs->dx, |
236 |
+ regs->r10, regs->r8, regs->r9); |
237 |
+ } |
238 |
+@@ -306,7 +308,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
239 |
+ unsigned int nr = (unsigned int)regs->orig_ax; |
240 |
+ |
241 |
+ #ifdef CONFIG_IA32_EMULATION |
242 |
+- current->thread.status |= TS_COMPAT; |
243 |
++ ti->status |= TS_COMPAT; |
244 |
+ #endif |
245 |
+ |
246 |
+ if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { |
247 |
+@@ -320,6 +322,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
248 |
+ } |
249 |
+ |
250 |
+ if (likely(nr < IA32_NR_syscalls)) { |
251 |
++ nr = array_index_nospec(nr, IA32_NR_syscalls); |
252 |
+ /* |
253 |
+ * It's possible that a 32-bit syscall implementation |
254 |
+ * takes a 64-bit parameter but nonetheless assumes that |
255 |
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S |
256 |
+index 60c4c342316c..2a35b1e0fb90 100644 |
257 |
+--- a/arch/x86/entry/entry_32.S |
258 |
++++ b/arch/x86/entry/entry_32.S |
259 |
+@@ -252,7 +252,8 @@ ENTRY(__switch_to_asm) |
260 |
+ * exist, overwrite the RSB with entries which capture |
261 |
+ * speculative execution to prevent attack. |
262 |
+ */ |
263 |
+- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
264 |
++ /* Clobbers %ebx */ |
265 |
++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
266 |
+ #endif |
267 |
+ |
268 |
+ /* restore callee-saved registers */ |
269 |
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
270 |
+index be6b66464f6a..16e2d72e79a0 100644 |
271 |
+--- a/arch/x86/entry/entry_64.S |
272 |
++++ b/arch/x86/entry/entry_64.S |
273 |
+@@ -232,91 +232,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) |
274 |
+ pushq %r9 /* pt_regs->r9 */ |
275 |
+ pushq %r10 /* pt_regs->r10 */ |
276 |
+ pushq %r11 /* pt_regs->r11 */ |
277 |
+- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ |
278 |
+- UNWIND_HINT_REGS extra=0 |
279 |
+- |
280 |
+- TRACE_IRQS_OFF |
281 |
+- |
282 |
+- /* |
283 |
+- * If we need to do entry work or if we guess we'll need to do |
284 |
+- * exit work, go straight to the slow path. |
285 |
+- */ |
286 |
+- movq PER_CPU_VAR(current_task), %r11 |
287 |
+- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) |
288 |
+- jnz entry_SYSCALL64_slow_path |
289 |
+- |
290 |
+-entry_SYSCALL_64_fastpath: |
291 |
+- /* |
292 |
+- * Easy case: enable interrupts and issue the syscall. If the syscall |
293 |
+- * needs pt_regs, we'll call a stub that disables interrupts again |
294 |
+- * and jumps to the slow path. |
295 |
+- */ |
296 |
+- TRACE_IRQS_ON |
297 |
+- ENABLE_INTERRUPTS(CLBR_NONE) |
298 |
+-#if __SYSCALL_MASK == ~0 |
299 |
+- cmpq $__NR_syscall_max, %rax |
300 |
+-#else |
301 |
+- andl $__SYSCALL_MASK, %eax |
302 |
+- cmpl $__NR_syscall_max, %eax |
303 |
+-#endif |
304 |
+- ja 1f /* return -ENOSYS (already in pt_regs->ax) */ |
305 |
+- movq %r10, %rcx |
306 |
+- |
307 |
+- /* |
308 |
+- * This call instruction is handled specially in stub_ptregs_64. |
309 |
+- * It might end up jumping to the slow path. If it jumps, RAX |
310 |
+- * and all argument registers are clobbered. |
311 |
+- */ |
312 |
+-#ifdef CONFIG_RETPOLINE |
313 |
+- movq sys_call_table(, %rax, 8), %rax |
314 |
+- call __x86_indirect_thunk_rax |
315 |
+-#else |
316 |
+- call *sys_call_table(, %rax, 8) |
317 |
+-#endif |
318 |
+-.Lentry_SYSCALL_64_after_fastpath_call: |
319 |
+- |
320 |
+- movq %rax, RAX(%rsp) |
321 |
+-1: |
322 |
++ pushq %rbx /* pt_regs->rbx */ |
323 |
++ pushq %rbp /* pt_regs->rbp */ |
324 |
++ pushq %r12 /* pt_regs->r12 */ |
325 |
++ pushq %r13 /* pt_regs->r13 */ |
326 |
++ pushq %r14 /* pt_regs->r14 */ |
327 |
++ pushq %r15 /* pt_regs->r15 */ |
328 |
++ UNWIND_HINT_REGS |
329 |
+ |
330 |
+- /* |
331 |
+- * If we get here, then we know that pt_regs is clean for SYSRET64. |
332 |
+- * If we see that no exit work is required (which we are required |
333 |
+- * to check with IRQs off), then we can go straight to SYSRET64. |
334 |
+- */ |
335 |
+- DISABLE_INTERRUPTS(CLBR_ANY) |
336 |
+ TRACE_IRQS_OFF |
337 |
+- movq PER_CPU_VAR(current_task), %r11 |
338 |
+- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) |
339 |
+- jnz 1f |
340 |
+- |
341 |
+- LOCKDEP_SYS_EXIT |
342 |
+- TRACE_IRQS_ON /* user mode is traced as IRQs on */ |
343 |
+- movq RIP(%rsp), %rcx |
344 |
+- movq EFLAGS(%rsp), %r11 |
345 |
+- addq $6*8, %rsp /* skip extra regs -- they were preserved */ |
346 |
+- UNWIND_HINT_EMPTY |
347 |
+- jmp .Lpop_c_regs_except_rcx_r11_and_sysret |
348 |
+ |
349 |
+-1: |
350 |
+- /* |
351 |
+- * The fast path looked good when we started, but something changed |
352 |
+- * along the way and we need to switch to the slow path. Calling |
353 |
+- * raise(3) will trigger this, for example. IRQs are off. |
354 |
+- */ |
355 |
+- TRACE_IRQS_ON |
356 |
+- ENABLE_INTERRUPTS(CLBR_ANY) |
357 |
+- SAVE_EXTRA_REGS |
358 |
+- movq %rsp, %rdi |
359 |
+- call syscall_return_slowpath /* returns with IRQs disabled */ |
360 |
+- jmp return_from_SYSCALL_64 |
361 |
+- |
362 |
+-entry_SYSCALL64_slow_path: |
363 |
+ /* IRQs are off. */ |
364 |
+- SAVE_EXTRA_REGS |
365 |
+ movq %rsp, %rdi |
366 |
+ call do_syscall_64 /* returns with IRQs disabled */ |
367 |
+ |
368 |
+-return_from_SYSCALL_64: |
369 |
+ TRACE_IRQS_IRETQ /* we're about to change IF */ |
370 |
+ |
371 |
+ /* |
372 |
+@@ -389,7 +318,6 @@ syscall_return_via_sysret: |
373 |
+ /* rcx and r11 are already restored (see code above) */ |
374 |
+ UNWIND_HINT_EMPTY |
375 |
+ POP_EXTRA_REGS |
376 |
+-.Lpop_c_regs_except_rcx_r11_and_sysret: |
377 |
+ popq %rsi /* skip r11 */ |
378 |
+ popq %r10 |
379 |
+ popq %r9 |
380 |
+@@ -420,47 +348,6 @@ syscall_return_via_sysret: |
381 |
+ USERGS_SYSRET64 |
382 |
+ END(entry_SYSCALL_64) |
383 |
+ |
384 |
+-ENTRY(stub_ptregs_64) |
385 |
+- /* |
386 |
+- * Syscalls marked as needing ptregs land here. |
387 |
+- * If we are on the fast path, we need to save the extra regs, |
388 |
+- * which we achieve by trying again on the slow path. If we are on |
389 |
+- * the slow path, the extra regs are already saved. |
390 |
+- * |
391 |
+- * RAX stores a pointer to the C function implementing the syscall. |
392 |
+- * IRQs are on. |
393 |
+- */ |
394 |
+- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) |
395 |
+- jne 1f |
396 |
+- |
397 |
+- /* |
398 |
+- * Called from fast path -- disable IRQs again, pop return address |
399 |
+- * and jump to slow path |
400 |
+- */ |
401 |
+- DISABLE_INTERRUPTS(CLBR_ANY) |
402 |
+- TRACE_IRQS_OFF |
403 |
+- popq %rax |
404 |
+- UNWIND_HINT_REGS extra=0 |
405 |
+- jmp entry_SYSCALL64_slow_path |
406 |
+- |
407 |
+-1: |
408 |
+- JMP_NOSPEC %rax /* Called from C */ |
409 |
+-END(stub_ptregs_64) |
410 |
+- |
411 |
+-.macro ptregs_stub func |
412 |
+-ENTRY(ptregs_\func) |
413 |
+- UNWIND_HINT_FUNC |
414 |
+- leaq \func(%rip), %rax |
415 |
+- jmp stub_ptregs_64 |
416 |
+-END(ptregs_\func) |
417 |
+-.endm |
418 |
+- |
419 |
+-/* Instantiate ptregs_stub for each ptregs-using syscall */ |
420 |
+-#define __SYSCALL_64_QUAL_(sym) |
421 |
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym |
422 |
+-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) |
423 |
+-#include <asm/syscalls_64.h> |
424 |
+- |
425 |
+ /* |
426 |
+ * %rdi: prev task |
427 |
+ * %rsi: next task |
428 |
+@@ -495,7 +382,8 @@ ENTRY(__switch_to_asm) |
429 |
+ * exist, overwrite the RSB with entries which capture |
430 |
+ * speculative execution to prevent attack. |
431 |
+ */ |
432 |
+- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
433 |
++ /* Clobbers %rbx */ |
434 |
++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
435 |
+ #endif |
436 |
+ |
437 |
+ /* restore callee-saved registers */ |
438 |
+diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c |
439 |
+index 9c09775e589d..c176d2fab1da 100644 |
440 |
+--- a/arch/x86/entry/syscall_64.c |
441 |
++++ b/arch/x86/entry/syscall_64.c |
442 |
+@@ -7,14 +7,11 @@ |
443 |
+ #include <asm/asm-offsets.h> |
444 |
+ #include <asm/syscall.h> |
445 |
+ |
446 |
+-#define __SYSCALL_64_QUAL_(sym) sym |
447 |
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym |
448 |
+- |
449 |
+-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
450 |
++#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
451 |
+ #include <asm/syscalls_64.h> |
452 |
+ #undef __SYSCALL_64 |
453 |
+ |
454 |
+-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), |
455 |
++#define __SYSCALL_64(nr, sym, qual) [nr] = sym, |
456 |
+ |
457 |
+ extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
458 |
+ |
459 |
+diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h |
460 |
+index 0927cdc4f946..4d111616524b 100644 |
461 |
+--- a/arch/x86/include/asm/asm-prototypes.h |
462 |
++++ b/arch/x86/include/asm/asm-prototypes.h |
463 |
+@@ -38,5 +38,7 @@ INDIRECT_THUNK(dx) |
464 |
+ INDIRECT_THUNK(si) |
465 |
+ INDIRECT_THUNK(di) |
466 |
+ INDIRECT_THUNK(bp) |
467 |
+-INDIRECT_THUNK(sp) |
468 |
++asmlinkage void __fill_rsb(void); |
469 |
++asmlinkage void __clear_rsb(void); |
470 |
++ |
471 |
+ #endif /* CONFIG_RETPOLINE */ |
472 |
+diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h |
473 |
+index 01727dbc294a..1e7c955b6303 100644 |
474 |
+--- a/arch/x86/include/asm/barrier.h |
475 |
++++ b/arch/x86/include/asm/barrier.h |
476 |
+@@ -24,6 +24,34 @@ |
477 |
+ #define wmb() asm volatile("sfence" ::: "memory") |
478 |
+ #endif |
479 |
+ |
480 |
++/** |
481 |
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the |
482 |
++ * bounds check succeeds and 0 otherwise |
483 |
++ * @index: array element index |
484 |
++ * @size: number of elements in array |
485 |
++ * |
486 |
++ * Returns: |
487 |
++ * 0 - (index < size) |
488 |
++ */ |
489 |
++static inline unsigned long array_index_mask_nospec(unsigned long index, |
490 |
++ unsigned long size) |
491 |
++{ |
492 |
++ unsigned long mask; |
493 |
++ |
494 |
++ asm ("cmp %1,%2; sbb %0,%0;" |
495 |
++ :"=r" (mask) |
496 |
++ :"r"(size),"r" (index) |
497 |
++ :"cc"); |
498 |
++ return mask; |
499 |
++} |
500 |
++ |
501 |
++/* Override the default implementation from linux/nospec.h. */ |
502 |
++#define array_index_mask_nospec array_index_mask_nospec |
503 |
++ |
504 |
++/* Prevent speculative execution past this barrier. */ |
505 |
++#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ |
506 |
++ "lfence", X86_FEATURE_LFENCE_RDTSC) |
507 |
++ |
508 |
+ #ifdef CONFIG_X86_PPRO_FENCE |
509 |
+ #define dma_rmb() rmb() |
510 |
+ #else |
511 |
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h |
512 |
+index ea9a7dde62e5..70eddb3922ff 100644 |
513 |
+--- a/arch/x86/include/asm/cpufeature.h |
514 |
++++ b/arch/x86/include/asm/cpufeature.h |
515 |
+@@ -29,6 +29,7 @@ enum cpuid_leafs |
516 |
+ CPUID_8000_000A_EDX, |
517 |
+ CPUID_7_ECX, |
518 |
+ CPUID_8000_0007_EBX, |
519 |
++ CPUID_7_EDX, |
520 |
+ }; |
521 |
+ |
522 |
+ #ifdef CONFIG_X86_FEATURE_NAMES |
523 |
+@@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
524 |
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ |
525 |
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ |
526 |
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ |
527 |
++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ |
528 |
+ REQUIRED_MASK_CHECK || \ |
529 |
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18)) |
530 |
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) |
531 |
+ |
532 |
+ #define DISABLED_MASK_BIT_SET(feature_bit) \ |
533 |
+ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ |
534 |
+@@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
535 |
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ |
536 |
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ |
537 |
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ |
538 |
++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ |
539 |
+ DISABLED_MASK_CHECK || \ |
540 |
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18)) |
541 |
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19)) |
542 |
+ |
543 |
+ #define cpu_has(c, bit) \ |
544 |
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ |
545 |
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
546 |
+index 25b9375c1484..73b5fff159a4 100644 |
547 |
+--- a/arch/x86/include/asm/cpufeatures.h |
548 |
++++ b/arch/x86/include/asm/cpufeatures.h |
549 |
+@@ -13,7 +13,7 @@ |
550 |
+ /* |
551 |
+ * Defines x86 CPU feature bits |
552 |
+ */ |
553 |
+-#define NCAPINTS 18 /* N 32-bit words worth of info */ |
554 |
++#define NCAPINTS 19 /* N 32-bit words worth of info */ |
555 |
+ #define NBUGINTS 1 /* N 32-bit bug flags */ |
556 |
+ |
557 |
+ /* |
558 |
+@@ -203,14 +203,14 @@ |
559 |
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
560 |
+ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ |
561 |
+ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ |
562 |
+-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ |
563 |
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ |
564 |
++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ |
565 |
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ |
566 |
+ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
567 |
+-#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ |
568 |
+-#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
569 |
+ |
570 |
+ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ |
571 |
+-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ |
572 |
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ |
573 |
++ |
574 |
++#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ |
575 |
+ |
576 |
+ /* Virtualization flags: Linux defined, word 8 */ |
577 |
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
578 |
+@@ -271,6 +271,9 @@ |
579 |
+ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ |
580 |
+ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ |
581 |
+ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ |
582 |
++#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ |
583 |
++#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ |
584 |
++#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ |
585 |
+ |
586 |
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ |
587 |
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ |
588 |
+@@ -319,6 +322,13 @@ |
589 |
+ #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ |
590 |
+ #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ |
591 |
+ |
592 |
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ |
593 |
++#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ |
594 |
++#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ |
595 |
++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ |
596 |
++#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ |
597 |
++#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ |
598 |
++ |
599 |
+ /* |
600 |
+ * BUG word(s) |
601 |
+ */ |
602 |
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h |
603 |
+index e428e16dd822..c6a3af198294 100644 |
604 |
+--- a/arch/x86/include/asm/disabled-features.h |
605 |
++++ b/arch/x86/include/asm/disabled-features.h |
606 |
+@@ -71,6 +71,7 @@ |
607 |
+ #define DISABLED_MASK15 0 |
608 |
+ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) |
609 |
+ #define DISABLED_MASK17 0 |
610 |
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) |
611 |
++#define DISABLED_MASK18 0 |
612 |
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
613 |
+ |
614 |
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */ |
615 |
+diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h |
616 |
+index 64c4a30e0d39..e203169931c7 100644 |
617 |
+--- a/arch/x86/include/asm/fixmap.h |
618 |
++++ b/arch/x86/include/asm/fixmap.h |
619 |
+@@ -137,8 +137,10 @@ enum fixed_addresses { |
620 |
+ |
621 |
+ extern void reserve_top_address(unsigned long reserve); |
622 |
+ |
623 |
+-#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) |
624 |
+-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) |
625 |
++#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) |
626 |
++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) |
627 |
++#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) |
628 |
++#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) |
629 |
+ |
630 |
+ extern int fixmaps_set; |
631 |
+ |
632 |
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h |
633 |
+index fa11fb1fa570..eb83ff1bae8f 100644 |
634 |
+--- a/arch/x86/include/asm/msr-index.h |
635 |
++++ b/arch/x86/include/asm/msr-index.h |
636 |
+@@ -39,6 +39,13 @@ |
637 |
+ |
638 |
+ /* Intel MSRs. Some also available on other CPUs */ |
639 |
+ |
640 |
++#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ |
641 |
++#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ |
642 |
++#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ |
643 |
++ |
644 |
++#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ |
645 |
++#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ |
646 |
++ |
647 |
+ #define MSR_PPIN_CTL 0x0000004e |
648 |
+ #define MSR_PPIN 0x0000004f |
649 |
+ |
650 |
+@@ -57,6 +64,11 @@ |
651 |
+ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
652 |
+ |
653 |
+ #define MSR_MTRRcap 0x000000fe |
654 |
++ |
655 |
++#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a |
656 |
++#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ |
657 |
++#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ |
658 |
++ |
659 |
+ #define MSR_IA32_BBL_CR_CTL 0x00000119 |
660 |
+ #define MSR_IA32_BBL_CR_CTL3 0x0000011e |
661 |
+ |
662 |
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h |
663 |
+index 07962f5f6fba..30df295f6d94 100644 |
664 |
+--- a/arch/x86/include/asm/msr.h |
665 |
++++ b/arch/x86/include/asm/msr.h |
666 |
+@@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) |
667 |
+ * that some other imaginary CPU is updating continuously with a |
668 |
+ * time stamp. |
669 |
+ */ |
670 |
+- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, |
671 |
+- "lfence", X86_FEATURE_LFENCE_RDTSC); |
672 |
++ barrier_nospec(); |
673 |
+ return rdtsc(); |
674 |
+ } |
675 |
+ |
676 |
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
677 |
+index 4ad41087ce0e..4d57894635f2 100644 |
678 |
+--- a/arch/x86/include/asm/nospec-branch.h |
679 |
++++ b/arch/x86/include/asm/nospec-branch.h |
680 |
+@@ -1,56 +1,12 @@ |
681 |
+ /* SPDX-License-Identifier: GPL-2.0 */ |
682 |
+ |
683 |
+-#ifndef __NOSPEC_BRANCH_H__ |
684 |
+-#define __NOSPEC_BRANCH_H__ |
685 |
++#ifndef _ASM_X86_NOSPEC_BRANCH_H_ |
686 |
++#define _ASM_X86_NOSPEC_BRANCH_H_ |
687 |
+ |
688 |
+ #include <asm/alternative.h> |
689 |
+ #include <asm/alternative-asm.h> |
690 |
+ #include <asm/cpufeatures.h> |
691 |
+ |
692 |
+-/* |
693 |
+- * Fill the CPU return stack buffer. |
694 |
+- * |
695 |
+- * Each entry in the RSB, if used for a speculative 'ret', contains an |
696 |
+- * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
697 |
+- * |
698 |
+- * This is required in various cases for retpoline and IBRS-based |
699 |
+- * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
700 |
+- * eliminate potentially bogus entries from the RSB, and sometimes |
701 |
+- * purely to ensure that it doesn't get empty, which on some CPUs would |
702 |
+- * allow predictions from other (unwanted!) sources to be used. |
703 |
+- * |
704 |
+- * We define a CPP macro such that it can be used from both .S files and |
705 |
+- * inline assembly. It's possible to do a .macro and then include that |
706 |
+- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. |
707 |
+- */ |
708 |
+- |
709 |
+-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
710 |
+-#define RSB_FILL_LOOPS 16 /* To avoid underflow */ |
711 |
+- |
712 |
+-/* |
713 |
+- * Google experimented with loop-unrolling and this turned out to be |
714 |
+- * the optimal version — two calls, each with their own speculation |
715 |
+- * trap should their return address end up getting used, in a loop. |
716 |
+- */ |
717 |
+-#define __FILL_RETURN_BUFFER(reg, nr, sp) \ |
718 |
+- mov $(nr/2), reg; \ |
719 |
+-771: \ |
720 |
+- call 772f; \ |
721 |
+-773: /* speculation trap */ \ |
722 |
+- pause; \ |
723 |
+- lfence; \ |
724 |
+- jmp 773b; \ |
725 |
+-772: \ |
726 |
+- call 774f; \ |
727 |
+-775: /* speculation trap */ \ |
728 |
+- pause; \ |
729 |
+- lfence; \ |
730 |
+- jmp 775b; \ |
731 |
+-774: \ |
732 |
+- dec reg; \ |
733 |
+- jnz 771b; \ |
734 |
+- add $(BITS_PER_LONG/8) * nr, sp; |
735 |
+- |
736 |
+ #ifdef __ASSEMBLY__ |
737 |
+ |
738 |
+ /* |
739 |
+@@ -121,17 +77,10 @@ |
740 |
+ #endif |
741 |
+ .endm |
742 |
+ |
743 |
+- /* |
744 |
+- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP |
745 |
+- * monstrosity above, manually. |
746 |
+- */ |
747 |
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req |
748 |
++/* This clobbers the BX register */ |
749 |
++.macro FILL_RETURN_BUFFER nr:req ftr:req |
750 |
+ #ifdef CONFIG_RETPOLINE |
751 |
+- ANNOTATE_NOSPEC_ALTERNATIVE |
752 |
+- ALTERNATIVE "jmp .Lskip_rsb_\@", \ |
753 |
+- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ |
754 |
+- \ftr |
755 |
+-.Lskip_rsb_\@: |
756 |
++ ALTERNATIVE "", "call __clear_rsb", \ftr |
757 |
+ #endif |
758 |
+ .endm |
759 |
+ |
760 |
+@@ -201,22 +150,25 @@ extern char __indirect_thunk_end[]; |
761 |
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest |
762 |
+ * can be followed in the host, by overwriting the RSB completely. Both |
763 |
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future |
764 |
+- * CPUs with IBRS_ATT *might* it be avoided. |
765 |
++ * CPUs with IBRS_ALL *might* it be avoided. |
766 |
+ */ |
767 |
+ static inline void vmexit_fill_RSB(void) |
768 |
+ { |
769 |
+ #ifdef CONFIG_RETPOLINE |
770 |
+- unsigned long loops; |
771 |
+- |
772 |
+- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE |
773 |
+- ALTERNATIVE("jmp 910f", |
774 |
+- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), |
775 |
+- X86_FEATURE_RETPOLINE) |
776 |
+- "910:" |
777 |
+- : "=r" (loops), ASM_CALL_CONSTRAINT |
778 |
+- : : "memory" ); |
779 |
++ alternative_input("", |
780 |
++ "call __fill_rsb", |
781 |
++ X86_FEATURE_RETPOLINE, |
782 |
++ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); |
783 |
+ #endif |
784 |
+ } |
785 |
+ |
786 |
++static inline void indirect_branch_prediction_barrier(void) |
787 |
++{ |
788 |
++ alternative_input("", |
789 |
++ "call __ibp_barrier", |
790 |
++ X86_FEATURE_USE_IBPB, |
791 |
++ ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); |
792 |
++} |
793 |
++ |
794 |
+ #endif /* __ASSEMBLY__ */ |
795 |
+-#endif /* __NOSPEC_BRANCH_H__ */ |
796 |
++#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
797 |
+diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h |
798 |
+index ce245b0cdfca..0777e18a1d23 100644 |
799 |
+--- a/arch/x86/include/asm/pgtable_32_types.h |
800 |
++++ b/arch/x86/include/asm/pgtable_32_types.h |
801 |
+@@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ |
802 |
+ */ |
803 |
+ #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) |
804 |
+ |
805 |
+-#define CPU_ENTRY_AREA_BASE \ |
806 |
+- ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) |
807 |
++#define CPU_ENTRY_AREA_BASE \ |
808 |
++ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ |
809 |
++ & PMD_MASK) |
810 |
+ |
811 |
+ #define PKMAP_BASE \ |
812 |
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) |
813 |
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h |
814 |
+index 9c18da64daa9..c57c6e77c29f 100644 |
815 |
+--- a/arch/x86/include/asm/processor.h |
816 |
++++ b/arch/x86/include/asm/processor.h |
817 |
+@@ -459,8 +459,6 @@ struct thread_struct { |
818 |
+ unsigned short gsindex; |
819 |
+ #endif |
820 |
+ |
821 |
+- u32 status; /* thread synchronous flags */ |
822 |
+- |
823 |
+ #ifdef CONFIG_X86_64 |
824 |
+ unsigned long fsbase; |
825 |
+ unsigned long gsbase; |
826 |
+@@ -970,4 +968,7 @@ bool xen_set_default_idle(void); |
827 |
+ |
828 |
+ void stop_this_cpu(void *dummy); |
829 |
+ void df_debug(struct pt_regs *regs, long error_code); |
830 |
++ |
831 |
++void __ibp_barrier(void); |
832 |
++ |
833 |
+ #endif /* _ASM_X86_PROCESSOR_H */ |
834 |
+diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h |
835 |
+index d91ba04dd007..fb3a6de7440b 100644 |
836 |
+--- a/arch/x86/include/asm/required-features.h |
837 |
++++ b/arch/x86/include/asm/required-features.h |
838 |
+@@ -106,6 +106,7 @@ |
839 |
+ #define REQUIRED_MASK15 0 |
840 |
+ #define REQUIRED_MASK16 (NEED_LA57) |
841 |
+ #define REQUIRED_MASK17 0 |
842 |
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) |
843 |
++#define REQUIRED_MASK18 0 |
844 |
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
845 |
+ |
846 |
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */ |
847 |
+diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h |
848 |
+index e3c95e8e61c5..03eedc21246d 100644 |
849 |
+--- a/arch/x86/include/asm/syscall.h |
850 |
++++ b/arch/x86/include/asm/syscall.h |
851 |
+@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, |
852 |
+ * TS_COMPAT is set for 32-bit syscall entries and then |
853 |
+ * remains set until we return to user mode. |
854 |
+ */ |
855 |
+- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
856 |
++ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
857 |
+ /* |
858 |
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO |
859 |
+ * and will match correctly in comparisons. |
860 |
+@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, |
861 |
+ unsigned long *args) |
862 |
+ { |
863 |
+ # ifdef CONFIG_IA32_EMULATION |
864 |
+- if (task->thread.status & TS_COMPAT) |
865 |
++ if (task->thread_info.status & TS_COMPAT) |
866 |
+ switch (i) { |
867 |
+ case 0: |
868 |
+ if (!n--) break; |
869 |
+@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, |
870 |
+ const unsigned long *args) |
871 |
+ { |
872 |
+ # ifdef CONFIG_IA32_EMULATION |
873 |
+- if (task->thread.status & TS_COMPAT) |
874 |
++ if (task->thread_info.status & TS_COMPAT) |
875 |
+ switch (i) { |
876 |
+ case 0: |
877 |
+ if (!n--) break; |
878 |
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h |
879 |
+index 00223333821a..eda3b6823ca4 100644 |
880 |
+--- a/arch/x86/include/asm/thread_info.h |
881 |
++++ b/arch/x86/include/asm/thread_info.h |
882 |
+@@ -55,6 +55,7 @@ struct task_struct; |
883 |
+ |
884 |
+ struct thread_info { |
885 |
+ unsigned long flags; /* low level flags */ |
886 |
++ u32 status; /* thread synchronous flags */ |
887 |
+ }; |
888 |
+ |
889 |
+ #define INIT_THREAD_INFO(tsk) \ |
890 |
+@@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack, |
891 |
+ #define in_ia32_syscall() true |
892 |
+ #else |
893 |
+ #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ |
894 |
+- current->thread.status & TS_COMPAT) |
895 |
++ current_thread_info()->status & TS_COMPAT) |
896 |
+ #endif |
897 |
+ |
898 |
+ /* |
899 |
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h |
900 |
+index 3effd3c994af..4405c4b308e8 100644 |
901 |
+--- a/arch/x86/include/asm/tlbflush.h |
902 |
++++ b/arch/x86/include/asm/tlbflush.h |
903 |
+@@ -174,6 +174,8 @@ struct tlb_state { |
904 |
+ struct mm_struct *loaded_mm; |
905 |
+ u16 loaded_mm_asid; |
906 |
+ u16 next_asid; |
907 |
++ /* last user mm's ctx id */ |
908 |
++ u64 last_ctx_id; |
909 |
+ |
910 |
+ /* |
911 |
+ * We can be in one of several states: |
912 |
+diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h |
913 |
+index 574dff4d2913..aae77eb8491c 100644 |
914 |
+--- a/arch/x86/include/asm/uaccess.h |
915 |
++++ b/arch/x86/include/asm/uaccess.h |
916 |
+@@ -124,6 +124,11 @@ extern int __get_user_bad(void); |
917 |
+ |
918 |
+ #define __uaccess_begin() stac() |
919 |
+ #define __uaccess_end() clac() |
920 |
++#define __uaccess_begin_nospec() \ |
921 |
++({ \ |
922 |
++ stac(); \ |
923 |
++ barrier_nospec(); \ |
924 |
++}) |
925 |
+ |
926 |
+ /* |
927 |
+ * This is a type: either unsigned long, if the argument fits into |
928 |
+@@ -445,7 +450,7 @@ do { \ |
929 |
+ ({ \ |
930 |
+ int __gu_err; \ |
931 |
+ __inttype(*(ptr)) __gu_val; \ |
932 |
+- __uaccess_begin(); \ |
933 |
++ __uaccess_begin_nospec(); \ |
934 |
+ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ |
935 |
+ __uaccess_end(); \ |
936 |
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \ |
937 |
+@@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; }; |
938 |
+ __uaccess_begin(); \ |
939 |
+ barrier(); |
940 |
+ |
941 |
++#define uaccess_try_nospec do { \ |
942 |
++ current->thread.uaccess_err = 0; \ |
943 |
++ __uaccess_begin_nospec(); \ |
944 |
++ |
945 |
+ #define uaccess_catch(err) \ |
946 |
+ __uaccess_end(); \ |
947 |
+ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ |
948 |
+@@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; }; |
949 |
+ * get_user_ex(...); |
950 |
+ * } get_user_catch(err) |
951 |
+ */ |
952 |
+-#define get_user_try uaccess_try |
953 |
++#define get_user_try uaccess_try_nospec |
954 |
+ #define get_user_catch(err) uaccess_catch(err) |
955 |
+ |
956 |
+ #define get_user_ex(x, ptr) do { \ |
957 |
+@@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void) |
958 |
+ __typeof__(ptr) __uval = (uval); \ |
959 |
+ __typeof__(*(ptr)) __old = (old); \ |
960 |
+ __typeof__(*(ptr)) __new = (new); \ |
961 |
+- __uaccess_begin(); \ |
962 |
++ __uaccess_begin_nospec(); \ |
963 |
+ switch (size) { \ |
964 |
+ case 1: \ |
965 |
+ { \ |
966 |
+diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h |
967 |
+index 72950401b223..ba2dc1930630 100644 |
968 |
+--- a/arch/x86/include/asm/uaccess_32.h |
969 |
++++ b/arch/x86/include/asm/uaccess_32.h |
970 |
+@@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) |
971 |
+ switch (n) { |
972 |
+ case 1: |
973 |
+ ret = 0; |
974 |
+- __uaccess_begin(); |
975 |
++ __uaccess_begin_nospec(); |
976 |
+ __get_user_asm_nozero(*(u8 *)to, from, ret, |
977 |
+ "b", "b", "=q", 1); |
978 |
+ __uaccess_end(); |
979 |
+ return ret; |
980 |
+ case 2: |
981 |
+ ret = 0; |
982 |
+- __uaccess_begin(); |
983 |
++ __uaccess_begin_nospec(); |
984 |
+ __get_user_asm_nozero(*(u16 *)to, from, ret, |
985 |
+ "w", "w", "=r", 2); |
986 |
+ __uaccess_end(); |
987 |
+ return ret; |
988 |
+ case 4: |
989 |
+ ret = 0; |
990 |
+- __uaccess_begin(); |
991 |
++ __uaccess_begin_nospec(); |
992 |
+ __get_user_asm_nozero(*(u32 *)to, from, ret, |
993 |
+ "l", "k", "=r", 4); |
994 |
+ __uaccess_end(); |
995 |
+diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h |
996 |
+index f07ef3c575db..62546b3a398e 100644 |
997 |
+--- a/arch/x86/include/asm/uaccess_64.h |
998 |
++++ b/arch/x86/include/asm/uaccess_64.h |
999 |
+@@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) |
1000 |
+ return copy_user_generic(dst, (__force void *)src, size); |
1001 |
+ switch (size) { |
1002 |
+ case 1: |
1003 |
+- __uaccess_begin(); |
1004 |
++ __uaccess_begin_nospec(); |
1005 |
+ __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, |
1006 |
+ ret, "b", "b", "=q", 1); |
1007 |
+ __uaccess_end(); |
1008 |
+ return ret; |
1009 |
+ case 2: |
1010 |
+- __uaccess_begin(); |
1011 |
++ __uaccess_begin_nospec(); |
1012 |
+ __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, |
1013 |
+ ret, "w", "w", "=r", 2); |
1014 |
+ __uaccess_end(); |
1015 |
+ return ret; |
1016 |
+ case 4: |
1017 |
+- __uaccess_begin(); |
1018 |
++ __uaccess_begin_nospec(); |
1019 |
+ __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, |
1020 |
+ ret, "l", "k", "=r", 4); |
1021 |
+ __uaccess_end(); |
1022 |
+ return ret; |
1023 |
+ case 8: |
1024 |
+- __uaccess_begin(); |
1025 |
++ __uaccess_begin_nospec(); |
1026 |
+ __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, |
1027 |
+ ret, "q", "", "=r", 8); |
1028 |
+ __uaccess_end(); |
1029 |
+ return ret; |
1030 |
+ case 10: |
1031 |
+- __uaccess_begin(); |
1032 |
++ __uaccess_begin_nospec(); |
1033 |
+ __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, |
1034 |
+ ret, "q", "", "=r", 10); |
1035 |
+ if (likely(!ret)) |
1036 |
+@@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) |
1037 |
+ __uaccess_end(); |
1038 |
+ return ret; |
1039 |
+ case 16: |
1040 |
+- __uaccess_begin(); |
1041 |
++ __uaccess_begin_nospec(); |
1042 |
+ __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, |
1043 |
+ ret, "q", "", "=r", 16); |
1044 |
+ if (likely(!ret)) |
1045 |
+diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c |
1046 |
+index e0b97e4d1db5..21be0193d9dc 100644 |
1047 |
+--- a/arch/x86/kernel/alternative.c |
1048 |
++++ b/arch/x86/kernel/alternative.c |
1049 |
+@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) |
1050 |
+ } |
1051 |
+ __setup("noreplace-smp", setup_noreplace_smp); |
1052 |
+ |
1053 |
+-#ifdef CONFIG_PARAVIRT |
1054 |
+-static int __initdata_or_module noreplace_paravirt = 0; |
1055 |
+- |
1056 |
+-static int __init setup_noreplace_paravirt(char *str) |
1057 |
+-{ |
1058 |
+- noreplace_paravirt = 1; |
1059 |
+- return 1; |
1060 |
+-} |
1061 |
+-__setup("noreplace-paravirt", setup_noreplace_paravirt); |
1062 |
+-#endif |
1063 |
+- |
1064 |
+ #define DPRINTK(fmt, args...) \ |
1065 |
+ do { \ |
1066 |
+ if (debug_alternative) \ |
1067 |
+@@ -298,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) |
1068 |
+ tgt_rip = next_rip + o_dspl; |
1069 |
+ n_dspl = tgt_rip - orig_insn; |
1070 |
+ |
1071 |
+- DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); |
1072 |
++ DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); |
1073 |
+ |
1074 |
+ if (tgt_rip - orig_insn >= 0) { |
1075 |
+ if (n_dspl - 2 <= 127) |
1076 |
+@@ -355,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins |
1077 |
+ add_nops(instr + (a->instrlen - a->padlen), a->padlen); |
1078 |
+ local_irq_restore(flags); |
1079 |
+ |
1080 |
+- DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", |
1081 |
++ DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", |
1082 |
+ instr, a->instrlen - a->padlen, a->padlen); |
1083 |
+ } |
1084 |
+ |
1085 |
+@@ -376,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, |
1086 |
+ u8 *instr, *replacement; |
1087 |
+ u8 insnbuf[MAX_PATCH_LEN]; |
1088 |
+ |
1089 |
+- DPRINTK("alt table %p -> %p", start, end); |
1090 |
++ DPRINTK("alt table %px, -> %px", start, end); |
1091 |
+ /* |
1092 |
+ * The scan order should be from start to end. A later scanned |
1093 |
+ * alternative code can overwrite previously scanned alternative code. |
1094 |
+@@ -400,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, |
1095 |
+ continue; |
1096 |
+ } |
1097 |
+ |
1098 |
+- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", |
1099 |
++ DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", |
1100 |
+ a->cpuid >> 5, |
1101 |
+ a->cpuid & 0x1f, |
1102 |
+ instr, a->instrlen, |
1103 |
+ replacement, a->replacementlen, a->padlen); |
1104 |
+ |
1105 |
+- DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); |
1106 |
+- DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); |
1107 |
++ DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); |
1108 |
++ DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); |
1109 |
+ |
1110 |
+ memcpy(insnbuf, replacement, a->replacementlen); |
1111 |
+ insnbuf_sz = a->replacementlen; |
1112 |
+@@ -433,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, |
1113 |
+ a->instrlen - a->replacementlen); |
1114 |
+ insnbuf_sz += a->instrlen - a->replacementlen; |
1115 |
+ } |
1116 |
+- DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); |
1117 |
++ DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); |
1118 |
+ |
1119 |
+ text_poke_early(instr, insnbuf, insnbuf_sz); |
1120 |
+ } |
1121 |
+@@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, |
1122 |
+ struct paravirt_patch_site *p; |
1123 |
+ char insnbuf[MAX_PATCH_LEN]; |
1124 |
+ |
1125 |
+- if (noreplace_paravirt) |
1126 |
+- return; |
1127 |
+- |
1128 |
+ for (p = start; p < end; p++) { |
1129 |
+ unsigned int used; |
1130 |
+ |
1131 |
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c |
1132 |
+index 390b3dc3d438..71949bf2de5a 100644 |
1133 |
+--- a/arch/x86/kernel/cpu/bugs.c |
1134 |
++++ b/arch/x86/kernel/cpu/bugs.c |
1135 |
+@@ -11,6 +11,7 @@ |
1136 |
+ #include <linux/init.h> |
1137 |
+ #include <linux/utsname.h> |
1138 |
+ #include <linux/cpu.h> |
1139 |
++#include <linux/module.h> |
1140 |
+ |
1141 |
+ #include <asm/nospec-branch.h> |
1142 |
+ #include <asm/cmdline.h> |
1143 |
+@@ -90,20 +91,41 @@ static const char *spectre_v2_strings[] = { |
1144 |
+ }; |
1145 |
+ |
1146 |
+ #undef pr_fmt |
1147 |
+-#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt |
1148 |
++#define pr_fmt(fmt) "Spectre V2 : " fmt |
1149 |
+ |
1150 |
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; |
1151 |
+ |
1152 |
++#ifdef RETPOLINE |
1153 |
++static bool spectre_v2_bad_module; |
1154 |
++ |
1155 |
++bool retpoline_module_ok(bool has_retpoline) |
1156 |
++{ |
1157 |
++ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) |
1158 |
++ return true; |
1159 |
++ |
1160 |
++ pr_err("System may be vulnerable to spectre v2\n"); |
1161 |
++ spectre_v2_bad_module = true; |
1162 |
++ return false; |
1163 |
++} |
1164 |
++ |
1165 |
++static inline const char *spectre_v2_module_string(void) |
1166 |
++{ |
1167 |
++ return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; |
1168 |
++} |
1169 |
++#else |
1170 |
++static inline const char *spectre_v2_module_string(void) { return ""; } |
1171 |
++#endif |
1172 |
++ |
1173 |
+ static void __init spec2_print_if_insecure(const char *reason) |
1174 |
+ { |
1175 |
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1176 |
+- pr_info("%s\n", reason); |
1177 |
++ pr_info("%s selected on command line.\n", reason); |
1178 |
+ } |
1179 |
+ |
1180 |
+ static void __init spec2_print_if_secure(const char *reason) |
1181 |
+ { |
1182 |
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1183 |
+- pr_info("%s\n", reason); |
1184 |
++ pr_info("%s selected on command line.\n", reason); |
1185 |
+ } |
1186 |
+ |
1187 |
+ static inline bool retp_compiler(void) |
1188 |
+@@ -118,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) |
1189 |
+ return len == arglen && !strncmp(arg, opt, len); |
1190 |
+ } |
1191 |
+ |
1192 |
++static const struct { |
1193 |
++ const char *option; |
1194 |
++ enum spectre_v2_mitigation_cmd cmd; |
1195 |
++ bool secure; |
1196 |
++} mitigation_options[] = { |
1197 |
++ { "off", SPECTRE_V2_CMD_NONE, false }, |
1198 |
++ { "on", SPECTRE_V2_CMD_FORCE, true }, |
1199 |
++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, |
1200 |
++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, |
1201 |
++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, |
1202 |
++ { "auto", SPECTRE_V2_CMD_AUTO, false }, |
1203 |
++}; |
1204 |
++ |
1205 |
+ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) |
1206 |
+ { |
1207 |
+ char arg[20]; |
1208 |
+- int ret; |
1209 |
+- |
1210 |
+- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, |
1211 |
+- sizeof(arg)); |
1212 |
+- if (ret > 0) { |
1213 |
+- if (match_option(arg, ret, "off")) { |
1214 |
+- goto disable; |
1215 |
+- } else if (match_option(arg, ret, "on")) { |
1216 |
+- spec2_print_if_secure("force enabled on command line."); |
1217 |
+- return SPECTRE_V2_CMD_FORCE; |
1218 |
+- } else if (match_option(arg, ret, "retpoline")) { |
1219 |
+- spec2_print_if_insecure("retpoline selected on command line."); |
1220 |
+- return SPECTRE_V2_CMD_RETPOLINE; |
1221 |
+- } else if (match_option(arg, ret, "retpoline,amd")) { |
1222 |
+- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { |
1223 |
+- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); |
1224 |
+- return SPECTRE_V2_CMD_AUTO; |
1225 |
+- } |
1226 |
+- spec2_print_if_insecure("AMD retpoline selected on command line."); |
1227 |
+- return SPECTRE_V2_CMD_RETPOLINE_AMD; |
1228 |
+- } else if (match_option(arg, ret, "retpoline,generic")) { |
1229 |
+- spec2_print_if_insecure("generic retpoline selected on command line."); |
1230 |
+- return SPECTRE_V2_CMD_RETPOLINE_GENERIC; |
1231 |
+- } else if (match_option(arg, ret, "auto")) { |
1232 |
++ int ret, i; |
1233 |
++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; |
1234 |
++ |
1235 |
++ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
1236 |
++ return SPECTRE_V2_CMD_NONE; |
1237 |
++ else { |
1238 |
++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, |
1239 |
++ sizeof(arg)); |
1240 |
++ if (ret < 0) |
1241 |
++ return SPECTRE_V2_CMD_AUTO; |
1242 |
++ |
1243 |
++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { |
1244 |
++ if (!match_option(arg, ret, mitigation_options[i].option)) |
1245 |
++ continue; |
1246 |
++ cmd = mitigation_options[i].cmd; |
1247 |
++ break; |
1248 |
++ } |
1249 |
++ |
1250 |
++ if (i >= ARRAY_SIZE(mitigation_options)) { |
1250 |
++ pr_err("unknown option (%s). Switching to AUTO select\n", |
1251 |
++ arg); /* no table entry matched: i is out of range, report the user's string */ |
1252 |
+ return SPECTRE_V2_CMD_AUTO; |
1253 |
+ } |
1255 |
+ } |
1256 |
+ |
1257 |
+- if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
1258 |
++ if ((cmd == SPECTRE_V2_CMD_RETPOLINE || |
1259 |
++ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || |
1260 |
++ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && |
1261 |
++ !IS_ENABLED(CONFIG_RETPOLINE)) { |
1262 |
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", |
1263 |
++ mitigation_options[i].option); |
1264 |
+ return SPECTRE_V2_CMD_AUTO; |
1265 |
+-disable: |
1266 |
+- spec2_print_if_insecure("disabled on command line."); |
1267 |
+- return SPECTRE_V2_CMD_NONE; |
1268 |
++ } |
1269 |
++ |
1270 |
++ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && |
1271 |
++ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { |
1272 |
++ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); |
1273 |
++ return SPECTRE_V2_CMD_AUTO; |
1274 |
++ } |
1275 |
++ |
1276 |
++ if (mitigation_options[i].secure) |
1277 |
++ spec2_print_if_secure(mitigation_options[i].option); |
1278 |
++ else |
1279 |
++ spec2_print_if_insecure(mitigation_options[i].option); |
1280 |
++ |
1281 |
++ return cmd; |
1282 |
+ } |
1283 |
+ |
1284 |
+ /* Check for Skylake-like CPUs (for RSB handling) */ |
1285 |
+@@ -191,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void) |
1286 |
+ return; |
1287 |
+ |
1288 |
+ case SPECTRE_V2_CMD_FORCE: |
1289 |
+- /* FALLTRHU */ |
1290 |
+ case SPECTRE_V2_CMD_AUTO: |
1291 |
+- goto retpoline_auto; |
1292 |
+- |
1293 |
++ if (IS_ENABLED(CONFIG_RETPOLINE)) |
1294 |
++ goto retpoline_auto; |
1295 |
++ break; |
1296 |
+ case SPECTRE_V2_CMD_RETPOLINE_AMD: |
1297 |
+ if (IS_ENABLED(CONFIG_RETPOLINE)) |
1298 |
+ goto retpoline_amd; |
1299 |
+@@ -249,6 +297,12 @@ static void __init spectre_v2_select_mitigation(void) |
1300 |
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); |
1301 |
+ pr_info("Filling RSB on context switch\n"); |
1302 |
+ } |
1303 |
++ |
1304 |
++ /* Initialize Indirect Branch Prediction Barrier if supported */ |
1305 |
++ if (boot_cpu_has(X86_FEATURE_IBPB)) { |
1306 |
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB); |
1307 |
++ pr_info("Enabling Indirect Branch Prediction Barrier\n"); |
1308 |
++ } |
1309 |
+ } |
1310 |
+ |
1311 |
+ #undef pr_fmt |
1312 |
+@@ -269,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, |
1313 |
+ { |
1314 |
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) |
1315 |
+ return sprintf(buf, "Not affected\n"); |
1316 |
+- return sprintf(buf, "Vulnerable\n"); |
1317 |
++ return sprintf(buf, "Mitigation: __user pointer sanitization\n"); |
1318 |
+ } |
1319 |
+ |
1320 |
+ ssize_t cpu_show_spectre_v2(struct device *dev, |
1321 |
+@@ -278,6 +332,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev, |
1322 |
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1323 |
+ return sprintf(buf, "Not affected\n"); |
1324 |
+ |
1325 |
+- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); |
1326 |
++ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], |
1327 |
++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", |
1328 |
++ spectre_v2_module_string()); |
1329 |
+ } |
1330 |
+ #endif |
1331 |
++ |
1332 |
++void __ibp_barrier(void) |
1333 |
++{ |
1334 |
++ __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); |
1335 |
++} |
1336 |
++EXPORT_SYMBOL_GPL(__ibp_barrier); |
1337 |
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
1338 |
+index 372ba3fb400f..92b66e21bae5 100644 |
1339 |
+--- a/arch/x86/kernel/cpu/common.c |
1340 |
++++ b/arch/x86/kernel/cpu/common.c |
1341 |
+@@ -47,6 +47,8 @@ |
1342 |
+ #include <asm/pat.h> |
1343 |
+ #include <asm/microcode.h> |
1344 |
+ #include <asm/microcode_intel.h> |
1345 |
++#include <asm/intel-family.h> |
1346 |
++#include <asm/cpu_device_id.h> |
1347 |
+ |
1348 |
+ #ifdef CONFIG_X86_LOCAL_APIC |
1349 |
+ #include <asm/uv/uv.h> |
1350 |
+@@ -724,6 +726,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) |
1351 |
+ } |
1352 |
+ } |
1353 |
+ |
1354 |
++static void init_speculation_control(struct cpuinfo_x86 *c) |
1355 |
++{ |
1356 |
++ /* |
1357 |
++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, |
1358 |
++ * and they also have a different bit for STIBP support. Also, |
1359 |
++ * a hypervisor might have set the individual AMD bits even on |
1360 |
++ * Intel CPUs, for finer-grained selection of what's available. |
1361 |
++ * |
1362 |
++ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware |
1363 |
++ * features, which are visible in /proc/cpuinfo and used by the |
1364 |
++ * kernel. So set those accordingly from the Intel bits. |
1365 |
++ */ |
1366 |
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { |
1367 |
++ set_cpu_cap(c, X86_FEATURE_IBRS); |
1368 |
++ set_cpu_cap(c, X86_FEATURE_IBPB); |
1369 |
++ } |
1370 |
++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) |
1371 |
++ set_cpu_cap(c, X86_FEATURE_STIBP); |
1372 |
++} |
1373 |
++ |
1374 |
+ void get_cpu_cap(struct cpuinfo_x86 *c) |
1375 |
+ { |
1376 |
+ u32 eax, ebx, ecx, edx; |
1377 |
+@@ -745,6 +767,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
1378 |
+ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
1379 |
+ c->x86_capability[CPUID_7_0_EBX] = ebx; |
1380 |
+ c->x86_capability[CPUID_7_ECX] = ecx; |
1381 |
++ c->x86_capability[CPUID_7_EDX] = edx; |
1382 |
+ } |
1383 |
+ |
1384 |
+ /* Extended state features: level 0x0000000d */ |
1385 |
+@@ -817,6 +840,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
1386 |
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); |
1387 |
+ |
1388 |
+ init_scattered_cpuid_features(c); |
1389 |
++ init_speculation_control(c); |
1390 |
+ |
1391 |
+ /* |
1392 |
+ * Clear/Set all flags overridden by options, after probe. |
1393 |
+@@ -852,6 +876,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
1394 |
+ #endif |
1395 |
+ } |
1396 |
+ |
1397 |
++static const __initconst struct x86_cpu_id cpu_no_speculation[] = { |
1398 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, |
1399 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, |
1400 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, |
1401 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, |
1402 |
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, |
1403 |
++ { X86_VENDOR_CENTAUR, 5 }, |
1404 |
++ { X86_VENDOR_INTEL, 5 }, |
1405 |
++ { X86_VENDOR_NSC, 5 }, |
1406 |
++ { X86_VENDOR_ANY, 4 }, |
1407 |
++ {} |
1408 |
++}; |
1409 |
++ |
1410 |
++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { |
1411 |
++ { X86_VENDOR_AMD }, |
1412 |
++ {} |
1413 |
++}; |
1414 |
++ |
1415 |
++static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) |
1416 |
++{ |
1417 |
++ u64 ia32_cap = 0; |
1418 |
++ |
1419 |
++ if (x86_match_cpu(cpu_no_meltdown)) |
1420 |
++ return false; |
1421 |
++ |
1422 |
++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) |
1423 |
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); |
1424 |
++ |
1425 |
++ /* Rogue Data Cache Load? No! */ |
1426 |
++ if (ia32_cap & ARCH_CAP_RDCL_NO) |
1427 |
++ return false; |
1428 |
++ |
1429 |
++ return true; |
1430 |
++} |
1431 |
++ |
1432 |
+ /* |
1433 |
+ * Do minimum CPU detection early. |
1434 |
+ * Fields really needed: vendor, cpuid_level, family, model, mask, |
1435 |
+@@ -899,11 +958,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
1436 |
+ |
1437 |
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
1438 |
+ |
1439 |
+- if (c->x86_vendor != X86_VENDOR_AMD) |
1440 |
+- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
1441 |
+- |
1442 |
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1443 |
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1444 |
++ if (!x86_match_cpu(cpu_no_speculation)) { |
1445 |
++ if (cpu_vulnerable_to_meltdown(c)) |
1446 |
++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
1447 |
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1448 |
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1449 |
++ } |
1450 |
+ |
1451 |
+ fpu__init_system(c); |
1452 |
+ |
1453 |
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c |
1454 |
+index b720dacac051..4cf4f8cbc69d 100644 |
1455 |
+--- a/arch/x86/kernel/cpu/intel.c |
1456 |
++++ b/arch/x86/kernel/cpu/intel.c |
1457 |
+@@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) |
1458 |
+ ELF_HWCAP2 |= HWCAP2_RING3MWAIT; |
1459 |
+ } |
1460 |
+ |
1461 |
++/* |
1462 |
++ * Early microcode releases for the Spectre v2 mitigation were broken. |
1463 |
++ * Information taken from: |
1464 |
++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf |
1465 |
++ * - https://kb.vmware.com/s/article/52345 |
1466 |
++ * - Microcode revisions observed in the wild |
1467 |
++ * - Release note from 20180108 microcode release |
1468 |
++ */ |
1469 |
++struct sku_microcode { |
1470 |
++ u8 model; |
1471 |
++ u8 stepping; |
1472 |
++ u32 microcode; |
1473 |
++}; |
1474 |
++static const struct sku_microcode spectre_bad_microcodes[] = { |
1475 |
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, |
1476 |
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, |
1477 |
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, |
1478 |
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, |
1479 |
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, |
1480 |
++ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, |
1481 |
++ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, |
1482 |
++ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, |
1483 |
++ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, |
1484 |
++ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, |
1485 |
++ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, |
1486 |
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, |
1487 |
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, |
1488 |
++ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, |
1489 |
++ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, |
1490 |
++ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, |
1491 |
++ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, |
1492 |
++ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, |
1493 |
++ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, |
1494 |
++ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, |
1495 |
++ /* Updated in the 20180108 release; blacklist until we know otherwise */ |
1496 |
++ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, |
1497 |
++ /* Observed in the wild */ |
1498 |
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, |
1499 |
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, |
1500 |
++}; |
1501 |
++ |
1502 |
++static bool bad_spectre_microcode(struct cpuinfo_x86 *c) |
1503 |
++{ |
1504 |
++ int i; |
1505 |
++ |
1506 |
++ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { |
1507 |
++ if (c->x86_model == spectre_bad_microcodes[i].model && |
1508 |
++ c->x86_mask == spectre_bad_microcodes[i].stepping) |
1509 |
++ return (c->microcode <= spectre_bad_microcodes[i].microcode); |
1510 |
++ } |
1511 |
++ return false; |
1512 |
++} |
1513 |
++ |
1514 |
+ static void early_init_intel(struct cpuinfo_x86 *c) |
1515 |
+ { |
1516 |
+ u64 misc_enable; |
1517 |
+@@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) |
1518 |
+ if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) |
1519 |
+ c->microcode = intel_get_microcode_revision(); |
1520 |
+ |
1521 |
++ /* Now if any of them are set, check the blacklist and clear the lot */ |
1522 |
++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || |
1523 |
++ cpu_has(c, X86_FEATURE_INTEL_STIBP) || |
1524 |
++ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || |
1525 |
++ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { |
1526 |
++ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); |
1527 |
++ setup_clear_cpu_cap(X86_FEATURE_IBRS); |
1528 |
++ setup_clear_cpu_cap(X86_FEATURE_IBPB); |
1529 |
++ setup_clear_cpu_cap(X86_FEATURE_STIBP); |
1530 |
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); |
1531 |
++ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); |
1532 |
++ } |
1533 |
++ |
1534 |
+ /* |
1535 |
+ * Atom erratum AAE44/AAF40/AAG38/AAH41: |
1536 |
+ * |
1537 |
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c |
1538 |
+index d0e69769abfd..df11f5d604be 100644 |
1539 |
+--- a/arch/x86/kernel/cpu/scattered.c |
1540 |
++++ b/arch/x86/kernel/cpu/scattered.c |
1541 |
+@@ -21,8 +21,6 @@ struct cpuid_bit { |
1542 |
+ static const struct cpuid_bit cpuid_bits[] = { |
1543 |
+ { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, |
1544 |
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, |
1545 |
+- { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, |
1546 |
+- { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, |
1547 |
+ { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, |
1548 |
+ { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, |
1549 |
+ { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, |
1550 |
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c |
1551 |
+index c75466232016..9eb448c7859d 100644 |
1552 |
+--- a/arch/x86/kernel/process_64.c |
1553 |
++++ b/arch/x86/kernel/process_64.c |
1554 |
+@@ -557,7 +557,7 @@ static void __set_personality_x32(void) |
1555 |
+ * Pretend to come from a x32 execve. |
1556 |
+ */ |
1557 |
+ task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; |
1558 |
+- current->thread.status &= ~TS_COMPAT; |
1559 |
++ current_thread_info()->status &= ~TS_COMPAT; |
1560 |
+ #endif |
1561 |
+ } |
1562 |
+ |
1563 |
+@@ -571,7 +571,7 @@ static void __set_personality_ia32(void) |
1564 |
+ current->personality |= force_personality32; |
1565 |
+ /* Prepare the first "return" to user space */ |
1566 |
+ task_pt_regs(current)->orig_ax = __NR_ia32_execve; |
1567 |
+- current->thread.status |= TS_COMPAT; |
1568 |
++ current_thread_info()->status |= TS_COMPAT; |
1569 |
+ #endif |
1570 |
+ } |
1571 |
+ |
1572 |
+diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c |
1573 |
+index f37d18124648..ed5c4cdf0a34 100644 |
1574 |
+--- a/arch/x86/kernel/ptrace.c |
1575 |
++++ b/arch/x86/kernel/ptrace.c |
1576 |
+@@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) |
1577 |
+ */ |
1578 |
+ regs->orig_ax = value; |
1579 |
+ if (syscall_get_nr(child, regs) >= 0) |
1580 |
+- child->thread.status |= TS_I386_REGS_POKED; |
1581 |
++ child->thread_info.status |= TS_I386_REGS_POKED; |
1582 |
+ break; |
1583 |
+ |
1584 |
+ case offsetof(struct user32, regs.eflags): |
1585 |
+diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c |
1586 |
+index b9e00e8f1c9b..4cdc0b27ec82 100644 |
1587 |
+--- a/arch/x86/kernel/signal.c |
1588 |
++++ b/arch/x86/kernel/signal.c |
1589 |
+@@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
1590 |
+ * than the tracee. |
1591 |
+ */ |
1592 |
+ #ifdef CONFIG_IA32_EMULATION |
1593 |
+- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
1594 |
++ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) |
1595 |
+ return __NR_ia32_restart_syscall; |
1596 |
+ #endif |
1597 |
+ #ifdef CONFIG_X86_X32_ABI |
1598 |
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c |
1599 |
+index 0099e10eb045..13f5d4217e4f 100644 |
1600 |
+--- a/arch/x86/kvm/cpuid.c |
1601 |
++++ b/arch/x86/kvm/cpuid.c |
1602 |
+@@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void) |
1603 |
+ |
1604 |
+ #define F(x) bit(X86_FEATURE_##x) |
1605 |
+ |
1606 |
+-/* These are scattered features in cpufeatures.h. */ |
1607 |
+-#define KVM_CPUID_BIT_AVX512_4VNNIW 2 |
1608 |
+-#define KVM_CPUID_BIT_AVX512_4FMAPS 3 |
1609 |
++/* For scattered features from cpufeatures.h; we currently expose none */ |
1610 |
+ #define KF(x) bit(KVM_CPUID_BIT_##x) |
1611 |
+ |
1612 |
+ int kvm_update_cpuid(struct kvm_vcpu *vcpu) |
1613 |
+@@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1614 |
+ F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | |
1615 |
+ 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
1616 |
+ |
1617 |
++ /* cpuid 0x80000008.ebx */ |
1618 |
++ const u32 kvm_cpuid_8000_0008_ebx_x86_features = |
1619 |
++ F(IBPB) | F(IBRS); |
1620 |
++ |
1621 |
+ /* cpuid 0xC0000001.edx */ |
1622 |
+ const u32 kvm_cpuid_C000_0001_edx_x86_features = |
1623 |
+ F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | |
1624 |
+@@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1625 |
+ |
1626 |
+ /* cpuid 7.0.edx*/ |
1627 |
+ const u32 kvm_cpuid_7_0_edx_x86_features = |
1628 |
+- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); |
1629 |
++ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | |
1630 |
++ F(ARCH_CAPABILITIES); |
1631 |
+ |
1632 |
+ /* all calls to cpuid_count() should be made on the same cpu */ |
1633 |
+ get_cpu(); |
1634 |
+@@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1635 |
+ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) |
1636 |
+ entry->ecx &= ~F(PKU); |
1637 |
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features; |
1638 |
+- entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); |
1639 |
++ cpuid_mask(&entry->edx, CPUID_7_EDX); |
1640 |
+ } else { |
1641 |
+ entry->ebx = 0; |
1642 |
+ entry->ecx = 0; |
1643 |
+@@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1644 |
+ if (!g_phys_as) |
1645 |
+ g_phys_as = phys_as; |
1646 |
+ entry->eax = g_phys_as | (virt_as << 8); |
1647 |
+- entry->ebx = entry->edx = 0; |
1648 |
++ entry->edx = 0; |
1649 |
++ /* IBRS and IBPB aren't necessarily present in hardware cpuid */ |
1650 |
++ if (boot_cpu_has(X86_FEATURE_IBPB)) |
1651 |
++ entry->ebx |= F(IBPB); |
1652 |
++ if (boot_cpu_has(X86_FEATURE_IBRS)) |
1653 |
++ entry->ebx |= F(IBRS); |
1654 |
++ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; |
1655 |
++ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); |
1656 |
+ break; |
1657 |
+ } |
1658 |
+ case 0x80000019: |
1659 |
+diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h |
1660 |
+index c2cea6651279..9a327d5b6d1f 100644 |
1661 |
+--- a/arch/x86/kvm/cpuid.h |
1662 |
++++ b/arch/x86/kvm/cpuid.h |
1663 |
+@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { |
1664 |
+ [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, |
1665 |
+ [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, |
1666 |
+ [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, |
1667 |
++ [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, |
1668 |
+ }; |
1669 |
+ |
1670 |
+ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) |
1671 |
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c |
1672 |
+index eca6a89f2326..fab073b19528 100644 |
1673 |
+--- a/arch/x86/kvm/emulate.c |
1674 |
++++ b/arch/x86/kvm/emulate.c |
1675 |
+@@ -25,6 +25,7 @@ |
1676 |
+ #include <asm/kvm_emulate.h> |
1677 |
+ #include <linux/stringify.h> |
1678 |
+ #include <asm/debugreg.h> |
1679 |
++#include <asm/nospec-branch.h> |
1680 |
+ |
1681 |
+ #include "x86.h" |
1682 |
+ #include "tss.h" |
1683 |
+@@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) |
1684 |
+ void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); |
1685 |
+ |
1686 |
+ flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; |
1687 |
+- asm("push %[flags]; popf; call *%[fastop]" |
1688 |
+- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); |
1689 |
++ asm("push %[flags]; popf; " CALL_NOSPEC |
1690 |
++ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); |
1691 |
+ return rc; |
1692 |
+ } |
1693 |
+ |
1694 |
+@@ -5350,9 +5351,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) |
1695 |
+ if (!(ctxt->d & ByteOp)) |
1696 |
+ fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; |
1697 |
+ |
1698 |
+- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" |
1699 |
++ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" |
1700 |
+ : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), |
1701 |
+- [fastop]"+S"(fop), ASM_CALL_CONSTRAINT |
1702 |
++ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT |
1703 |
+ : "c"(ctxt->src2.val)); |
1704 |
+ |
1705 |
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); |
1706 |
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c |
1707 |
+index 6a8284f72328..e0bc3ad0f6cd 100644 |
1708 |
+--- a/arch/x86/kvm/svm.c |
1709 |
++++ b/arch/x86/kvm/svm.c |
1710 |
+@@ -184,6 +184,8 @@ struct vcpu_svm { |
1711 |
+ u64 gs_base; |
1712 |
+ } host; |
1713 |
+ |
1714 |
++ u64 spec_ctrl; |
1715 |
++ |
1716 |
+ u32 *msrpm; |
1717 |
+ |
1718 |
+ ulong nmi_iret_rip; |
1719 |
+@@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs { |
1720 |
+ { .index = MSR_CSTAR, .always = true }, |
1721 |
+ { .index = MSR_SYSCALL_MASK, .always = true }, |
1722 |
+ #endif |
1723 |
++ { .index = MSR_IA32_SPEC_CTRL, .always = false }, |
1724 |
++ { .index = MSR_IA32_PRED_CMD, .always = false }, |
1725 |
+ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, |
1726 |
+ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, |
1727 |
+ { .index = MSR_IA32_LASTINTFROMIP, .always = false }, |
1728 |
+@@ -529,6 +533,7 @@ struct svm_cpu_data { |
1729 |
+ struct kvm_ldttss_desc *tss_desc; |
1730 |
+ |
1731 |
+ struct page *save_area; |
1732 |
++ struct vmcb *current_vmcb; |
1733 |
+ }; |
1734 |
+ |
1735 |
+ static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); |
1736 |
+@@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index) |
1737 |
+ return false; |
1738 |
+ } |
1739 |
+ |
1740 |
++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) |
1741 |
++{ |
1742 |
++ u8 bit_write; |
1743 |
++ unsigned long tmp; |
1744 |
++ u32 offset; |
1745 |
++ u32 *msrpm; |
1746 |
++ |
1747 |
++ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: |
1748 |
++ to_svm(vcpu)->msrpm; |
1749 |
++ |
1750 |
++ offset = svm_msrpm_offset(msr); |
1751 |
++ bit_write = 2 * (msr & 0x0f) + 1; |
1752 |
++ tmp = msrpm[offset]; |
1753 |
++ |
1754 |
++ BUG_ON(offset == MSR_INVALID); |
1755 |
++ |
1756 |
++ return !!test_bit(bit_write, &tmp); |
1757 |
++} |
1758 |
++ |
1759 |
+ static void set_msr_interception(u32 *msrpm, unsigned msr, |
1760 |
+ int read, int write) |
1761 |
+ { |
1762 |
+@@ -1585,6 +1609,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
1763 |
+ u32 dummy; |
1764 |
+ u32 eax = 1; |
1765 |
+ |
1766 |
++ svm->spec_ctrl = 0; |
1767 |
++ |
1768 |
+ if (!init_event) { |
1769 |
+ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | |
1770 |
+ MSR_IA32_APICBASE_ENABLE; |
1771 |
+@@ -1706,11 +1732,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) |
1772 |
+ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); |
1773 |
+ kvm_vcpu_uninit(vcpu); |
1774 |
+ kmem_cache_free(kvm_vcpu_cache, svm); |
1775 |
++ /* |
1776 |
++ * The vmcb page can be recycled, causing a false negative in |
1777 |
++ * svm_vcpu_load(). So do a full IBPB now. |
1778 |
++ */ |
1779 |
++ indirect_branch_prediction_barrier(); |
1780 |
+ } |
1781 |
+ |
1782 |
+ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1783 |
+ { |
1784 |
+ struct vcpu_svm *svm = to_svm(vcpu); |
1785 |
++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
1786 |
+ int i; |
1787 |
+ |
1788 |
+ if (unlikely(cpu != vcpu->cpu)) { |
1789 |
+@@ -1739,6 +1771,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1790 |
+ if (static_cpu_has(X86_FEATURE_RDTSCP)) |
1791 |
+ wrmsrl(MSR_TSC_AUX, svm->tsc_aux); |
1792 |
+ |
1793 |
++ if (sd->current_vmcb != svm->vmcb) { |
1794 |
++ sd->current_vmcb = svm->vmcb; |
1795 |
++ indirect_branch_prediction_barrier(); |
1796 |
++ } |
1797 |
+ avic_vcpu_load(vcpu, cpu); |
1798 |
+ } |
1799 |
+ |
1800 |
+@@ -3579,6 +3615,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
1801 |
+ case MSR_VM_CR: |
1802 |
+ msr_info->data = svm->nested.vm_cr_msr; |
1803 |
+ break; |
1804 |
++ case MSR_IA32_SPEC_CTRL: |
1805 |
++ if (!msr_info->host_initiated && |
1806 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) |
1807 |
++ return 1; |
1808 |
++ |
1809 |
++ msr_info->data = svm->spec_ctrl; |
1810 |
++ break; |
1811 |
+ case MSR_IA32_UCODE_REV: |
1812 |
+ msr_info->data = 0x01000065; |
1813 |
+ break; |
1814 |
+@@ -3670,6 +3713,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) |
1815 |
+ case MSR_IA32_TSC: |
1816 |
+ kvm_write_tsc(vcpu, msr); |
1817 |
+ break; |
1818 |
++ case MSR_IA32_SPEC_CTRL: |
1819 |
++ if (!msr->host_initiated && |
1820 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) |
1821 |
++ return 1; |
1822 |
++ |
1823 |
++ /* The STIBP bit doesn't fault even if it's not advertised */ |
1824 |
++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) |
1825 |
++ return 1; |
1826 |
++ |
1827 |
++ svm->spec_ctrl = data; |
1828 |
++ |
1829 |
++ if (!data) |
1830 |
++ break; |
1831 |
++ |
1832 |
++ /* |
1833 |
++ * For non-nested: |
1834 |
++ * When it's written (to non-zero) for the first time, pass |
1835 |
++ * it through. |
1836 |
++ * |
1837 |
++ * For nested: |
1838 |
++ * The handling of the MSR bitmap for L2 guests is done in |
1839 |
++ * nested_svm_vmrun_msrpm. |
1840 |
++ * We update the L1 MSR bit as well since it will end up |
1841 |
++ * touching the MSR anyway now. |
1842 |
++ */ |
1843 |
++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); |
1844 |
++ break; |
1845 |
++ case MSR_IA32_PRED_CMD: |
1846 |
++ if (!msr->host_initiated && |
1847 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) |
1848 |
++ return 1; |
1849 |
++ |
1850 |
++ if (data & ~PRED_CMD_IBPB) |
1851 |
++ return 1; |
1852 |
++ |
1853 |
++ if (!data) |
1854 |
++ break; |
1855 |
++ |
1856 |
++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); |
1857 |
++ if (is_guest_mode(vcpu)) |
1858 |
++ break; |
1859 |
++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); |
1860 |
++ break; |
1861 |
+ case MSR_STAR: |
1862 |
+ svm->vmcb->save.star = data; |
1863 |
+ break; |
1864 |
+@@ -4922,6 +5008,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
1865 |
+ |
1866 |
+ local_irq_enable(); |
1867 |
+ |
1868 |
++ /* |
1869 |
++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
1870 |
++ * it's non-zero. Since vmentry is serialising on affected CPUs, there |
1871 |
++ * is no need to worry about the conditional branch over the wrmsr |
1872 |
++ * being speculatively taken. |
1873 |
++ */ |
1874 |
++ if (svm->spec_ctrl) |
1875 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); |
1876 |
++ |
1877 |
+ asm volatile ( |
1878 |
+ "push %%" _ASM_BP "; \n\t" |
1879 |
+ "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" |
1880 |
+@@ -5014,6 +5109,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
1881 |
+ #endif |
1882 |
+ ); |
1883 |
+ |
1884 |
++ /* |
1885 |
++ * We do not use IBRS in the kernel. If this vCPU has used the |
1886 |
++ * SPEC_CTRL MSR it may have left it on; save the value and |
1887 |
++ * turn it off. This is much more efficient than blindly adding |
1888 |
++ * it to the atomic save/restore list. Especially as the former |
1889 |
++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. |
1890 |
++ * |
1891 |
++ * For non-nested case: |
1892 |
++ * If the L01 MSR bitmap does not intercept the MSR, then we need to |
1893 |
++ * save it. |
1894 |
++ * |
1895 |
++ * For nested case: |
1896 |
++ * If the L02 MSR bitmap does not intercept the MSR, then we need to |
1897 |
++ * save it. |
1898 |
++ */ |
1899 |
++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) |
1900 |
++ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); |
1901 |
++ |
1902 |
++ if (svm->spec_ctrl) |
1903 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
1904 |
++ |
1905 |
+ /* Eliminate branch target predictions from guest mode */ |
1906 |
+ vmexit_fill_RSB(); |
1907 |
+ |
1908 |
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c |
1909 |
+index a45063a9219c..0ae4b1a86168 100644 |
1910 |
+--- a/arch/x86/kvm/vmx.c |
1911 |
++++ b/arch/x86/kvm/vmx.c |
1912 |
+@@ -34,6 +34,7 @@ |
1913 |
+ #include <linux/tboot.h> |
1914 |
+ #include <linux/hrtimer.h> |
1915 |
+ #include <linux/frame.h> |
1916 |
++#include <linux/nospec.h> |
1917 |
+ #include "kvm_cache_regs.h" |
1918 |
+ #include "x86.h" |
1919 |
+ |
1920 |
+@@ -108,6 +109,14 @@ static u64 __read_mostly host_xss; |
1921 |
+ static bool __read_mostly enable_pml = 1; |
1922 |
+ module_param_named(pml, enable_pml, bool, S_IRUGO); |
1923 |
+ |
1924 |
++#define MSR_TYPE_R 1 |
1925 |
++#define MSR_TYPE_W 2 |
1926 |
++#define MSR_TYPE_RW 3 |
1927 |
++ |
1928 |
++#define MSR_BITMAP_MODE_X2APIC 1 |
1929 |
++#define MSR_BITMAP_MODE_X2APIC_APICV 2 |
1930 |
++#define MSR_BITMAP_MODE_LM 4 |
1931 |
++ |
1932 |
+ #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL |
1933 |
+ |
1934 |
+ /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ |
1935 |
+@@ -182,7 +191,6 @@ module_param(ple_window_max, int, S_IRUGO); |
1936 |
+ extern const ulong vmx_return; |
1937 |
+ |
1938 |
+ #define NR_AUTOLOAD_MSRS 8 |
1939 |
+-#define VMCS02_POOL_SIZE 1 |
1940 |
+ |
1941 |
+ struct vmcs { |
1942 |
+ u32 revision_id; |
1943 |
+@@ -207,6 +215,7 @@ struct loaded_vmcs { |
1944 |
+ int soft_vnmi_blocked; |
1945 |
+ ktime_t entry_time; |
1946 |
+ s64 vnmi_blocked_time; |
1947 |
++ unsigned long *msr_bitmap; |
1948 |
+ struct list_head loaded_vmcss_on_cpu_link; |
1949 |
+ }; |
1950 |
+ |
1951 |
+@@ -223,7 +232,7 @@ struct shared_msr_entry { |
1952 |
+ * stored in guest memory specified by VMPTRLD, but is opaque to the guest, |
1953 |
+ * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. |
1954 |
+ * More than one of these structures may exist, if L1 runs multiple L2 guests. |
1955 |
+- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the |
1956 |
++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the |
1957 |
+ * underlying hardware which will be used to run L2. |
1958 |
+ * This structure is packed to ensure that its layout is identical across |
1959 |
+ * machines (necessary for live migration). |
1960 |
+@@ -406,13 +415,6 @@ struct __packed vmcs12 { |
1961 |
+ */ |
1962 |
+ #define VMCS12_SIZE 0x1000 |
1963 |
+ |
1964 |
+-/* Used to remember the last vmcs02 used for some recently used vmcs12s */ |
1965 |
+-struct vmcs02_list { |
1966 |
+- struct list_head list; |
1967 |
+- gpa_t vmptr; |
1968 |
+- struct loaded_vmcs vmcs02; |
1969 |
+-}; |
1970 |
+- |
1971 |
+ /* |
1972 |
+ * The nested_vmx structure is part of vcpu_vmx, and holds information we need |
1973 |
+ * for correct emulation of VMX (i.e., nested VMX) on this vcpu. |
1974 |
+@@ -437,15 +439,15 @@ struct nested_vmx { |
1975 |
+ */ |
1976 |
+ bool sync_shadow_vmcs; |
1977 |
+ |
1978 |
+- /* vmcs02_list cache of VMCSs recently used to run L2 guests */ |
1979 |
+- struct list_head vmcs02_pool; |
1980 |
+- int vmcs02_num; |
1981 |
+ bool change_vmcs01_virtual_x2apic_mode; |
1982 |
+ /* L2 must run next, and mustn't decide to exit to L1. */ |
1983 |
+ bool nested_run_pending; |
1984 |
++ |
1985 |
++ struct loaded_vmcs vmcs02; |
1986 |
++ |
1987 |
+ /* |
1988 |
+- * Guest pages referred to in vmcs02 with host-physical pointers, so |
1989 |
+- * we must keep them pinned while L2 runs. |
1990 |
++ * Guest pages referred to in the vmcs02 with host-physical |
1991 |
++ * pointers, so we must keep them pinned while L2 runs. |
1992 |
+ */ |
1993 |
+ struct page *apic_access_page; |
1994 |
+ struct page *virtual_apic_page; |
1995 |
+@@ -454,8 +456,6 @@ struct nested_vmx { |
1996 |
+ bool pi_pending; |
1997 |
+ u16 posted_intr_nv; |
1998 |
+ |
1999 |
+- unsigned long *msr_bitmap; |
2000 |
+- |
2001 |
+ struct hrtimer preemption_timer; |
2002 |
+ bool preemption_timer_expired; |
2003 |
+ |
2004 |
+@@ -570,6 +570,7 @@ struct vcpu_vmx { |
2005 |
+ struct kvm_vcpu vcpu; |
2006 |
+ unsigned long host_rsp; |
2007 |
+ u8 fail; |
2008 |
++ u8 msr_bitmap_mode; |
2009 |
+ u32 exit_intr_info; |
2010 |
+ u32 idt_vectoring_info; |
2011 |
+ ulong rflags; |
2012 |
+@@ -581,6 +582,10 @@ struct vcpu_vmx { |
2013 |
+ u64 msr_host_kernel_gs_base; |
2014 |
+ u64 msr_guest_kernel_gs_base; |
2015 |
+ #endif |
2016 |
++ |
2017 |
++ u64 arch_capabilities; |
2018 |
++ u64 spec_ctrl; |
2019 |
++ |
2020 |
+ u32 vm_entry_controls_shadow; |
2021 |
+ u32 vm_exit_controls_shadow; |
2022 |
+ u32 secondary_exec_control; |
2023 |
+@@ -887,21 +892,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { |
2024 |
+ |
2025 |
+ static inline short vmcs_field_to_offset(unsigned long field) |
2026 |
+ { |
2027 |
+- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); |
2028 |
++ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); |
2029 |
++ unsigned short offset; |
2030 |
+ |
2031 |
+- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) |
2032 |
++ BUILD_BUG_ON(size > SHRT_MAX); |
2033 |
++ if (field >= size) |
2034 |
+ return -ENOENT; |
2035 |
+ |
2036 |
+- /* |
2037 |
+- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a |
2038 |
+- * generic mechanism. |
2039 |
+- */ |
2040 |
+- asm("lfence"); |
2041 |
+- |
2042 |
+- if (vmcs_field_to_offset_table[field] == 0) |
2043 |
++ field = array_index_nospec(field, size); |
2044 |
++ offset = vmcs_field_to_offset_table[field]; |
2045 |
++ if (offset == 0) |
2046 |
+ return -ENOENT; |
2047 |
+- |
2048 |
+- return vmcs_field_to_offset_table[field]; |
2049 |
++ return offset; |
2050 |
+ } |
2051 |
+ |
2052 |
+ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) |
2053 |
+@@ -927,6 +929,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); |
2054 |
+ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); |
2055 |
+ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, |
2056 |
+ u16 error_code); |
2057 |
++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); |
2058 |
++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2059 |
++ u32 msr, int type); |
2060 |
+ |
2061 |
+ static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
2062 |
+ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
2063 |
+@@ -946,12 +951,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); |
2064 |
+ enum { |
2065 |
+ VMX_IO_BITMAP_A, |
2066 |
+ VMX_IO_BITMAP_B, |
2067 |
+- VMX_MSR_BITMAP_LEGACY, |
2068 |
+- VMX_MSR_BITMAP_LONGMODE, |
2069 |
+- VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, |
2070 |
+- VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, |
2071 |
+- VMX_MSR_BITMAP_LEGACY_X2APIC, |
2072 |
+- VMX_MSR_BITMAP_LONGMODE_X2APIC, |
2073 |
+ VMX_VMREAD_BITMAP, |
2074 |
+ VMX_VMWRITE_BITMAP, |
2075 |
+ VMX_BITMAP_NR |
2076 |
+@@ -961,12 +960,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; |
2077 |
+ |
2078 |
+ #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) |
2079 |
+ #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) |
2080 |
+-#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) |
2081 |
+-#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) |
2082 |
+-#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) |
2083 |
+-#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) |
2084 |
+-#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) |
2085 |
+-#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) |
2086 |
+ #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) |
2087 |
+ #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) |
2088 |
+ |
2089 |
+@@ -1913,6 +1906,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) |
2090 |
+ vmcs_write32(EXCEPTION_BITMAP, eb); |
2091 |
+ } |
2092 |
+ |
2093 |
++/* |
2094 |
++ * Check if MSR is intercepted for currently loaded MSR bitmap. |
2095 |
++ */ |
2096 |
++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) |
2097 |
++{ |
2098 |
++ unsigned long *msr_bitmap; |
2099 |
++ int f = sizeof(unsigned long); |
2100 |
++ |
2101 |
++ if (!cpu_has_vmx_msr_bitmap()) |
2102 |
++ return true; |
2103 |
++ |
2104 |
++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; |
2105 |
++ |
2106 |
++ if (msr <= 0x1fff) { |
2107 |
++ return !!test_bit(msr, msr_bitmap + 0x800 / f); |
2108 |
++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2109 |
++ msr &= 0x1fff; |
2110 |
++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); |
2111 |
++ } |
2112 |
++ |
2113 |
++ return true; |
2114 |
++} |
2115 |
++ |
2116 |
++/* |
2117 |
++ * Check if MSR is intercepted for L01 MSR bitmap. |
2118 |
++ */ |
2119 |
++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) |
2120 |
++{ |
2121 |
++ unsigned long *msr_bitmap; |
2122 |
++ int f = sizeof(unsigned long); |
2123 |
++ |
2124 |
++ if (!cpu_has_vmx_msr_bitmap()) |
2125 |
++ return true; |
2126 |
++ |
2127 |
++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; |
2128 |
++ |
2129 |
++ if (msr <= 0x1fff) { |
2130 |
++ return !!test_bit(msr, msr_bitmap + 0x800 / f); |
2131 |
++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2132 |
++ msr &= 0x1fff; |
2133 |
++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); |
2134 |
++ } |
2135 |
++ |
2136 |
++ return true; |
2137 |
++} |
2138 |
++ |
2139 |
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
2140 |
+ unsigned long entry, unsigned long exit) |
2141 |
+ { |
2142 |
+@@ -2291,6 +2330,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
2143 |
+ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { |
2144 |
+ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; |
2145 |
+ vmcs_load(vmx->loaded_vmcs->vmcs); |
2146 |
++ indirect_branch_prediction_barrier(); |
2147 |
+ } |
2148 |
+ |
2149 |
+ if (!already_loaded) { |
2150 |
+@@ -2567,36 +2607,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
2151 |
+ vmx->guest_msrs[from] = tmp; |
2152 |
+ } |
2153 |
+ |
2154 |
+-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) |
2155 |
+-{ |
2156 |
+- unsigned long *msr_bitmap; |
2157 |
+- |
2158 |
+- if (is_guest_mode(vcpu)) |
2159 |
+- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; |
2160 |
+- else if (cpu_has_secondary_exec_ctrls() && |
2161 |
+- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & |
2162 |
+- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
2163 |
+- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { |
2164 |
+- if (is_long_mode(vcpu)) |
2165 |
+- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; |
2166 |
+- else |
2167 |
+- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; |
2168 |
+- } else { |
2169 |
+- if (is_long_mode(vcpu)) |
2170 |
+- msr_bitmap = vmx_msr_bitmap_longmode_x2apic; |
2171 |
+- else |
2172 |
+- msr_bitmap = vmx_msr_bitmap_legacy_x2apic; |
2173 |
+- } |
2174 |
+- } else { |
2175 |
+- if (is_long_mode(vcpu)) |
2176 |
+- msr_bitmap = vmx_msr_bitmap_longmode; |
2177 |
+- else |
2178 |
+- msr_bitmap = vmx_msr_bitmap_legacy; |
2179 |
+- } |
2180 |
+- |
2181 |
+- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); |
2182 |
+-} |
2183 |
+- |
2184 |
+ /* |
2185 |
+ * Set up the vmcs to automatically save and restore system |
2186 |
+ * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
2187 |
+@@ -2637,7 +2647,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) |
2188 |
+ vmx->save_nmsrs = save_nmsrs; |
2189 |
+ |
2190 |
+ if (cpu_has_vmx_msr_bitmap()) |
2191 |
+- vmx_set_msr_bitmap(&vmx->vcpu); |
2192 |
++ vmx_update_msr_bitmap(&vmx->vcpu); |
2193 |
+ } |
2194 |
+ |
2195 |
+ /* |
2196 |
+@@ -3273,6 +3283,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2197 |
+ case MSR_IA32_TSC: |
2198 |
+ msr_info->data = guest_read_tsc(vcpu); |
2199 |
+ break; |
2200 |
++ case MSR_IA32_SPEC_CTRL: |
2201 |
++ if (!msr_info->host_initiated && |
2202 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && |
2203 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
2204 |
++ return 1; |
2205 |
++ |
2206 |
++ msr_info->data = to_vmx(vcpu)->spec_ctrl; |
2207 |
++ break; |
2208 |
++ case MSR_IA32_ARCH_CAPABILITIES: |
2209 |
++ if (!msr_info->host_initiated && |
2210 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) |
2211 |
++ return 1; |
2212 |
++ msr_info->data = to_vmx(vcpu)->arch_capabilities; |
2213 |
++ break; |
2214 |
+ case MSR_IA32_SYSENTER_CS: |
2215 |
+ msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); |
2216 |
+ break; |
2217 |
+@@ -3380,6 +3404,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2218 |
+ case MSR_IA32_TSC: |
2219 |
+ kvm_write_tsc(vcpu, msr_info); |
2220 |
+ break; |
2221 |
++ case MSR_IA32_SPEC_CTRL: |
2222 |
++ if (!msr_info->host_initiated && |
2223 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && |
2224 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
2225 |
++ return 1; |
2226 |
++ |
2227 |
++ /* The STIBP bit doesn't fault even if it's not advertised */ |
2228 |
++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) |
2229 |
++ return 1; |
2230 |
++ |
2231 |
++ vmx->spec_ctrl = data; |
2232 |
++ |
2233 |
++ if (!data) |
2234 |
++ break; |
2235 |
++ |
2236 |
++ /* |
2237 |
++ * For non-nested: |
2238 |
++ * When it's written (to non-zero) for the first time, pass |
2239 |
++ * it through. |
2240 |
++ * |
2241 |
++ * For nested: |
2242 |
++ * The handling of the MSR bitmap for L2 guests is done in |
2243 |
++ * nested_vmx_merge_msr_bitmap. We should not touch the |
2244 |
++ * vmcs02.msr_bitmap here since it gets completely overwritten |
2245 |
++ * in the merging. We update the vmcs01 here for L1 as well |
2246 |
++ * since it will end up touching the MSR anyway now. |
2247 |
++ */ |
2248 |
++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, |
2249 |
++ MSR_IA32_SPEC_CTRL, |
2250 |
++ MSR_TYPE_RW); |
2251 |
++ break; |
2252 |
++ case MSR_IA32_PRED_CMD: |
2253 |
++ if (!msr_info->host_initiated && |
2254 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && |
2255 |
++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
2256 |
++ return 1; |
2257 |
++ |
2258 |
++ if (data & ~PRED_CMD_IBPB) |
2259 |
++ return 1; |
2260 |
++ |
2261 |
++ if (!data) |
2262 |
++ break; |
2263 |
++ |
2264 |
++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); |
2265 |
++ |
2266 |
++ /* |
2267 |
++ * For non-nested: |
2268 |
++ * When it's written (to non-zero) for the first time, pass |
2269 |
++ * it through. |
2270 |
++ * |
2271 |
++ * For nested: |
2272 |
++ * The handling of the MSR bitmap for L2 guests is done in |
2273 |
++ * nested_vmx_merge_msr_bitmap. We should not touch the |
2274 |
++ * vmcs02.msr_bitmap here since it gets completely overwritten |
2275 |
++ * in the merging. |
2276 |
++ */ |
2277 |
++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, |
2278 |
++ MSR_TYPE_W); |
2279 |
++ break; |
2280 |
++ case MSR_IA32_ARCH_CAPABILITIES: |
2281 |
++ if (!msr_info->host_initiated) |
2282 |
++ return 1; |
2283 |
++ vmx->arch_capabilities = data; |
2284 |
++ break; |
2285 |
+ case MSR_IA32_CR_PAT: |
2286 |
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
2287 |
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) |
2288 |
+@@ -3822,11 +3910,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) |
2289 |
+ return vmcs; |
2290 |
+ } |
2291 |
+ |
2292 |
+-static struct vmcs *alloc_vmcs(void) |
2293 |
+-{ |
2294 |
+- return alloc_vmcs_cpu(raw_smp_processor_id()); |
2295 |
+-} |
2296 |
+- |
2297 |
+ static void free_vmcs(struct vmcs *vmcs) |
2298 |
+ { |
2299 |
+ free_pages((unsigned long)vmcs, vmcs_config.order); |
2300 |
+@@ -3842,9 +3925,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) |
2301 |
+ loaded_vmcs_clear(loaded_vmcs); |
2302 |
+ free_vmcs(loaded_vmcs->vmcs); |
2303 |
+ loaded_vmcs->vmcs = NULL; |
2304 |
++ if (loaded_vmcs->msr_bitmap) |
2305 |
++ free_page((unsigned long)loaded_vmcs->msr_bitmap); |
2306 |
+ WARN_ON(loaded_vmcs->shadow_vmcs != NULL); |
2307 |
+ } |
2308 |
+ |
2309 |
++static struct vmcs *alloc_vmcs(void) |
2310 |
++{ |
2311 |
++ return alloc_vmcs_cpu(raw_smp_processor_id()); |
2312 |
++} |
2313 |
++ |
2314 |
++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) |
2315 |
++{ |
2316 |
++ loaded_vmcs->vmcs = alloc_vmcs(); |
2317 |
++ if (!loaded_vmcs->vmcs) |
2318 |
++ return -ENOMEM; |
2319 |
++ |
2320 |
++ loaded_vmcs->shadow_vmcs = NULL; |
2321 |
++ loaded_vmcs_init(loaded_vmcs); |
2322 |
++ |
2323 |
++ if (cpu_has_vmx_msr_bitmap()) { |
2324 |
++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
2325 |
++ if (!loaded_vmcs->msr_bitmap) |
2326 |
++ goto out_vmcs; |
2327 |
++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); |
2328 |
++ } |
2329 |
++ return 0; |
2330 |
++ |
2331 |
++out_vmcs: |
2332 |
++ free_loaded_vmcs(loaded_vmcs); |
2333 |
++ return -ENOMEM; |
2334 |
++} |
2335 |
++ |
2336 |
+ static void free_kvm_area(void) |
2337 |
+ { |
2338 |
+ int cpu; |
2339 |
+@@ -4917,10 +5029,8 @@ static void free_vpid(int vpid) |
2340 |
+ spin_unlock(&vmx_vpid_lock); |
2341 |
+ } |
2342 |
+ |
2343 |
+-#define MSR_TYPE_R 1 |
2344 |
+-#define MSR_TYPE_W 2 |
2345 |
+-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2346 |
+- u32 msr, int type) |
2347 |
++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2348 |
++ u32 msr, int type) |
2349 |
+ { |
2350 |
+ int f = sizeof(unsigned long); |
2351 |
+ |
2352 |
+@@ -4954,6 +5064,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2353 |
+ } |
2354 |
+ } |
2355 |
+ |
2356 |
++static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, |
2357 |
++ u32 msr, int type) |
2358 |
++{ |
2359 |
++ int f = sizeof(unsigned long); |
2360 |
++ |
2361 |
++ if (!cpu_has_vmx_msr_bitmap()) |
2362 |
++ return; |
2363 |
++ |
2364 |
++ /* |
2365 |
++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals |
2366 |
++ * have the write-low and read-high bitmap offsets the wrong way round. |
2367 |
++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
2368 |
++ */ |
2369 |
++ if (msr <= 0x1fff) { |
2370 |
++ if (type & MSR_TYPE_R) |
2371 |
++ /* read-low */ |
2372 |
++ __set_bit(msr, msr_bitmap + 0x000 / f); |
2373 |
++ |
2374 |
++ if (type & MSR_TYPE_W) |
2375 |
++ /* write-low */ |
2376 |
++ __set_bit(msr, msr_bitmap + 0x800 / f); |
2377 |
++ |
2378 |
++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2379 |
++ msr &= 0x1fff; |
2380 |
++ if (type & MSR_TYPE_R) |
2381 |
++ /* read-high */ |
2382 |
++ __set_bit(msr, msr_bitmap + 0x400 / f); |
2383 |
++ |
2384 |
++ if (type & MSR_TYPE_W) |
2385 |
++ /* write-high */ |
2386 |
++ __set_bit(msr, msr_bitmap + 0xc00 / f); |
2387 |
++ |
2388 |
++ } |
2389 |
++} |
2390 |
++ |
2391 |
++static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, |
2392 |
++ u32 msr, int type, bool value) |
2393 |
++{ |
2394 |
++ if (value) |
2395 |
++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type); |
2396 |
++ else |
2397 |
++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type); |
2398 |
++} |
2399 |
++ |
2400 |
+ /* |
2401 |
+ * If a msr is allowed by L0, we should check whether it is allowed by L1. |
2402 |
+ * The corresponding bit will be cleared unless both of L0 and L1 allow it. |
2403 |
+@@ -5000,30 +5154,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, |
2404 |
+ } |
2405 |
+ } |
2406 |
+ |
2407 |
+-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
2408 |
++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) |
2409 |
+ { |
2410 |
+- if (!longmode_only) |
2411 |
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, |
2412 |
+- msr, MSR_TYPE_R | MSR_TYPE_W); |
2413 |
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, |
2414 |
+- msr, MSR_TYPE_R | MSR_TYPE_W); |
2415 |
++ u8 mode = 0; |
2416 |
++ |
2417 |
++ if (cpu_has_secondary_exec_ctrls() && |
2418 |
++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & |
2419 |
++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
2420 |
++ mode |= MSR_BITMAP_MODE_X2APIC; |
2421 |
++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) |
2422 |
++ mode |= MSR_BITMAP_MODE_X2APIC_APICV; |
2423 |
++ } |
2424 |
++ |
2425 |
++ if (is_long_mode(vcpu)) |
2426 |
++ mode |= MSR_BITMAP_MODE_LM; |
2427 |
++ |
2428 |
++ return mode; |
2429 |
+ } |
2430 |
+ |
2431 |
+-static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) |
2432 |
++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) |
2433 |
++ |
2434 |
++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, |
2435 |
++ u8 mode) |
2436 |
+ { |
2437 |
+- if (apicv_active) { |
2438 |
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, |
2439 |
+- msr, type); |
2440 |
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, |
2441 |
+- msr, type); |
2442 |
+- } else { |
2443 |
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, |
2444 |
+- msr, type); |
2445 |
+- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, |
2446 |
+- msr, type); |
2447 |
++ int msr; |
2448 |
++ |
2449 |
++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { |
2450 |
++ unsigned word = msr / BITS_PER_LONG; |
2451 |
++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; |
2452 |
++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; |
2453 |
++ } |
2454 |
++ |
2455 |
++ if (mode & MSR_BITMAP_MODE_X2APIC) { |
2456 |
++ /* |
2457 |
++ * TPR reads and writes can be virtualized even if virtual interrupt |
2458 |
++ * delivery is not in use. |
2459 |
++ */ |
2460 |
++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); |
2461 |
++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { |
2462 |
++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); |
2463 |
++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); |
2464 |
++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); |
2465 |
++ } |
2466 |
+ } |
2467 |
+ } |
2468 |
+ |
2469 |
++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) |
2470 |
++{ |
2471 |
++ struct vcpu_vmx *vmx = to_vmx(vcpu); |
2472 |
++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; |
2473 |
++ u8 mode = vmx_msr_bitmap_mode(vcpu); |
2474 |
++ u8 changed = mode ^ vmx->msr_bitmap_mode; |
2475 |
++ |
2476 |
++ if (!changed) |
2477 |
++ return; |
2478 |
++ |
2479 |
++ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, |
2480 |
++ !(mode & MSR_BITMAP_MODE_LM)); |
2481 |
++ |
2482 |
++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) |
2483 |
++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); |
2484 |
++ |
2485 |
++ vmx->msr_bitmap_mode = mode; |
2486 |
++} |
2487 |
++ |
2488 |
+ static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) |
2489 |
+ { |
2490 |
+ return enable_apicv; |
2491 |
+@@ -5269,7 +5463,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) |
2492 |
+ } |
2493 |
+ |
2494 |
+ if (cpu_has_vmx_msr_bitmap()) |
2495 |
+- vmx_set_msr_bitmap(vcpu); |
2496 |
++ vmx_update_msr_bitmap(vcpu); |
2497 |
+ } |
2498 |
+ |
2499 |
+ static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
2500 |
+@@ -5456,7 +5650,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2501 |
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); |
2502 |
+ } |
2503 |
+ if (cpu_has_vmx_msr_bitmap()) |
2504 |
+- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
2505 |
++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); |
2506 |
+ |
2507 |
+ vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
2508 |
+ |
2509 |
+@@ -5534,6 +5728,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2510 |
+ ++vmx->nmsrs; |
2511 |
+ } |
2512 |
+ |
2513 |
++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) |
2514 |
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); |
2515 |
+ |
2516 |
+ vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); |
2517 |
+ |
2518 |
+@@ -5564,6 +5760,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
2519 |
+ u64 cr0; |
2520 |
+ |
2521 |
+ vmx->rmode.vm86_active = 0; |
2522 |
++ vmx->spec_ctrl = 0; |
2523 |
+ |
2524 |
+ vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
2525 |
+ kvm_set_cr8(vcpu, 0); |
2526 |
+@@ -6739,7 +6936,7 @@ void vmx_enable_tdp(void) |
2527 |
+ |
2528 |
+ static __init int hardware_setup(void) |
2529 |
+ { |
2530 |
+- int r = -ENOMEM, i, msr; |
2531 |
++ int r = -ENOMEM, i; |
2532 |
+ |
2533 |
+ rdmsrl_safe(MSR_EFER, &host_efer); |
2534 |
+ |
2535 |
+@@ -6760,9 +6957,6 @@ static __init int hardware_setup(void) |
2536 |
+ |
2537 |
+ memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); |
2538 |
+ |
2539 |
+- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); |
2540 |
+- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); |
2541 |
+- |
2542 |
+ if (setup_vmcs_config(&vmcs_config) < 0) { |
2543 |
+ r = -EIO; |
2544 |
+ goto out; |
2545 |
+@@ -6825,42 +7019,8 @@ static __init int hardware_setup(void) |
2546 |
+ kvm_tsc_scaling_ratio_frac_bits = 48; |
2547 |
+ } |
2548 |
+ |
2549 |
+- vmx_disable_intercept_for_msr(MSR_FS_BASE, false); |
2550 |
+- vmx_disable_intercept_for_msr(MSR_GS_BASE, false); |
2551 |
+- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); |
2552 |
+- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
2553 |
+- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
2554 |
+- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
2555 |
+- |
2556 |
+- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, |
2557 |
+- vmx_msr_bitmap_legacy, PAGE_SIZE); |
2558 |
+- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, |
2559 |
+- vmx_msr_bitmap_longmode, PAGE_SIZE); |
2560 |
+- memcpy(vmx_msr_bitmap_legacy_x2apic, |
2561 |
+- vmx_msr_bitmap_legacy, PAGE_SIZE); |
2562 |
+- memcpy(vmx_msr_bitmap_longmode_x2apic, |
2563 |
+- vmx_msr_bitmap_longmode, PAGE_SIZE); |
2564 |
+- |
2565 |
+ set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
2566 |
+ |
2567 |
+- for (msr = 0x800; msr <= 0x8ff; msr++) { |
2568 |
+- if (msr == 0x839 /* TMCCT */) |
2569 |
+- continue; |
2570 |
+- vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); |
2571 |
+- } |
2572 |
+- |
2573 |
+- /* |
2574 |
+- * TPR reads and writes can be virtualized even if virtual interrupt |
2575 |
+- * delivery is not in use. |
2576 |
+- */ |
2577 |
+- vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); |
2578 |
+- vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); |
2579 |
+- |
2580 |
+- /* EOI */ |
2581 |
+- vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); |
2582 |
+- /* SELF-IPI */ |
2583 |
+- vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); |
2584 |
+- |
2585 |
+ if (enable_ept) |
2586 |
+ vmx_enable_tdp(); |
2587 |
+ else |
2588 |
+@@ -6963,94 +7123,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) |
2589 |
+ return handle_nop(vcpu); |
2590 |
+ } |
2591 |
+ |
2592 |
+-/* |
2593 |
+- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. |
2594 |
+- * We could reuse a single VMCS for all the L2 guests, but we also want the |
2595 |
+- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this |
2596 |
+- * allows keeping them loaded on the processor, and in the future will allow |
2597 |
+- * optimizations where prepare_vmcs02 doesn't need to set all the fields on |
2598 |
+- * every entry if they never change. |
2599 |
+- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE |
2600 |
+- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. |
2601 |
+- * |
2602 |
+- * The following functions allocate and free a vmcs02 in this pool. |
2603 |
+- */ |
2604 |
+- |
2605 |
+-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ |
2606 |
+-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) |
2607 |
+-{ |
2608 |
+- struct vmcs02_list *item; |
2609 |
+- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) |
2610 |
+- if (item->vmptr == vmx->nested.current_vmptr) { |
2611 |
+- list_move(&item->list, &vmx->nested.vmcs02_pool); |
2612 |
+- return &item->vmcs02; |
2613 |
+- } |
2614 |
+- |
2615 |
+- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { |
2616 |
+- /* Recycle the least recently used VMCS. */ |
2617 |
+- item = list_last_entry(&vmx->nested.vmcs02_pool, |
2618 |
+- struct vmcs02_list, list); |
2619 |
+- item->vmptr = vmx->nested.current_vmptr; |
2620 |
+- list_move(&item->list, &vmx->nested.vmcs02_pool); |
2621 |
+- return &item->vmcs02; |
2622 |
+- } |
2623 |
+- |
2624 |
+- /* Create a new VMCS */ |
2625 |
+- item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); |
2626 |
+- if (!item) |
2627 |
+- return NULL; |
2628 |
+- item->vmcs02.vmcs = alloc_vmcs(); |
2629 |
+- item->vmcs02.shadow_vmcs = NULL; |
2630 |
+- if (!item->vmcs02.vmcs) { |
2631 |
+- kfree(item); |
2632 |
+- return NULL; |
2633 |
+- } |
2634 |
+- loaded_vmcs_init(&item->vmcs02); |
2635 |
+- item->vmptr = vmx->nested.current_vmptr; |
2636 |
+- list_add(&(item->list), &(vmx->nested.vmcs02_pool)); |
2637 |
+- vmx->nested.vmcs02_num++; |
2638 |
+- return &item->vmcs02; |
2639 |
+-} |
2640 |
+- |
2641 |
+-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ |
2642 |
+-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) |
2643 |
+-{ |
2644 |
+- struct vmcs02_list *item; |
2645 |
+- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) |
2646 |
+- if (item->vmptr == vmptr) { |
2647 |
+- free_loaded_vmcs(&item->vmcs02); |
2648 |
+- list_del(&item->list); |
2649 |
+- kfree(item); |
2650 |
+- vmx->nested.vmcs02_num--; |
2651 |
+- return; |
2652 |
+- } |
2653 |
+-} |
2654 |
+- |
2655 |
+-/* |
2656 |
+- * Free all VMCSs saved for this vcpu, except the one pointed by |
2657 |
+- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs |
2658 |
+- * must be &vmx->vmcs01. |
2659 |
+- */ |
2660 |
+-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) |
2661 |
+-{ |
2662 |
+- struct vmcs02_list *item, *n; |
2663 |
+- |
2664 |
+- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); |
2665 |
+- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { |
2666 |
+- /* |
2667 |
+- * Something will leak if the above WARN triggers. Better than |
2668 |
+- * a use-after-free. |
2669 |
+- */ |
2670 |
+- if (vmx->loaded_vmcs == &item->vmcs02) |
2671 |
+- continue; |
2672 |
+- |
2673 |
+- free_loaded_vmcs(&item->vmcs02); |
2674 |
+- list_del(&item->list); |
2675 |
+- kfree(item); |
2676 |
+- vmx->nested.vmcs02_num--; |
2677 |
+- } |
2678 |
+-} |
2679 |
+- |
2680 |
+ /* |
2681 |
+ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), |
2682 |
+ * set the success or error code of an emulated VMX instruction, as specified |
2683 |
+@@ -7231,13 +7303,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) |
2684 |
+ { |
2685 |
+ struct vcpu_vmx *vmx = to_vmx(vcpu); |
2686 |
+ struct vmcs *shadow_vmcs; |
2687 |
++ int r; |
2688 |
+ |
2689 |
+- if (cpu_has_vmx_msr_bitmap()) { |
2690 |
+- vmx->nested.msr_bitmap = |
2691 |
+- (unsigned long *)__get_free_page(GFP_KERNEL); |
2692 |
+- if (!vmx->nested.msr_bitmap) |
2693 |
+- goto out_msr_bitmap; |
2694 |
+- } |
2695 |
++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02); |
2696 |
++ if (r < 0) |
2697 |
++ goto out_vmcs02; |
2698 |
+ |
2699 |
+ vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); |
2700 |
+ if (!vmx->nested.cached_vmcs12) |
2701 |
+@@ -7254,9 +7324,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) |
2702 |
+ vmx->vmcs01.shadow_vmcs = shadow_vmcs; |
2703 |
+ } |
2704 |
+ |
2705 |
+- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
2706 |
+- vmx->nested.vmcs02_num = 0; |
2707 |
+- |
2708 |
+ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, |
2709 |
+ HRTIMER_MODE_REL_PINNED); |
2710 |
+ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; |
2711 |
+@@ -7268,9 +7335,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) |
2712 |
+ kfree(vmx->nested.cached_vmcs12); |
2713 |
+ |
2714 |
+ out_cached_vmcs12: |
2715 |
+- free_page((unsigned long)vmx->nested.msr_bitmap); |
2716 |
++ free_loaded_vmcs(&vmx->nested.vmcs02); |
2717 |
+ |
2718 |
+-out_msr_bitmap: |
2719 |
++out_vmcs02: |
2720 |
+ return -ENOMEM; |
2721 |
+ } |
2722 |
+ |
2723 |
+@@ -7412,10 +7479,6 @@ static void free_nested(struct vcpu_vmx *vmx) |
2724 |
+ free_vpid(vmx->nested.vpid02); |
2725 |
+ vmx->nested.posted_intr_nv = -1; |
2726 |
+ vmx->nested.current_vmptr = -1ull; |
2727 |
+- if (vmx->nested.msr_bitmap) { |
2728 |
+- free_page((unsigned long)vmx->nested.msr_bitmap); |
2729 |
+- vmx->nested.msr_bitmap = NULL; |
2730 |
+- } |
2731 |
+ if (enable_shadow_vmcs) { |
2732 |
+ vmx_disable_shadow_vmcs(vmx); |
2733 |
+ vmcs_clear(vmx->vmcs01.shadow_vmcs); |
2734 |
+@@ -7423,7 +7486,7 @@ static void free_nested(struct vcpu_vmx *vmx) |
2735 |
+ vmx->vmcs01.shadow_vmcs = NULL; |
2736 |
+ } |
2737 |
+ kfree(vmx->nested.cached_vmcs12); |
2738 |
+- /* Unpin physical memory we referred to in current vmcs02 */ |
2739 |
++ /* Unpin physical memory we referred to in the vmcs02 */ |
2740 |
+ if (vmx->nested.apic_access_page) { |
2741 |
+ kvm_release_page_dirty(vmx->nested.apic_access_page); |
2742 |
+ vmx->nested.apic_access_page = NULL; |
2743 |
+@@ -7439,7 +7502,7 @@ static void free_nested(struct vcpu_vmx *vmx) |
2744 |
+ vmx->nested.pi_desc = NULL; |
2745 |
+ } |
2746 |
+ |
2747 |
+- nested_free_all_saved_vmcss(vmx); |
2748 |
++ free_loaded_vmcs(&vmx->nested.vmcs02); |
2749 |
+ } |
2750 |
+ |
2751 |
+ /* Emulate the VMXOFF instruction */ |
2752 |
+@@ -7482,8 +7545,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) |
2753 |
+ vmptr + offsetof(struct vmcs12, launch_state), |
2754 |
+ &zero, sizeof(zero)); |
2755 |
+ |
2756 |
+- nested_free_vmcs02(vmx, vmptr); |
2757 |
+- |
2758 |
+ nested_vmx_succeed(vcpu); |
2759 |
+ return kvm_skip_emulated_instruction(vcpu); |
2760 |
+ } |
2761 |
+@@ -8395,10 +8456,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) |
2762 |
+ |
2763 |
+ /* |
2764 |
+ * The host physical addresses of some pages of guest memory |
2765 |
+- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU |
2766 |
+- * may write to these pages via their host physical address while |
2767 |
+- * L2 is running, bypassing any address-translation-based dirty |
2768 |
+- * tracking (e.g. EPT write protection). |
2769 |
++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC |
2770 |
++ * Page). The CPU may write to these pages via their host |
2771 |
++ * physical address while L2 is running, bypassing any |
2772 |
++ * address-translation-based dirty tracking (e.g. EPT write |
2773 |
++ * protection). |
2774 |
+ * |
2775 |
+ * Mark them dirty on every exit from L2 to prevent them from |
2776 |
+ * getting out of sync with dirty tracking. |
2777 |
+@@ -8932,7 +8994,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) |
2778 |
+ } |
2779 |
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); |
2780 |
+ |
2781 |
+- vmx_set_msr_bitmap(vcpu); |
2782 |
++ vmx_update_msr_bitmap(vcpu); |
2783 |
+ } |
2784 |
+ |
2785 |
+ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) |
2786 |
+@@ -9118,14 +9180,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) |
2787 |
+ #endif |
2788 |
+ "pushf\n\t" |
2789 |
+ __ASM_SIZE(push) " $%c[cs]\n\t" |
2790 |
+- "call *%[entry]\n\t" |
2791 |
++ CALL_NOSPEC |
2792 |
+ : |
2793 |
+ #ifdef CONFIG_X86_64 |
2794 |
+ [sp]"=&r"(tmp), |
2795 |
+ #endif |
2796 |
+ ASM_CALL_CONSTRAINT |
2797 |
+ : |
2798 |
+- [entry]"r"(entry), |
2799 |
++ THUNK_TARGET(entry), |
2800 |
+ [ss]"i"(__KERNEL_DS), |
2801 |
+ [cs]"i"(__KERNEL_CS) |
2802 |
+ ); |
2803 |
+@@ -9362,6 +9424,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
2804 |
+ |
2805 |
+ vmx_arm_hv_timer(vcpu); |
2806 |
+ |
2807 |
++ /* |
2808 |
++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
2809 |
++ * it's non-zero. Since vmentry is serialising on affected CPUs, there |
2810 |
++ * is no need to worry about the conditional branch over the wrmsr |
2811 |
++ * being speculatively taken. |
2812 |
++ */ |
2813 |
++ if (vmx->spec_ctrl) |
2814 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
2815 |
++ |
2816 |
+ vmx->__launched = vmx->loaded_vmcs->launched; |
2817 |
+ asm( |
2818 |
+ /* Store host registers */ |
2819 |
+@@ -9480,6 +9551,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
2820 |
+ #endif |
2821 |
+ ); |
2822 |
+ |
2823 |
++ /* |
2824 |
++ * We do not use IBRS in the kernel. If this vCPU has used the |
2825 |
++ * SPEC_CTRL MSR it may have left it on; save the value and |
2826 |
++ * turn it off. This is much more efficient than blindly adding |
2827 |
++ * it to the atomic save/restore list. Especially as the former |
2828 |
++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. |
2829 |
++ * |
2830 |
++ * For non-nested case: |
2831 |
++ * If the L01 MSR bitmap does not intercept the MSR, then we need to |
2832 |
++ * save it. |
2833 |
++ * |
2834 |
++ * For nested case: |
2835 |
++ * If the L02 MSR bitmap does not intercept the MSR, then we need to |
2836 |
++ * save it. |
2837 |
++ */ |
2838 |
++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) |
2839 |
++ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
2840 |
++ |
2841 |
++ if (vmx->spec_ctrl) |
2842 |
++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
2843 |
++ |
2844 |
+ /* Eliminate branch target predictions from guest mode */ |
2845 |
+ vmexit_fill_RSB(); |
2846 |
+ |
2847 |
+@@ -9594,6 +9686,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
2848 |
+ { |
2849 |
+ int err; |
2850 |
+ struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
2851 |
++ unsigned long *msr_bitmap; |
2852 |
+ int cpu; |
2853 |
+ |
2854 |
+ if (!vmx) |
2855 |
+@@ -9626,13 +9719,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
2856 |
+ if (!vmx->guest_msrs) |
2857 |
+ goto free_pml; |
2858 |
+ |
2859 |
+- vmx->loaded_vmcs = &vmx->vmcs01; |
2860 |
+- vmx->loaded_vmcs->vmcs = alloc_vmcs(); |
2861 |
+- vmx->loaded_vmcs->shadow_vmcs = NULL; |
2862 |
+- if (!vmx->loaded_vmcs->vmcs) |
2863 |
++ err = alloc_loaded_vmcs(&vmx->vmcs01); |
2864 |
++ if (err < 0) |
2865 |
+ goto free_msrs; |
2866 |
+- loaded_vmcs_init(vmx->loaded_vmcs); |
2867 |
+ |
2868 |
++ msr_bitmap = vmx->vmcs01.msr_bitmap; |
2869 |
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); |
2870 |
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); |
2871 |
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); |
2872 |
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); |
2873 |
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); |
2874 |
++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); |
2875 |
++ vmx->msr_bitmap_mode = 0; |
2876 |
++ |
2877 |
++ vmx->loaded_vmcs = &vmx->vmcs01; |
2878 |
+ cpu = get_cpu(); |
2879 |
+ vmx_vcpu_load(&vmx->vcpu, cpu); |
2880 |
+ vmx->vcpu.cpu = cpu; |
2881 |
+@@ -10101,10 +10201,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
2882 |
+ int msr; |
2883 |
+ struct page *page; |
2884 |
+ unsigned long *msr_bitmap_l1; |
2885 |
+- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; |
2886 |
++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; |
2887 |
++ /* |
2888 |
++ * pred_cmd & spec_ctrl are trying to verify two things: |
2889 |
++ * |
2890 |
++ * 1. L0 gave permission to L1 to actually pass through the MSR. This |
2891 |
++ * ensures that we do not accidentally generate an L02 MSR bitmap |
2892 |
++ * from the L12 MSR bitmap that is too permissive. |
2893 |
++ * 2. That L1 or L2s have actually used the MSR. This avoids |
2894 |
++ * unnecessary merging of the bitmap if the MSR is unused. This |
2895 |
++ * works properly because we only update the L01 MSR bitmap lazily. |
2896 |
++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only |
2897 |
++ * updated to reflect this when L1 (or its L2s) actually write to |
2898 |
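++ * the MSR. NOTE(review): the flags below appear to hold "write is intercepted in L01", the inverse of point 1 - confirm polarity. |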
2899 |
++ */ |
2900 |
++ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); |
2901 |
++ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); |
2902 |
+ |
2903 |
+- /* This shortcut is ok because we support only x2APIC MSRs so far. */ |
2904 |
+- if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) |
2905 |
++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && |
2906 |
++ !pred_cmd && !spec_ctrl) |
2907 |
+ return false; |
2908 |
+ |
2909 |
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); |
2910 |
+@@ -10137,6 +10252,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
2911 |
+ MSR_TYPE_W); |
2912 |
+ } |
2913 |
+ } |
2914 |
++ |
2915 |
++ if (spec_ctrl) |
2916 |
++ nested_vmx_disable_intercept_for_msr( |
2917 |
++ msr_bitmap_l1, msr_bitmap_l0, |
2918 |
++ MSR_IA32_SPEC_CTRL, |
2919 |
++ MSR_TYPE_R | MSR_TYPE_W); |
2920 |
++ |
2921 |
++ if (pred_cmd) |
2922 |
++ nested_vmx_disable_intercept_for_msr( |
2923 |
++ msr_bitmap_l1, msr_bitmap_l0, |
2924 |
++ MSR_IA32_PRED_CMD, |
2925 |
++ MSR_TYPE_W); |
2926 |
++ |
2927 |
+ kunmap(page); |
2928 |
+ kvm_release_page_clean(page); |
2929 |
+ |
2930 |
+@@ -10678,6 +10806,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
2931 |
+ if (kvm_has_tsc_control) |
2932 |
+ decache_tsc_multiplier(vmx); |
2933 |
+ |
2934 |
++ if (cpu_has_vmx_msr_bitmap()) |
2935 |
++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); |
2936 |
++ |
2937 |
+ if (enable_vpid) { |
2938 |
+ /* |
2939 |
+ * There is no direct mapping between vpid02 and vpid12, the |
2940 |
+@@ -10894,20 +11025,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) |
2941 |
+ { |
2942 |
+ struct vcpu_vmx *vmx = to_vmx(vcpu); |
2943 |
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
2944 |
+- struct loaded_vmcs *vmcs02; |
2945 |
+ u32 msr_entry_idx; |
2946 |
+ u32 exit_qual; |
2947 |
+ |
2948 |
+- vmcs02 = nested_get_current_vmcs02(vmx); |
2949 |
+- if (!vmcs02) |
2950 |
+- return -ENOMEM; |
2951 |
+- |
2952 |
+ enter_guest_mode(vcpu); |
2953 |
+ |
2954 |
+ if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
2955 |
+ vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
2956 |
+ |
2957 |
+- vmx_switch_vmcs(vcpu, vmcs02); |
2958 |
++ vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); |
2959 |
+ vmx_segment_cache_clear(vmx); |
2960 |
+ |
2961 |
+ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { |
2962 |
+@@ -11476,7 +11602,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
2963 |
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
2964 |
+ |
2965 |
+ if (cpu_has_vmx_msr_bitmap()) |
2966 |
+- vmx_set_msr_bitmap(vcpu); |
2967 |
++ vmx_update_msr_bitmap(vcpu); |
2968 |
+ |
2969 |
+ if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, |
2970 |
+ vmcs12->vm_exit_msr_load_count)) |
2971 |
+@@ -11522,10 +11648,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
2972 |
+ vm_exit_controls_reset_shadow(vmx); |
2973 |
+ vmx_segment_cache_clear(vmx); |
2974 |
+ |
2975 |
+- /* if no vmcs02 cache requested, remove the one we used */ |
2976 |
+- if (VMCS02_POOL_SIZE == 0) |
2977 |
+- nested_free_vmcs02(vmx, vmx->nested.current_vmptr); |
2978 |
+- |
2979 |
+ /* Update any VMCS fields that might have changed while L2 ran */ |
2980 |
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); |
2981 |
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); |
2982 |
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
2983 |
+index 8c28023a43b1..f97358423f9c 100644 |
2984 |
+--- a/arch/x86/kvm/x86.c |
2985 |
++++ b/arch/x86/kvm/x86.c |
2986 |
+@@ -1006,6 +1006,7 @@ static u32 msrs_to_save[] = { |
2987 |
+ #endif |
2988 |
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
2989 |
+ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, |
2990 |
++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES |
2991 |
+ }; |
2992 |
+ |
2993 |
+ static unsigned num_msrs_to_save; |
2994 |
+diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile |
2995 |
+index d435c89875c1..d0a3170e6804 100644 |
2996 |
+--- a/arch/x86/lib/Makefile |
2997 |
++++ b/arch/x86/lib/Makefile |
2998 |
+@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
2999 |
+ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
3000 |
+ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o |
3001 |
+ lib-$(CONFIG_RETPOLINE) += retpoline.o |
3002 |
++OBJECT_FILES_NON_STANDARD_retpoline.o :=y |
3003 |
+ |
3004 |
+ obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o |
3005 |
+ |
3006 |
+diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S |
3007 |
+index c97d935a29e8..49b167f73215 100644 |
3008 |
+--- a/arch/x86/lib/getuser.S |
3009 |
++++ b/arch/x86/lib/getuser.S |
3010 |
+@@ -40,6 +40,8 @@ ENTRY(__get_user_1) |
3011 |
+ mov PER_CPU_VAR(current_task), %_ASM_DX |
3012 |
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
3013 |
+ jae bad_get_user |
3014 |
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
3015 |
++ and %_ASM_DX, %_ASM_AX |
3016 |
+ ASM_STAC |
3017 |
+ 1: movzbl (%_ASM_AX),%edx |
3018 |
+ xor %eax,%eax |
3019 |
+@@ -54,6 +56,8 @@ ENTRY(__get_user_2) |
3020 |
+ mov PER_CPU_VAR(current_task), %_ASM_DX |
3021 |
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
3022 |
+ jae bad_get_user |
3023 |
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
3024 |
++ and %_ASM_DX, %_ASM_AX |
3025 |
+ ASM_STAC |
3026 |
+ 2: movzwl -1(%_ASM_AX),%edx |
3027 |
+ xor %eax,%eax |
3028 |
+@@ -68,6 +72,8 @@ ENTRY(__get_user_4) |
3029 |
+ mov PER_CPU_VAR(current_task), %_ASM_DX |
3030 |
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
3031 |
+ jae bad_get_user |
3032 |
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
3033 |
++ and %_ASM_DX, %_ASM_AX |
3034 |
+ ASM_STAC |
3035 |
+ 3: movl -3(%_ASM_AX),%edx |
3036 |
+ xor %eax,%eax |
3037 |
+@@ -83,6 +89,8 @@ ENTRY(__get_user_8) |
3038 |
+ mov PER_CPU_VAR(current_task), %_ASM_DX |
3039 |
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
3040 |
+ jae bad_get_user |
3041 |
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
3042 |
++ and %_ASM_DX, %_ASM_AX |
3043 |
+ ASM_STAC |
3044 |
+ 4: movq -7(%_ASM_AX),%rdx |
3045 |
+ xor %eax,%eax |
3046 |
+@@ -94,6 +102,8 @@ ENTRY(__get_user_8) |
3047 |
+ mov PER_CPU_VAR(current_task), %_ASM_DX |
3048 |
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
3049 |
+ jae bad_get_user_8 |
3050 |
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
3051 |
++ and %_ASM_DX, %_ASM_AX |
3052 |
+ ASM_STAC |
3053 |
+ 4: movl -7(%_ASM_AX),%edx |
3054 |
+ 5: movl -3(%_ASM_AX),%ecx |
3055 |
+diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S |
3056 |
+index dfb2ba91b670..480edc3a5e03 100644 |
3057 |
+--- a/arch/x86/lib/retpoline.S |
3058 |
++++ b/arch/x86/lib/retpoline.S |
3059 |
+@@ -7,6 +7,7 @@ |
3060 |
+ #include <asm/alternative-asm.h> |
3061 |
+ #include <asm/export.h> |
3062 |
+ #include <asm/nospec-branch.h> |
3063 |
++#include <asm/bitsperlong.h> |
3064 |
+ |
3065 |
+ .macro THUNK reg |
3066 |
+ .section .text.__x86.indirect_thunk |
3067 |
+@@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX) |
3068 |
+ GENERATE_THUNK(_ASM_SI) |
3069 |
+ GENERATE_THUNK(_ASM_DI) |
3070 |
+ GENERATE_THUNK(_ASM_BP) |
3071 |
+-GENERATE_THUNK(_ASM_SP) |
3072 |
+ #ifdef CONFIG_64BIT |
3073 |
+ GENERATE_THUNK(r8) |
3074 |
+ GENERATE_THUNK(r9) |
3075 |
+@@ -47,3 +47,58 @@ GENERATE_THUNK(r13) |
3076 |
+ GENERATE_THUNK(r14) |
3077 |
+ GENERATE_THUNK(r15) |
3078 |
+ #endif |
3079 |
++ |
3080 |
++/* |
3081 |
++ * Fill the CPU return stack buffer. |
3082 |
++ * |
3083 |
++ * Each entry in the RSB, if used for a speculative 'ret', contains an |
3084 |
++ * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
3085 |
++ * |
3086 |
++ * This is required in various cases for retpoline and IBRS-based |
3087 |
++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
3088 |
++ * eliminate potentially bogus entries from the RSB, and sometimes |
3089 |
++ * purely to ensure that it doesn't get empty, which on some CPUs would |
3090 |
++ * allow predictions from other (unwanted!) sources to be used. |
3091 |
++ * |
3092 |
++ * Google experimented with loop-unrolling and this turned out to be |
3093 |
++ * the optimal version - two calls, each with their own speculation |
3094 |
++ * trap should their return address end up getting used, in a loop. |
3095 |
++ */ |
3096 |
++.macro STUFF_RSB nr:req sp:req |
3097 |
++ mov $(\nr / 2), %_ASM_BX |
3098 |
++ .align 16 |
3099 |
++771: |
3100 |
++ call 772f |
3101 |
++773: /* speculation trap */ |
3102 |
++ pause |
3103 |
++ lfence |
3104 |
++ jmp 773b |
3105 |
++ .align 16 |
3106 |
++772: |
3107 |
++ call 774f |
3108 |
++775: /* speculation trap */ |
3109 |
++ pause |
3110 |
++ lfence |
3111 |
++ jmp 775b |
3112 |
++ .align 16 |
3113 |
++774: |
3114 |
++ dec %_ASM_BX |
3115 |
++ jnz 771b |
3116 |
++ add $((BITS_PER_LONG/8) * \nr), \sp |
3117 |
++.endm |
3118 |
++ |
3119 |
++#define RSB_FILL_LOOPS 16 /* To avoid underflow */ |
3120 |
++ |
3121 |
++ENTRY(__fill_rsb) |
3122 |
++ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP |
3123 |
++ ret |
3124 |
++END(__fill_rsb) |
3125 |
++EXPORT_SYMBOL_GPL(__fill_rsb) |
3126 |
++ |
3127 |
++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
3128 |
++ |
3129 |
++ENTRY(__clear_rsb) |
3130 |
++ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP |
3131 |
++ ret |
3132 |
++END(__clear_rsb) |
3133 |
++EXPORT_SYMBOL_GPL(__clear_rsb) |
3134 |
+diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c |
3135 |
+index 1b377f734e64..7add8ba06887 100644 |
3136 |
+--- a/arch/x86/lib/usercopy_32.c |
3137 |
++++ b/arch/x86/lib/usercopy_32.c |
3138 |
+@@ -331,12 +331,12 @@ do { \ |
3139 |
+ |
3140 |
+ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) |
3141 |
+ { |
3142 |
+- stac(); |
3143 |
++ __uaccess_begin_nospec(); |
3144 |
+ if (movsl_is_ok(to, from, n)) |
3145 |
+ __copy_user(to, from, n); |
3146 |
+ else |
3147 |
+ n = __copy_user_intel(to, from, n); |
3148 |
+- clac(); |
3149 |
++ __uaccess_end(); |
3150 |
+ return n; |
3151 |
+ } |
3152 |
+ EXPORT_SYMBOL(__copy_user_ll); |
3153 |
+@@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); |
3154 |
+ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, |
3155 |
+ unsigned long n) |
3156 |
+ { |
3157 |
+- stac(); |
3158 |
++ __uaccess_begin_nospec(); |
3159 |
+ #ifdef CONFIG_X86_INTEL_USERCOPY |
3160 |
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) |
3161 |
+ n = __copy_user_intel_nocache(to, from, n); |
3162 |
+@@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr |
3163 |
+ #else |
3164 |
+ __copy_user(to, from, n); |
3165 |
+ #endif |
3166 |
+- clac(); |
3167 |
++ __uaccess_end(); |
3168 |
+ return n; |
3169 |
+ } |
3170 |
+ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); |
3171 |
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c |
3172 |
+index 5bfe61a5e8e3..012d02624848 100644 |
3173 |
+--- a/arch/x86/mm/tlb.c |
3174 |
++++ b/arch/x86/mm/tlb.c |
3175 |
+@@ -6,13 +6,14 @@ |
3176 |
+ #include <linux/interrupt.h> |
3177 |
+ #include <linux/export.h> |
3178 |
+ #include <linux/cpu.h> |
3179 |
++#include <linux/debugfs.h> |
3180 |
+ |
3181 |
+ #include <asm/tlbflush.h> |
3182 |
+ #include <asm/mmu_context.h> |
3183 |
++#include <asm/nospec-branch.h> |
3184 |
+ #include <asm/cache.h> |
3185 |
+ #include <asm/apic.h> |
3186 |
+ #include <asm/uv/uv.h> |
3187 |
+-#include <linux/debugfs.h> |
3188 |
+ |
3189 |
+ /* |
3190 |
+ * TLB flushing, formerly SMP-only |
3191 |
+@@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
3192 |
+ } else { |
3193 |
+ u16 new_asid; |
3194 |
+ bool need_flush; |
3195 |
++ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); |
3196 |
++ |
3197 |
++ /* |
3198 |
++ * Avoid user/user BTB poisoning by flushing the branch |
3199 |
++ * predictor when switching between processes. This stops |
3200 |
++ * one process from doing Spectre-v2 attacks on another. |
3201 |
++ * |
3202 |
++ * As an optimization, flush indirect branches only when |
3203 |
++ * switching into processes that disable dumping. This |
3204 |
++ * protects high value processes like gpg, without having |
3205 |
++ * too high performance overhead. IBPB is *expensive*! |
3206 |
++ * |
3207 |
++ * This will not flush branches when switching into kernel |
3208 |
++ * threads. It will also not flush if we switch to idle |
3209 |
++ * thread and back to the same process. It will flush if we |
3210 |
++ * switch to a different non-dumpable process. |
3211 |
++ */ |
3212 |
++ if (tsk && tsk->mm && |
3213 |
++ tsk->mm->context.ctx_id != last_ctx_id && |
3214 |
++ get_dumpable(tsk->mm) != SUID_DUMP_USER) |
3215 |
++ indirect_branch_prediction_barrier(); |
3216 |
+ |
3217 |
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) { |
3218 |
+ /* |
3219 |
+@@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
3220 |
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); |
3221 |
+ } |
3222 |
+ |
3223 |
++ /* |
3224 |
++ * Record last user mm's context id, so we can avoid |
3225 |
++ * flushing branch buffer with IBPB if we switch back |
3226 |
++ * to the same user. |
3227 |
++ */ |
3228 |
++ if (next != &init_mm) |
3229 |
++ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); |
3230 |
++ |
3231 |
+ this_cpu_write(cpu_tlbstate.loaded_mm, next); |
3232 |
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); |
3233 |
+ } |
3234 |
+@@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void) |
3235 |
+ write_cr3(build_cr3(mm->pgd, 0)); |
3236 |
+ |
3237 |
+ /* Reinitialize tlbstate. */ |
3238 |
++ this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); |
3239 |
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); |
3240 |
+ this_cpu_write(cpu_tlbstate.next_asid, 1); |
3241 |
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); |
3242 |
+diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c |
3243 |
+index a9020f82eea7..58403052514f 100644 |
3244 |
+--- a/drivers/auxdisplay/img-ascii-lcd.c |
3245 |
++++ b/drivers/auxdisplay/img-ascii-lcd.c |
3246 |
+@@ -443,3 +443,7 @@ static struct platform_driver img_ascii_lcd_driver = { |
3247 |
+ .remove = img_ascii_lcd_remove, |
3248 |
+ }; |
3249 |
+ module_platform_driver(img_ascii_lcd_driver); |
3250 |
++ |
3251 |
++MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); |
3252 |
++MODULE_AUTHOR("Paul Burton <paul.burton@××××.com>"); |
3253 |
++MODULE_LICENSE("GPL"); |
3254 |
+diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c |
3255 |
+index d9ab7c75b14f..e0c73ceba2ed 100644 |
3256 |
+--- a/drivers/fpga/fpga-region.c |
3257 |
++++ b/drivers/fpga/fpga-region.c |
3258 |
+@@ -147,6 +147,7 @@ static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region) |
3259 |
+ mgr_node = of_parse_phandle(np, "fpga-mgr", 0); |
3260 |
+ if (mgr_node) { |
3261 |
+ mgr = of_fpga_mgr_get(mgr_node); |
3262 |
++ of_node_put(mgr_node); |
3263 |
+ of_node_put(np); |
3264 |
+ return mgr; |
3265 |
+ } |
3266 |
+@@ -192,10 +193,13 @@ static int fpga_region_get_bridges(struct fpga_region *region, |
3267 |
+ parent_br = region_np->parent; |
3268 |
+ |
3269 |
+ /* If overlay has a list of bridges, use it. */ |
3270 |
+- if (of_parse_phandle(overlay, "fpga-bridges", 0)) |
3271 |
++ br = of_parse_phandle(overlay, "fpga-bridges", 0); |
3272 |
++ if (br) { |
3273 |
++ of_node_put(br); |
3274 |
+ np = overlay; |
3275 |
+- else |
3276 |
++ } else { |
3277 |
+ np = region_np; |
3278 |
++ } |
3279 |
+ |
3280 |
+ for (i = 0; ; i++) { |
3281 |
+ br = of_parse_phandle(np, "fpga-bridges", i); |
3282 |
+@@ -203,12 +207,15 @@ static int fpga_region_get_bridges(struct fpga_region *region, |
3283 |
+ break; |
3284 |
+ |
3285 |
+ /* If parent bridge is in list, skip it. */ |
3286 |
+- if (br == parent_br) |
3287 |
++ if (br == parent_br) { |
3288 |
++ of_node_put(br); |
3289 |
+ continue; |
3290 |
++ } |
3291 |
+ |
3292 |
+ /* If node is a bridge, get it and add to list */ |
3293 |
+ ret = fpga_bridge_get_to_list(br, region->info, |
3294 |
+ &region->bridge_list); |
3295 |
++ of_node_put(br); |
3296 |
+ |
3297 |
+ /* If any of the bridges are in use, give up */ |
3298 |
+ if (ret == -EBUSY) { |
3299 |
+diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c |
3300 |
+index 98fbb628d5bd..38411e1c155b 100644 |
3301 |
+--- a/drivers/iio/accel/kxsd9-i2c.c |
3302 |
++++ b/drivers/iio/accel/kxsd9-i2c.c |
3303 |
+@@ -63,3 +63,6 @@ static struct i2c_driver kxsd9_i2c_driver = { |
3304 |
+ .id_table = kxsd9_i2c_id, |
3305 |
+ }; |
3306 |
+ module_i2c_driver(kxsd9_i2c_driver); |
3307 |
++ |
3308 |
++MODULE_LICENSE("GPL v2"); |
3309 |
++MODULE_DESCRIPTION("KXSD9 accelerometer I2C interface"); |
3310 |
+diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c |
3311 |
+index 47d24ae5462f..fe3d7826783c 100644 |
3312 |
+--- a/drivers/iio/adc/qcom-vadc-common.c |
3313 |
++++ b/drivers/iio/adc/qcom-vadc-common.c |
3314 |
+@@ -5,6 +5,7 @@ |
3315 |
+ #include <linux/math64.h> |
3316 |
+ #include <linux/log2.h> |
3317 |
+ #include <linux/err.h> |
3318 |
++#include <linux/module.h> |
3319 |
+ |
3320 |
+ #include "qcom-vadc-common.h" |
3321 |
+ |
3322 |
+@@ -229,3 +230,6 @@ int qcom_vadc_decimation_from_dt(u32 value) |
3323 |
+ return __ffs64(value / VADC_DECIMATION_MIN); |
3324 |
+ } |
3325 |
+ EXPORT_SYMBOL(qcom_vadc_decimation_from_dt); |
3326 |
++ |
3327 |
++MODULE_LICENSE("GPL v2"); |
3328 |
++MODULE_DESCRIPTION("Qualcomm ADC common functionality"); |
3329 |
+diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
3330 |
+index 866aa3ce1ac9..6cf0006d4c8d 100644 |
3331 |
+--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
3332 |
++++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
3333 |
+@@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) |
3334 |
+ return 0; |
3335 |
+ } |
3336 |
+ EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); |
3337 |
++ |
3338 |
++MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@××××.fr>"); |
3339 |
++MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); |
3340 |
++MODULE_LICENSE("GPL v2"); |
3341 |
+diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c |
3342 |
+index 3a14cccbd7ff..7948acf14601 100644 |
3343 |
+--- a/drivers/tty/serial/serial_core.c |
3344 |
++++ b/drivers/tty/serial/serial_core.c |
3345 |
+@@ -987,6 +987,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, |
3346 |
+ } |
3347 |
+ } else { |
3348 |
+ retval = uart_startup(tty, state, 1); |
3349 |
++ if (retval == 0) |
3350 |
++ tty_port_set_initialized(port, true); |
3351 |
+ if (retval > 0) |
3352 |
+ retval = 0; |
3353 |
+ } |
3354 |
+diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h |
3355 |
+index 1c65817673db..41615f38bcff 100644 |
3356 |
+--- a/include/linux/fdtable.h |
3357 |
++++ b/include/linux/fdtable.h |
3358 |
+@@ -10,6 +10,7 @@ |
3359 |
+ #include <linux/compiler.h> |
3360 |
+ #include <linux/spinlock.h> |
3361 |
+ #include <linux/rcupdate.h> |
3362 |
++#include <linux/nospec.h> |
3363 |
+ #include <linux/types.h> |
3364 |
+ #include <linux/init.h> |
3365 |
+ #include <linux/fs.h> |
3366 |
+@@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i |
3367 |
+ { |
3368 |
+ struct fdtable *fdt = rcu_dereference_raw(files->fdt); |
3369 |
+ |
3370 |
+- if (fd < fdt->max_fds) |
3371 |
++ if (fd < fdt->max_fds) { |
3372 |
++ fd = array_index_nospec(fd, fdt->max_fds); |
3373 |
+ return rcu_dereference_raw(fdt->fd[fd]); |
3374 |
++ } |
3375 |
+ return NULL; |
3376 |
+ } |
3377 |
+ |
3378 |
+diff --git a/include/linux/init.h b/include/linux/init.h |
3379 |
+index f38b993edacb..943139a563e3 100644 |
3380 |
+--- a/include/linux/init.h |
3381 |
++++ b/include/linux/init.h |
3382 |
+@@ -5,6 +5,13 @@ |
3383 |
+ #include <linux/compiler.h> |
3384 |
+ #include <linux/types.h> |
3385 |
+ |
3386 |
++/* Built-in __init functions needn't be compiled with retpoline */ |
3387 |
++#if defined(RETPOLINE) && !defined(MODULE) |
3388 |
++#define __noretpoline __attribute__((indirect_branch("keep"))) |
3389 |
++#else |
3390 |
++#define __noretpoline |
3391 |
++#endif |
3392 |
++ |
3393 |
+ /* These macros are used to mark some functions or |
3394 |
+ * initialized data (doesn't apply to uninitialized data) |
3395 |
+ * as `initialization' functions. The kernel can take this |
3396 |
+@@ -40,7 +47,7 @@ |
3397 |
+ |
3398 |
+ /* These are for everybody (although not all archs will actually |
3399 |
+ discard it in modules) */ |
3400 |
+-#define __init __section(.init.text) __cold __inittrace __latent_entropy |
3401 |
++#define __init __section(.init.text) __cold __inittrace __latent_entropy __noretpoline |
3402 |
+ #define __initdata __section(.init.data) |
3403 |
+ #define __initconst __section(.init.rodata) |
3404 |
+ #define __exitdata __section(.exit.data) |
3405 |
+diff --git a/include/linux/module.h b/include/linux/module.h |
3406 |
+index fe5aa3736707..b1cc541f2ddf 100644 |
3407 |
+--- a/include/linux/module.h |
3408 |
++++ b/include/linux/module.h |
3409 |
+@@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, |
3410 |
+ static inline void module_bug_cleanup(struct module *mod) {} |
3411 |
+ #endif /* CONFIG_GENERIC_BUG */ |
3412 |
+ |
3413 |
++#ifdef RETPOLINE |
3414 |
++extern bool retpoline_module_ok(bool has_retpoline); |
3415 |
++#else |
3416 |
++static inline bool retpoline_module_ok(bool has_retpoline) |
3417 |
++{ |
3418 |
++ return true; |
3419 |
++} |
3420 |
++#endif |
3421 |
++ |
3422 |
+ #ifdef CONFIG_MODULE_SIG |
3423 |
+ static inline bool module_sig_ok(struct module *module) |
3424 |
+ { |
3425 |
+diff --git a/include/linux/nospec.h b/include/linux/nospec.h |
3426 |
+new file mode 100644 |
3427 |
+index 000000000000..b99bced39ac2 |
3428 |
+--- /dev/null |
3429 |
++++ b/include/linux/nospec.h |
3430 |
+@@ -0,0 +1,72 @@ |
3431 |
++// SPDX-License-Identifier: GPL-2.0 |
3432 |
++// Copyright(c) 2018 Linus Torvalds. All rights reserved. |
3433 |
++// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. |
3434 |
++// Copyright(c) 2018 Intel Corporation. All rights reserved. |
3435 |
++ |
3436 |
++#ifndef _LINUX_NOSPEC_H |
3437 |
++#define _LINUX_NOSPEC_H |
3438 |
++ |
3439 |
++/** |
3440 |
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise |
3441 |
++ * @index: array element index |
3442 |
++ * @size: number of elements in array |
3443 |
++ * |
3444 |
++ * When @index is out of bounds (@index >= @size), the sign bit will be |
3445 |
++ * set. Extend the sign bit to all bits and invert, giving a result of |
3446 |
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size). |
3447 |
++ */ |
3448 |
++#ifndef array_index_mask_nospec |
3449 |
++static inline unsigned long array_index_mask_nospec(unsigned long index, |
3450 |
++ unsigned long size) |
3451 |
++{ |
3452 |
++ /* |
3453 |
++ * Warn developers about inappropriate array_index_nospec() usage. |
3454 |
++ * |
3455 |
++ * Even if the CPU speculates past the WARN_ONCE branch, the |
3456 |
++ * sign bit of @index is taken into account when generating the |
3457 |
++ * mask. |
3458 |
++ * |
3459 |
++ * This warning is compiled out when the compiler can infer that |
3460 |
++ * @index and @size are less than LONG_MAX. |
3461 |
++ */ |
3462 |
++ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, |
3463 |
++ "array_index_nospec() limited to range of [0, LONG_MAX]\n")) |
3464 |
++ return 0; |
3465 |
++ |
3466 |
++ /* |
3467 |
++ * Always calculate and emit the mask even if the compiler |
3468 |
++ * thinks the mask is not needed. The compiler does not take |
3469 |
++ * into account the value of @index under speculation. |
3470 |
++ */ |
3471 |
++ OPTIMIZER_HIDE_VAR(index); |
3472 |
++ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); |
3473 |
++} |
3474 |
++#endif |
3475 |
++ |
3476 |
++/* |
3477 |
++ * array_index_nospec - sanitize an array index after a bounds check |
3478 |
++ * |
3479 |
++ * For a code sequence like: |
3480 |
++ * |
3481 |
++ * if (index < size) { |
3482 |
++ * index = array_index_nospec(index, size); |
3483 |
++ * val = array[index]; |
3484 |
++ * } |
3485 |
++ * |
3486 |
++ * ...if the CPU speculates past the bounds check then |
3487 |
++ * array_index_nospec() will clamp the index within the range of [0, |
3488 |
++ * size). |
3489 |
++ */ |
3490 |
++#define array_index_nospec(index, size) \ |
3491 |
++({ \ |
3492 |
++ typeof(index) _i = (index); \ |
3493 |
++ typeof(size) _s = (size); \ |
3494 |
++ unsigned long _mask = array_index_mask_nospec(_i, _s); \ |
3495 |
++ \ |
3496 |
++ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ |
3497 |
++ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ |
3498 |
++ \ |
3499 |
++ _i &= _mask; \ |
3500 |
++ _i; \ |
3501 |
++}) |
3502 |
++#endif /* _LINUX_NOSPEC_H */ |
3503 |
+diff --git a/kernel/module.c b/kernel/module.c |
3504 |
+index de66ec825992..690c0651c40f 100644 |
3505 |
+--- a/kernel/module.c |
3506 |
++++ b/kernel/module.c |
3507 |
+@@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) |
3508 |
+ } |
3509 |
+ #endif /* CONFIG_LIVEPATCH */ |
3510 |
+ |
3511 |
++static void check_modinfo_retpoline(struct module *mod, struct load_info *info) |
3512 |
++{ |
3513 |
++ if (retpoline_module_ok(get_modinfo(info, "retpoline"))) |
3514 |
++ return; |
3515 |
++ |
3516 |
++ pr_warn("%s: loading module not compiled with retpoline compiler.\n", |
3517 |
++ mod->name); |
3518 |
++} |
3519 |
++ |
3520 |
+ /* Sets info->hdr and info->len. */ |
3521 |
+ static int copy_module_from_user(const void __user *umod, unsigned long len, |
3522 |
+ struct load_info *info) |
3523 |
+@@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) |
3524 |
+ add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); |
3525 |
+ } |
3526 |
+ |
3527 |
++ check_modinfo_retpoline(mod, info); |
3528 |
++ |
3529 |
+ if (get_modinfo(info, "staging")) { |
3530 |
+ add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); |
3531 |
+ pr_warn("%s: module is from the staging directory, the quality " |
3532 |
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c |
3533 |
+index d396cb61a280..81bef0676e1d 100644 |
3534 |
+--- a/net/wireless/nl80211.c |
3535 |
++++ b/net/wireless/nl80211.c |
3536 |
+@@ -16,6 +16,7 @@ |
3537 |
+ #include <linux/nl80211.h> |
3538 |
+ #include <linux/rtnetlink.h> |
3539 |
+ #include <linux/netlink.h> |
3540 |
++#include <linux/nospec.h> |
3541 |
+ #include <linux/etherdevice.h> |
3542 |
+ #include <net/net_namespace.h> |
3543 |
+ #include <net/genetlink.h> |
3544 |
+@@ -2056,20 +2057,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { |
3545 |
+ static int parse_txq_params(struct nlattr *tb[], |
3546 |
+ struct ieee80211_txq_params *txq_params) |
3547 |
+ { |
3548 |
++ u8 ac; |
3549 |
++ |
3550 |
+ if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || |
3551 |
+ !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || |
3552 |
+ !tb[NL80211_TXQ_ATTR_AIFS]) |
3553 |
+ return -EINVAL; |
3554 |
+ |
3555 |
+- txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); |
3556 |
++ ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); |
3557 |
+ txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); |
3558 |
+ txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); |
3559 |
+ txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); |
3560 |
+ txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); |
3561 |
+ |
3562 |
+- if (txq_params->ac >= NL80211_NUM_ACS) |
3563 |
++ if (ac >= NL80211_NUM_ACS) |
3564 |
+ return -EINVAL; |
3565 |
+- |
3566 |
++ txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); |
3567 |
+ return 0; |
3568 |
+ } |
3569 |
+ |
3570 |
+diff --git a/scripts/faddr2line b/scripts/faddr2line |
3571 |
+index 39e07d8574dd..7721d5b2b0c0 100755 |
3572 |
+--- a/scripts/faddr2line |
3573 |
++++ b/scripts/faddr2line |
3574 |
+@@ -44,10 +44,10 @@ |
3575 |
+ set -o errexit |
3576 |
+ set -o nounset |
3577 |
+ |
3578 |
+-READELF="${CROSS_COMPILE}readelf" |
3579 |
+-ADDR2LINE="${CROSS_COMPILE}addr2line" |
3580 |
+-SIZE="${CROSS_COMPILE}size" |
3581 |
+-NM="${CROSS_COMPILE}nm" |
3582 |
++READELF="${CROSS_COMPILE:-}readelf" |
3583 |
++ADDR2LINE="${CROSS_COMPILE:-}addr2line" |
3584 |
++SIZE="${CROSS_COMPILE:-}size" |
3585 |
++NM="${CROSS_COMPILE:-}nm" |
3586 |
+ |
3587 |
+ command -v awk >/dev/null 2>&1 || die "awk isn't installed" |
3588 |
+ command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" |
3589 |
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c |
3590 |
+index 98314b400a95..54deaa1066cf 100644 |
3591 |
+--- a/scripts/mod/modpost.c |
3592 |
++++ b/scripts/mod/modpost.c |
3593 |
+@@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) |
3594 |
+ buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); |
3595 |
+ } |
3596 |
+ |
3597 |
++/* Cannot check for assembler */ |
3598 |
++static void add_retpoline(struct buffer *b) |
3599 |
++{ |
3600 |
++ buf_printf(b, "\n#ifdef RETPOLINE\n"); |
3601 |
++ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); |
3602 |
++ buf_printf(b, "#endif\n"); |
3603 |
++} |
3604 |
++ |
3605 |
+ static void add_staging_flag(struct buffer *b, const char *name) |
3606 |
+ { |
3607 |
+ static const char *staging_dir = "drivers/staging"; |
3608 |
+@@ -2506,6 +2514,7 @@ int main(int argc, char **argv) |
3609 |
+ err |= check_modname_len(mod); |
3610 |
+ add_header(&buf, mod); |
3611 |
+ add_intree_flag(&buf, !external_module); |
3612 |
++ add_retpoline(&buf); |
3613 |
+ add_staging_flag(&buf, mod->name); |
3614 |
+ err |= add_versions(&buf, mod); |
3615 |
+ add_depends(&buf, mod, modules); |
3616 |
+diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c |
3617 |
+index 712ed6598c48..ebdf9bd5a64c 100644 |
3618 |
+--- a/sound/soc/codecs/pcm512x-spi.c |
3619 |
++++ b/sound/soc/codecs/pcm512x-spi.c |
3620 |
+@@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { |
3621 |
+ }; |
3622 |
+ |
3623 |
+ module_spi_driver(pcm512x_spi_driver); |
3624 |
++ |
3625 |
++MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); |
3626 |
++MODULE_AUTHOR("Mark Brown <broonie@××××××.org>"); |
3627 |
++MODULE_LICENSE("GPL v2"); |
3628 |
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c |
3629 |
+index f40d46e24bcc..9cd028aa1509 100644 |
3630 |
+--- a/tools/objtool/check.c |
3631 |
++++ b/tools/objtool/check.c |
3632 |
+@@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file) |
3633 |
+ dest_off = insn->offset + insn->len + insn->immediate; |
3634 |
+ insn->call_dest = find_symbol_by_offset(insn->sec, |
3635 |
+ dest_off); |
3636 |
+- /* |
3637 |
+- * FIXME: Thanks to retpolines, it's now considered |
3638 |
+- * normal for a function to call within itself. So |
3639 |
+- * disable this warning for now. |
3640 |
+- */ |
3641 |
+-#if 0 |
3642 |
+- if (!insn->call_dest) { |
3643 |
+- WARN_FUNC("can't find call dest symbol at offset 0x%lx", |
3644 |
+- insn->sec, insn->offset, dest_off); |
3645 |
++ |
3646 |
++ if (!insn->call_dest && !insn->ignore) { |
3647 |
++ WARN_FUNC("unsupported intra-function call", |
3648 |
++ insn->sec, insn->offset); |
3649 |
++ WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); |
3650 |
+ return -1; |
3651 |
+ } |
3652 |
+-#endif |
3653 |
++ |
3654 |
+ } else if (rela->sym->type == STT_SECTION) { |
3655 |
+ insn->call_dest = find_symbol_by_offset(rela->sym->sec, |
3656 |
+ rela->addend+4); |
3657 |
+@@ -598,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file, |
3658 |
+ struct instruction *orig_insn, |
3659 |
+ struct instruction **new_insn) |
3660 |
+ { |
3661 |
+- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; |
3662 |
++ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; |
3663 |
+ unsigned long dest_off; |
3664 |
+ |
3665 |
+ last_orig_insn = NULL; |
3666 |
+@@ -614,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file, |
3667 |
+ last_orig_insn = insn; |
3668 |
+ } |
3669 |
+ |
3670 |
+- if (!next_insn_same_sec(file, last_orig_insn)) { |
3671 |
+- WARN("%s: don't know how to handle alternatives at end of section", |
3672 |
+- special_alt->orig_sec->name); |
3673 |
+- return -1; |
3674 |
+- } |
3675 |
+- |
3676 |
+- fake_jump = malloc(sizeof(*fake_jump)); |
3677 |
+- if (!fake_jump) { |
3678 |
+- WARN("malloc failed"); |
3679 |
+- return -1; |
3680 |
++ if (next_insn_same_sec(file, last_orig_insn)) { |
3681 |
++ fake_jump = malloc(sizeof(*fake_jump)); |
3682 |
++ if (!fake_jump) { |
3683 |
++ WARN("malloc failed"); |
3684 |
++ return -1; |
3685 |
++ } |
3686 |
++ memset(fake_jump, 0, sizeof(*fake_jump)); |
3687 |
++ INIT_LIST_HEAD(&fake_jump->alts); |
3688 |
++ clear_insn_state(&fake_jump->state); |
3689 |
++ |
3690 |
++ fake_jump->sec = special_alt->new_sec; |
3691 |
++ fake_jump->offset = -1; |
3692 |
++ fake_jump->type = INSN_JUMP_UNCONDITIONAL; |
3693 |
++ fake_jump->jump_dest = list_next_entry(last_orig_insn, list); |
3694 |
++ fake_jump->ignore = true; |
3695 |
+ } |
3696 |
+- memset(fake_jump, 0, sizeof(*fake_jump)); |
3697 |
+- INIT_LIST_HEAD(&fake_jump->alts); |
3698 |
+- clear_insn_state(&fake_jump->state); |
3699 |
+- |
3700 |
+- fake_jump->sec = special_alt->new_sec; |
3701 |
+- fake_jump->offset = -1; |
3702 |
+- fake_jump->type = INSN_JUMP_UNCONDITIONAL; |
3703 |
+- fake_jump->jump_dest = list_next_entry(last_orig_insn, list); |
3704 |
+- fake_jump->ignore = true; |
3705 |
+ |
3706 |
+ if (!special_alt->new_len) { |
3707 |
++ if (!fake_jump) { |
3708 |
++ WARN("%s: empty alternative at end of section", |
3709 |
++ special_alt->orig_sec->name); |
3710 |
++ return -1; |
3711 |
++ } |
3712 |
++ |
3713 |
+ *new_insn = fake_jump; |
3714 |
+ return 0; |
3715 |
+ } |
3716 |
+@@ -648,6 +646,8 @@ static int handle_group_alt(struct objtool_file *file, |
3717 |
+ |
3718 |
+ last_new_insn = insn; |
3719 |
+ |
3720 |
++ insn->ignore = orig_insn->ignore_alts; |
3721 |
++ |
3722 |
+ if (insn->type != INSN_JUMP_CONDITIONAL && |
3723 |
+ insn->type != INSN_JUMP_UNCONDITIONAL) |
3724 |
+ continue; |
3725 |
+@@ -656,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file, |
3726 |
+ continue; |
3727 |
+ |
3728 |
+ dest_off = insn->offset + insn->len + insn->immediate; |
3729 |
+- if (dest_off == special_alt->new_off + special_alt->new_len) |
3730 |
++ if (dest_off == special_alt->new_off + special_alt->new_len) { |
3731 |
++ if (!fake_jump) { |
3732 |
++ WARN("%s: alternative jump to end of section", |
3733 |
++ special_alt->orig_sec->name); |
3734 |
++ return -1; |
3735 |
++ } |
3736 |
+ insn->jump_dest = fake_jump; |
3737 |
++ } |
3738 |
+ |
3739 |
+ if (!insn->jump_dest) { |
3740 |
+ WARN_FUNC("can't find alternative jump destination", |
3741 |
+@@ -672,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file, |
3742 |
+ return -1; |
3743 |
+ } |
3744 |
+ |
3745 |
+- list_add(&fake_jump->list, &last_new_insn->list); |
3746 |
++ if (fake_jump) |
3747 |
++ list_add(&fake_jump->list, &last_new_insn->list); |
3748 |
+ |
3749 |
+ return 0; |
3750 |
+ } |
3751 |
+@@ -729,10 +736,6 @@ static int add_special_section_alts(struct objtool_file *file) |
3752 |
+ goto out; |
3753 |
+ } |
3754 |
+ |
3755 |
+- /* Ignore retpoline alternatives. */ |
3756 |
+- if (orig_insn->ignore_alts) |
3757 |
+- continue; |
3758 |
+- |
3759 |
+ new_insn = NULL; |
3760 |
+ if (!special_alt->group || special_alt->new_len) { |
3761 |
+ new_insn = find_insn(file, special_alt->new_sec, |
3762 |
+@@ -1089,11 +1092,11 @@ static int decode_sections(struct objtool_file *file) |
3763 |
+ if (ret) |
3764 |
+ return ret; |
3765 |
+ |
3766 |
+- ret = add_call_destinations(file); |
3767 |
++ ret = add_special_section_alts(file); |
3768 |
+ if (ret) |
3769 |
+ return ret; |
3770 |
+ |
3771 |
+- ret = add_special_section_alts(file); |
3772 |
++ ret = add_call_destinations(file); |
3773 |
+ if (ret) |
3774 |
+ return ret; |
3775 |
+ |
3776 |
+@@ -1720,10 +1723,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, |
3777 |
+ |
3778 |
+ insn->visited = true; |
3779 |
+ |
3780 |
+- list_for_each_entry(alt, &insn->alts, list) { |
3781 |
+- ret = validate_branch(file, alt->insn, state); |
3782 |
+- if (ret) |
3783 |
+- return 1; |
3784 |
++ if (!insn->ignore_alts) { |
3785 |
++ list_for_each_entry(alt, &insn->alts, list) { |
3786 |
++ ret = validate_branch(file, alt->insn, state); |
3787 |
++ if (ret) |
3788 |
++ return 1; |
3789 |
++ } |
3790 |
+ } |
3791 |
+ |
3792 |
+ switch (insn->type) { |
3793 |
+diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c |
3794 |
+index e61fe703197b..18384d9be4e1 100644 |
3795 |
+--- a/tools/objtool/orc_gen.c |
3796 |
++++ b/tools/objtool/orc_gen.c |
3797 |
+@@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, |
3798 |
+ struct orc_entry *orc; |
3799 |
+ struct rela *rela; |
3800 |
+ |
3801 |
++ if (!insn_sec->sym) { |
3802 |
++ WARN("missing symbol for section %s", insn_sec->name); |
3803 |
++ return -1; |
3804 |
++ } |
3805 |
++ |
3806 |
+ /* populate ORC data */ |
3807 |
+ orc = (struct orc_entry *)u_sec->data->d_buf + idx; |
3808 |
+ memcpy(orc, o, sizeof(*orc)); |