From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:4.15 commit in: /
Date: Thu, 08 Feb 2018 00:38:57
Message-Id: 1518050324.8f7885176c06072da594527eb21ae23cfd8ddbf5.mpagano@gentoo
1 commit: 8f7885176c06072da594527eb21ae23cfd8ddbf5
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Thu Feb 8 00:38:44 2018 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Thu Feb 8 00:38:44 2018 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=8f788517
7
8 Linux patch 4.15.2
9
10 0000_README             |    4 +
11 1001_linux-4.15.2.patch | 3696 +++++++++++++++++++++++++++++++++++++++++++++++
12 2 files changed, 3700 insertions(+)
13
14 diff --git a/0000_README b/0000_README
15 index da07a38..db575f6 100644
16 --- a/0000_README
17 +++ b/0000_README
18 @@ -47,6 +47,10 @@ Patch: 1000_linux-4.15.1.patch
19 From: http://www.kernel.org
20 Desc: Linux 4.15.1
21
22 +Patch: 1001_linux-4.15.2.patch
23 +From: http://www.kernel.org
24 +Desc: Linux 4.15.2
25 +
26 Patch: 1500_XATTR_USER_PREFIX.patch
27 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
28 Desc: Support for namespace user.pax.* on tmpfs.
29
30 diff --git a/1001_linux-4.15.2.patch b/1001_linux-4.15.2.patch
31 new file mode 100644
32 index 0000000..e9d606b
33 --- /dev/null
34 +++ b/1001_linux-4.15.2.patch
35 @@ -0,0 +1,3696 @@
36 +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
37 +index 46b26bfee27b..1e762c210f1b 100644
38 +--- a/Documentation/admin-guide/kernel-parameters.txt
39 ++++ b/Documentation/admin-guide/kernel-parameters.txt
40 +@@ -2742,8 +2742,6 @@
41 + norandmaps Don't use address space randomization. Equivalent to
42 + echo 0 > /proc/sys/kernel/randomize_va_space
43 +
44 +- noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops
45 +-
46 + noreplace-smp [X86-32,SMP] Don't replace SMP instructions
47 + with UP alternatives
48 +
49 +diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt
50 +new file mode 100644
51 +index 000000000000..e9e6cbae2841
52 +--- /dev/null
53 ++++ b/Documentation/speculation.txt
54 +@@ -0,0 +1,90 @@
55 ++This document explains potential effects of speculation, and how undesirable
56 ++effects can be mitigated portably using common APIs.
57 ++
58 ++===========
59 ++Speculation
60 ++===========
61 ++
62 ++To improve performance and minimize average latencies, many contemporary CPUs
63 ++employ speculative execution techniques such as branch prediction, performing
64 ++work which may be discarded at a later stage.
65 ++
66 ++Typically speculative execution cannot be observed from architectural state,
67 ++such as the contents of registers. However, in some cases it is possible to
68 ++observe its impact on microarchitectural state, such as the presence or
69 ++absence of data in caches. Such state may form side-channels which can be
70 ++observed to extract secret information.
71 ++
72 ++For example, in the presence of branch prediction, it is possible for bounds
73 ++checks to be ignored by code which is speculatively executed. Consider the
74 ++following code:
75 ++
76 ++ int load_array(int *array, unsigned int index)
77 ++ {
78 ++ if (index >= MAX_ARRAY_ELEMS)
79 ++ return 0;
80 ++ else
81 ++ return array[index];
82 ++ }
83 ++
84 ++Which, on arm64, may be compiled to an assembly sequence such as:
85 ++
86 ++ CMP <index>, #MAX_ARRAY_ELEMS
87 ++ B.LT less
88 ++ MOV <returnval>, #0
89 ++ RET
90 ++ less:
91 ++ LDR <returnval>, [<array>, <index>]
92 ++ RET
93 ++
94 ++It is possible that a CPU mis-predicts the conditional branch, and
95 ++speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
96 ++value will subsequently be discarded, but the speculated load may affect
97 ++microarchitectural state which can be subsequently measured.
98 ++
99 ++More complex sequences involving multiple dependent memory accesses may
100 ++result in sensitive information being leaked. Consider the following
101 ++code, building on the prior example:
102 ++
103 ++ int load_dependent_arrays(int *arr1, int *arr2, int index)
104 ++ {
105 ++ int val1, val2,
106 ++
107 ++ val1 = load_array(arr1, index);
108 ++ val2 = load_array(arr2, val1);
109 ++
110 ++ return val2;
111 ++ }
112 ++
113 ++Under speculation, the first call to load_array() may return the value
114 ++of an out-of-bounds address, while the second call will influence
115 ++microarchitectural state dependent on this value. This may provide an
116 ++arbitrary read primitive.
117 ++
118 ++====================================
119 ++Mitigating speculation side-channels
120 ++====================================
121 ++
122 ++The kernel provides a generic API to ensure that bounds checks are
123 ++respected even under speculation. Architectures which are affected by
124 ++speculation-based side-channels are expected to implement these
125 ++primitives.
126 ++
127 ++The array_index_nospec() helper in <linux/nospec.h> can be used to
128 ++prevent information from being leaked via side-channels.
129 ++
130 ++A call to array_index_nospec(index, size) returns a sanitized index
131 ++value that is bounded to [0, size) even under cpu speculation
132 ++conditions.
133 ++
134 ++This can be used to protect the earlier load_array() example:
135 ++
136 ++ int load_array(int *array, unsigned int index)
137 ++ {
138 ++ if (index >= MAX_ARRAY_ELEMS)
139 ++ return 0;
140 ++ else {
141 ++ index = array_index_nospec(index, MAX_ARRAY_ELEMS);
142 ++ return array[index];
143 ++ }
144 ++ }
145 +diff --git a/Makefile b/Makefile
146 +index af101b556ba0..54f1bc10b531 100644
147 +--- a/Makefile
148 ++++ b/Makefile
149 +@@ -1,7 +1,7 @@
150 + # SPDX-License-Identifier: GPL-2.0
151 + VERSION = 4
152 + PATCHLEVEL = 15
153 +-SUBLEVEL = 1
154 ++SUBLEVEL = 2
155 + EXTRAVERSION =
156 + NAME = Fearless Coyote
157 +
158 +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
159 +index d7d3cc24baf4..21dbdf0e476b 100644
160 +--- a/arch/x86/entry/common.c
161 ++++ b/arch/x86/entry/common.c
162 +@@ -21,6 +21,7 @@
163 + #include <linux/export.h>
164 + #include <linux/context_tracking.h>
165 + #include <linux/user-return-notifier.h>
166 ++#include <linux/nospec.h>
167 + #include <linux/uprobes.h>
168 + #include <linux/livepatch.h>
169 + #include <linux/syscalls.h>
170 +@@ -206,7 +207,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
171 + * special case only applies after poking regs and before the
172 + * very next return to user mode.
173 + */
174 +- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
175 ++ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
176 + #endif
177 +
178 + user_enter_irqoff();
179 +@@ -282,7 +283,8 @@ __visible void do_syscall_64(struct pt_regs *regs)
180 + * regs->orig_ax, which changes the behavior of some syscalls.
181 + */
182 + if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
183 +- regs->ax = sys_call_table[nr & __SYSCALL_MASK](
184 ++ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
185 ++ regs->ax = sys_call_table[nr](
186 + regs->di, regs->si, regs->dx,
187 + regs->r10, regs->r8, regs->r9);
188 + }
189 +@@ -304,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
190 + unsigned int nr = (unsigned int)regs->orig_ax;
191 +
192 + #ifdef CONFIG_IA32_EMULATION
193 +- current->thread.status |= TS_COMPAT;
194 ++ ti->status |= TS_COMPAT;
195 + #endif
196 +
197 + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
198 +@@ -318,6 +320,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
199 + }
200 +
201 + if (likely(nr < IA32_NR_syscalls)) {
202 ++ nr = array_index_nospec(nr, IA32_NR_syscalls);
203 + /*
204 + * It's possible that a 32-bit syscall implementation
205 + * takes a 64-bit parameter but nonetheless assumes that
206 +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
207 +index 60c4c342316c..2a35b1e0fb90 100644
208 +--- a/arch/x86/entry/entry_32.S
209 ++++ b/arch/x86/entry/entry_32.S
210 +@@ -252,7 +252,8 @@ ENTRY(__switch_to_asm)
211 + * exist, overwrite the RSB with entries which capture
212 + * speculative execution to prevent attack.
213 + */
214 +- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
215 ++ /* Clobbers %ebx */
216 ++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
217 + #endif
218 +
219 + /* restore callee-saved registers */
220 +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
221 +index ff6f8022612c..c752abe89d80 100644
222 +--- a/arch/x86/entry/entry_64.S
223 ++++ b/arch/x86/entry/entry_64.S
224 +@@ -236,91 +236,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
225 + pushq %r9 /* pt_regs->r9 */
226 + pushq %r10 /* pt_regs->r10 */
227 + pushq %r11 /* pt_regs->r11 */
228 +- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
229 +- UNWIND_HINT_REGS extra=0
230 +-
231 +- TRACE_IRQS_OFF
232 +-
233 +- /*
234 +- * If we need to do entry work or if we guess we'll need to do
235 +- * exit work, go straight to the slow path.
236 +- */
237 +- movq PER_CPU_VAR(current_task), %r11
238 +- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
239 +- jnz entry_SYSCALL64_slow_path
240 +-
241 +-entry_SYSCALL_64_fastpath:
242 +- /*
243 +- * Easy case: enable interrupts and issue the syscall. If the syscall
244 +- * needs pt_regs, we'll call a stub that disables interrupts again
245 +- * and jumps to the slow path.
246 +- */
247 +- TRACE_IRQS_ON
248 +- ENABLE_INTERRUPTS(CLBR_NONE)
249 +-#if __SYSCALL_MASK == ~0
250 +- cmpq $__NR_syscall_max, %rax
251 +-#else
252 +- andl $__SYSCALL_MASK, %eax
253 +- cmpl $__NR_syscall_max, %eax
254 +-#endif
255 +- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
256 +- movq %r10, %rcx
257 +-
258 +- /*
259 +- * This call instruction is handled specially in stub_ptregs_64.
260 +- * It might end up jumping to the slow path. If it jumps, RAX
261 +- * and all argument registers are clobbered.
262 +- */
263 +-#ifdef CONFIG_RETPOLINE
264 +- movq sys_call_table(, %rax, 8), %rax
265 +- call __x86_indirect_thunk_rax
266 +-#else
267 +- call *sys_call_table(, %rax, 8)
268 +-#endif
269 +-.Lentry_SYSCALL_64_after_fastpath_call:
270 +-
271 +- movq %rax, RAX(%rsp)
272 +-1:
273 ++ pushq %rbx /* pt_regs->rbx */
274 ++ pushq %rbp /* pt_regs->rbp */
275 ++ pushq %r12 /* pt_regs->r12 */
276 ++ pushq %r13 /* pt_regs->r13 */
277 ++ pushq %r14 /* pt_regs->r14 */
278 ++ pushq %r15 /* pt_regs->r15 */
279 ++ UNWIND_HINT_REGS
280 +
281 +- /*
282 +- * If we get here, then we know that pt_regs is clean for SYSRET64.
283 +- * If we see that no exit work is required (which we are required
284 +- * to check with IRQs off), then we can go straight to SYSRET64.
285 +- */
286 +- DISABLE_INTERRUPTS(CLBR_ANY)
287 + TRACE_IRQS_OFF
288 +- movq PER_CPU_VAR(current_task), %r11
289 +- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
290 +- jnz 1f
291 +-
292 +- LOCKDEP_SYS_EXIT
293 +- TRACE_IRQS_ON /* user mode is traced as IRQs on */
294 +- movq RIP(%rsp), %rcx
295 +- movq EFLAGS(%rsp), %r11
296 +- addq $6*8, %rsp /* skip extra regs -- they were preserved */
297 +- UNWIND_HINT_EMPTY
298 +- jmp .Lpop_c_regs_except_rcx_r11_and_sysret
299 +
300 +-1:
301 +- /*
302 +- * The fast path looked good when we started, but something changed
303 +- * along the way and we need to switch to the slow path. Calling
304 +- * raise(3) will trigger this, for example. IRQs are off.
305 +- */
306 +- TRACE_IRQS_ON
307 +- ENABLE_INTERRUPTS(CLBR_ANY)
308 +- SAVE_EXTRA_REGS
309 +- movq %rsp, %rdi
310 +- call syscall_return_slowpath /* returns with IRQs disabled */
311 +- jmp return_from_SYSCALL_64
312 +-
313 +-entry_SYSCALL64_slow_path:
314 + /* IRQs are off. */
315 +- SAVE_EXTRA_REGS
316 + movq %rsp, %rdi
317 + call do_syscall_64 /* returns with IRQs disabled */
318 +
319 +-return_from_SYSCALL_64:
320 + TRACE_IRQS_IRETQ /* we're about to change IF */
321 +
322 + /*
323 +@@ -393,7 +322,6 @@ syscall_return_via_sysret:
324 + /* rcx and r11 are already restored (see code above) */
325 + UNWIND_HINT_EMPTY
326 + POP_EXTRA_REGS
327 +-.Lpop_c_regs_except_rcx_r11_and_sysret:
328 + popq %rsi /* skip r11 */
329 + popq %r10
330 + popq %r9
331 +@@ -424,47 +352,6 @@ syscall_return_via_sysret:
332 + USERGS_SYSRET64
333 + END(entry_SYSCALL_64)
334 +
335 +-ENTRY(stub_ptregs_64)
336 +- /*
337 +- * Syscalls marked as needing ptregs land here.
338 +- * If we are on the fast path, we need to save the extra regs,
339 +- * which we achieve by trying again on the slow path. If we are on
340 +- * the slow path, the extra regs are already saved.
341 +- *
342 +- * RAX stores a pointer to the C function implementing the syscall.
343 +- * IRQs are on.
344 +- */
345 +- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
346 +- jne 1f
347 +-
348 +- /*
349 +- * Called from fast path -- disable IRQs again, pop return address
350 +- * and jump to slow path
351 +- */
352 +- DISABLE_INTERRUPTS(CLBR_ANY)
353 +- TRACE_IRQS_OFF
354 +- popq %rax
355 +- UNWIND_HINT_REGS extra=0
356 +- jmp entry_SYSCALL64_slow_path
357 +-
358 +-1:
359 +- JMP_NOSPEC %rax /* Called from C */
360 +-END(stub_ptregs_64)
361 +-
362 +-.macro ptregs_stub func
363 +-ENTRY(ptregs_\func)
364 +- UNWIND_HINT_FUNC
365 +- leaq \func(%rip), %rax
366 +- jmp stub_ptregs_64
367 +-END(ptregs_\func)
368 +-.endm
369 +-
370 +-/* Instantiate ptregs_stub for each ptregs-using syscall */
371 +-#define __SYSCALL_64_QUAL_(sym)
372 +-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
373 +-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
374 +-#include <asm/syscalls_64.h>
375 +-
376 + /*
377 + * %rdi: prev task
378 + * %rsi: next task
379 +@@ -499,7 +386,8 @@ ENTRY(__switch_to_asm)
380 + * exist, overwrite the RSB with entries which capture
381 + * speculative execution to prevent attack.
382 + */
383 +- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
384 ++ /* Clobbers %rbx */
385 ++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
386 + #endif
387 +
388 + /* restore callee-saved registers */
389 +diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
390 +index 9c09775e589d..c176d2fab1da 100644
391 +--- a/arch/x86/entry/syscall_64.c
392 ++++ b/arch/x86/entry/syscall_64.c
393 +@@ -7,14 +7,11 @@
394 + #include <asm/asm-offsets.h>
395 + #include <asm/syscall.h>
396 +
397 +-#define __SYSCALL_64_QUAL_(sym) sym
398 +-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
399 +-
400 +-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
401 ++#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
402 + #include <asm/syscalls_64.h>
403 + #undef __SYSCALL_64
404 +
405 +-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
406 ++#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
407 +
408 + extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
409 +
410 +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
411 +index 1908214b9125..4d111616524b 100644
412 +--- a/arch/x86/include/asm/asm-prototypes.h
413 ++++ b/arch/x86/include/asm/asm-prototypes.h
414 +@@ -38,4 +38,7 @@ INDIRECT_THUNK(dx)
415 + INDIRECT_THUNK(si)
416 + INDIRECT_THUNK(di)
417 + INDIRECT_THUNK(bp)
418 ++asmlinkage void __fill_rsb(void);
419 ++asmlinkage void __clear_rsb(void);
420 ++
421 + #endif /* CONFIG_RETPOLINE */
422 +diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
423 +index 7fb336210e1b..30d406146016 100644
424 +--- a/arch/x86/include/asm/barrier.h
425 ++++ b/arch/x86/include/asm/barrier.h
426 +@@ -24,6 +24,34 @@
427 + #define wmb() asm volatile("sfence" ::: "memory")
428 + #endif
429 +
430 ++/**
431 ++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
432 ++ * bounds check succeeds and 0 otherwise
433 ++ * @index: array element index
434 ++ * @size: number of elements in array
435 ++ *
436 ++ * Returns:
437 ++ * 0 - (index < size)
438 ++ */
439 ++static inline unsigned long array_index_mask_nospec(unsigned long index,
440 ++ unsigned long size)
441 ++{
442 ++ unsigned long mask;
443 ++
444 ++ asm ("cmp %1,%2; sbb %0,%0;"
445 ++ :"=r" (mask)
446 ++ :"r"(size),"r" (index)
447 ++ :"cc");
448 ++ return mask;
449 ++}
450 ++
451 ++/* Override the default implementation from linux/nospec.h. */
452 ++#define array_index_mask_nospec array_index_mask_nospec
453 ++
454 ++/* Prevent speculative execution past this barrier. */
455 ++#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
456 ++ "lfence", X86_FEATURE_LFENCE_RDTSC)
457 ++
458 + #ifdef CONFIG_X86_PPRO_FENCE
459 + #define dma_rmb() rmb()
460 + #else
461 +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
462 +index ea9a7dde62e5..70eddb3922ff 100644
463 +--- a/arch/x86/include/asm/cpufeature.h
464 ++++ b/arch/x86/include/asm/cpufeature.h
465 +@@ -29,6 +29,7 @@ enum cpuid_leafs
466 + CPUID_8000_000A_EDX,
467 + CPUID_7_ECX,
468 + CPUID_8000_0007_EBX,
469 ++ CPUID_7_EDX,
470 + };
471 +
472 + #ifdef CONFIG_X86_FEATURE_NAMES
473 +@@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
474 + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
475 + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
476 + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
477 ++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
478 + REQUIRED_MASK_CHECK || \
479 +- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
480 ++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
481 +
482 + #define DISABLED_MASK_BIT_SET(feature_bit) \
483 + ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
484 +@@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
485 + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
486 + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
487 + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
488 ++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
489 + DISABLED_MASK_CHECK || \
490 +- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
491 ++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
492 +
493 + #define cpu_has(c, bit) \
494 + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
495 +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
496 +index 25b9375c1484..73b5fff159a4 100644
497 +--- a/arch/x86/include/asm/cpufeatures.h
498 ++++ b/arch/x86/include/asm/cpufeatures.h
499 +@@ -13,7 +13,7 @@
500 + /*
501 + * Defines x86 CPU feature bits
502 + */
503 +-#define NCAPINTS 18 /* N 32-bit words worth of info */
504 ++#define NCAPINTS 19 /* N 32-bit words worth of info */
505 + #define NBUGINTS 1 /* N 32-bit bug flags */
506 +
507 + /*
508 +@@ -203,14 +203,14 @@
509 + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
510 + #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
511 + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
512 +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
513 +-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
514 ++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
515 ++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
516 + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
517 +-#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
518 +-#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
519 +
520 + #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
521 +-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
522 ++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
523 ++
524 ++#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
525 +
526 + /* Virtualization flags: Linux defined, word 8 */
527 + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
528 +@@ -271,6 +271,9 @@
529 + #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
530 + #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
531 + #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
532 ++#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
533 ++#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
534 ++#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
535 +
536 + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
537 + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
538 +@@ -319,6 +322,13 @@
539 + #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
540 + #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
541 +
542 ++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
543 ++#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
544 ++#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
545 ++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
546 ++#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
547 ++#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
548 ++
549 + /*
550 + * BUG word(s)
551 + */
552 +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
553 +index b027633e7300..33833d1909af 100644
554 +--- a/arch/x86/include/asm/disabled-features.h
555 ++++ b/arch/x86/include/asm/disabled-features.h
556 +@@ -77,6 +77,7 @@
557 + #define DISABLED_MASK15 0
558 + #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
559 + #define DISABLED_MASK17 0
560 +-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
561 ++#define DISABLED_MASK18 0
562 ++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
563 +
564 + #endif /* _ASM_X86_DISABLED_FEATURES_H */
565 +diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
566 +index 64c4a30e0d39..e203169931c7 100644
567 +--- a/arch/x86/include/asm/fixmap.h
568 ++++ b/arch/x86/include/asm/fixmap.h
569 +@@ -137,8 +137,10 @@ enum fixed_addresses {
570 +
571 + extern void reserve_top_address(unsigned long reserve);
572 +
573 +-#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
574 +-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
575 ++#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
576 ++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
577 ++#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
578 ++#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE)
579 +
580 + extern int fixmaps_set;
581 +
582 +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
583 +index e7b983a35506..e520a1e6fc11 100644
584 +--- a/arch/x86/include/asm/msr-index.h
585 ++++ b/arch/x86/include/asm/msr-index.h
586 +@@ -39,6 +39,13 @@
587 +
588 + /* Intel MSRs. Some also available on other CPUs */
589 +
590 ++#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
591 ++#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
592 ++#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
593 ++
594 ++#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
595 ++#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
596 ++
597 + #define MSR_PPIN_CTL 0x0000004e
598 + #define MSR_PPIN 0x0000004f
599 +
600 +@@ -57,6 +64,11 @@
601 + #define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
602 +
603 + #define MSR_MTRRcap 0x000000fe
604 ++
605 ++#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
606 ++#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
607 ++#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
608 ++
609 + #define MSR_IA32_BBL_CR_CTL 0x00000119
610 + #define MSR_IA32_BBL_CR_CTL3 0x0000011e
611 +
612 +diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
613 +index 07962f5f6fba..30df295f6d94 100644
614 +--- a/arch/x86/include/asm/msr.h
615 ++++ b/arch/x86/include/asm/msr.h
616 +@@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
617 + * that some other imaginary CPU is updating continuously with a
618 + * time stamp.
619 + */
620 +- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
621 +- "lfence", X86_FEATURE_LFENCE_RDTSC);
622 ++ barrier_nospec();
623 + return rdtsc();
624 + }
625 +
626 +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
627 +index 4ad41087ce0e..4d57894635f2 100644
628 +--- a/arch/x86/include/asm/nospec-branch.h
629 ++++ b/arch/x86/include/asm/nospec-branch.h
630 +@@ -1,56 +1,12 @@
631 + /* SPDX-License-Identifier: GPL-2.0 */
632 +
633 +-#ifndef __NOSPEC_BRANCH_H__
634 +-#define __NOSPEC_BRANCH_H__
635 ++#ifndef _ASM_X86_NOSPEC_BRANCH_H_
636 ++#define _ASM_X86_NOSPEC_BRANCH_H_
637 +
638 + #include <asm/alternative.h>
639 + #include <asm/alternative-asm.h>
640 + #include <asm/cpufeatures.h>
641 +
642 +-/*
643 +- * Fill the CPU return stack buffer.
644 +- *
645 +- * Each entry in the RSB, if used for a speculative 'ret', contains an
646 +- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
647 +- *
648 +- * This is required in various cases for retpoline and IBRS-based
649 +- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
650 +- * eliminate potentially bogus entries from the RSB, and sometimes
651 +- * purely to ensure that it doesn't get empty, which on some CPUs would
652 +- * allow predictions from other (unwanted!) sources to be used.
653 +- *
654 +- * We define a CPP macro such that it can be used from both .S files and
655 +- * inline assembly. It's possible to do a .macro and then include that
656 +- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
657 +- */
658 +-
659 +-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
660 +-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
661 +-
662 +-/*
663 +- * Google experimented with loop-unrolling and this turned out to be
664 +- * the optimal version — two calls, each with their own speculation
665 +- * trap should their return address end up getting used, in a loop.
666 +- */
667 +-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
668 +- mov $(nr/2), reg; \
669 +-771: \
670 +- call 772f; \
671 +-773: /* speculation trap */ \
672 +- pause; \
673 +- lfence; \
674 +- jmp 773b; \
675 +-772: \
676 +- call 774f; \
677 +-775: /* speculation trap */ \
678 +- pause; \
679 +- lfence; \
680 +- jmp 775b; \
681 +-774: \
682 +- dec reg; \
683 +- jnz 771b; \
684 +- add $(BITS_PER_LONG/8) * nr, sp;
685 +-
686 + #ifdef __ASSEMBLY__
687 +
688 + /*
689 +@@ -121,17 +77,10 @@
690 + #endif
691 + .endm
692 +
693 +- /*
694 +- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
695 +- * monstrosity above, manually.
696 +- */
697 +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
698 ++/* This clobbers the BX register */
699 ++.macro FILL_RETURN_BUFFER nr:req ftr:req
700 + #ifdef CONFIG_RETPOLINE
701 +- ANNOTATE_NOSPEC_ALTERNATIVE
702 +- ALTERNATIVE "jmp .Lskip_rsb_\@", \
703 +- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
704 +- \ftr
705 +-.Lskip_rsb_\@:
706 ++ ALTERNATIVE "", "call __clear_rsb", \ftr
707 + #endif
708 + .endm
709 +
710 +@@ -201,22 +150,25 @@ extern char __indirect_thunk_end[];
711 + * On VMEXIT we must ensure that no RSB predictions learned in the guest
712 + * can be followed in the host, by overwriting the RSB completely. Both
713 + * retpoline and IBRS mitigations for Spectre v2 need this; only on future
714 +- * CPUs with IBRS_ATT *might* it be avoided.
715 ++ * CPUs with IBRS_ALL *might* it be avoided.
716 + */
717 + static inline void vmexit_fill_RSB(void)
718 + {
719 + #ifdef CONFIG_RETPOLINE
720 +- unsigned long loops;
721 +-
722 +- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
723 +- ALTERNATIVE("jmp 910f",
724 +- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
725 +- X86_FEATURE_RETPOLINE)
726 +- "910:"
727 +- : "=r" (loops), ASM_CALL_CONSTRAINT
728 +- : : "memory" );
729 ++ alternative_input("",
730 ++ "call __fill_rsb",
731 ++ X86_FEATURE_RETPOLINE,
732 ++ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
733 + #endif
734 + }
735 +
736 ++static inline void indirect_branch_prediction_barrier(void)
737 ++{
738 ++ alternative_input("",
739 ++ "call __ibp_barrier",
740 ++ X86_FEATURE_USE_IBPB,
741 ++ ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
742 ++}
743 ++
744 + #endif /* __ASSEMBLY__ */
745 +-#endif /* __NOSPEC_BRANCH_H__ */
746 ++#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
747 +diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
748 +index ce245b0cdfca..0777e18a1d23 100644
749 +--- a/arch/x86/include/asm/pgtable_32_types.h
750 ++++ b/arch/x86/include/asm/pgtable_32_types.h
751 +@@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
752 + */
753 + #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
754 +
755 +-#define CPU_ENTRY_AREA_BASE \
756 +- ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
757 ++#define CPU_ENTRY_AREA_BASE \
758 ++ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
759 ++ & PMD_MASK)
760 +
761 + #define PKMAP_BASE \
762 + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
763 +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
764 +index d3a67fba200a..513f9604c192 100644
765 +--- a/arch/x86/include/asm/processor.h
766 ++++ b/arch/x86/include/asm/processor.h
767 +@@ -460,8 +460,6 @@ struct thread_struct {
768 + unsigned short gsindex;
769 + #endif
770 +
771 +- u32 status; /* thread synchronous flags */
772 +-
773 + #ifdef CONFIG_X86_64
774 + unsigned long fsbase;
775 + unsigned long gsbase;
776 +@@ -971,4 +969,7 @@ bool xen_set_default_idle(void);
777 +
778 + void stop_this_cpu(void *dummy);
779 + void df_debug(struct pt_regs *regs, long error_code);
780 ++
781 ++void __ibp_barrier(void);
782 ++
783 + #endif /* _ASM_X86_PROCESSOR_H */
784 +diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
785 +index d91ba04dd007..fb3a6de7440b 100644
786 +--- a/arch/x86/include/asm/required-features.h
787 ++++ b/arch/x86/include/asm/required-features.h
788 +@@ -106,6 +106,7 @@
789 + #define REQUIRED_MASK15 0
790 + #define REQUIRED_MASK16 (NEED_LA57)
791 + #define REQUIRED_MASK17 0
792 +-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
793 ++#define REQUIRED_MASK18 0
794 ++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
795 +
796 + #endif /* _ASM_X86_REQUIRED_FEATURES_H */
797 +diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
798 +index e3c95e8e61c5..03eedc21246d 100644
799 +--- a/arch/x86/include/asm/syscall.h
800 ++++ b/arch/x86/include/asm/syscall.h
801 +@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
802 + * TS_COMPAT is set for 32-bit syscall entries and then
803 + * remains set until we return to user mode.
804 + */
805 +- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
806 ++ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
807 + /*
808 + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
809 + * and will match correctly in comparisons.
810 +@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
811 + unsigned long *args)
812 + {
813 + # ifdef CONFIG_IA32_EMULATION
814 +- if (task->thread.status & TS_COMPAT)
815 ++ if (task->thread_info.status & TS_COMPAT)
816 + switch (i) {
817 + case 0:
818 + if (!n--) break;
819 +@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
820 + const unsigned long *args)
821 + {
822 + # ifdef CONFIG_IA32_EMULATION
823 +- if (task->thread.status & TS_COMPAT)
824 ++ if (task->thread_info.status & TS_COMPAT)
825 + switch (i) {
826 + case 0:
827 + if (!n--) break;
828 +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
829 +index 00223333821a..eda3b6823ca4 100644
830 +--- a/arch/x86/include/asm/thread_info.h
831 ++++ b/arch/x86/include/asm/thread_info.h
832 +@@ -55,6 +55,7 @@ struct task_struct;
833 +
834 + struct thread_info {
835 + unsigned long flags; /* low level flags */
836 ++ u32 status; /* thread synchronous flags */
837 + };
838 +
839 + #define INIT_THREAD_INFO(tsk) \
840 +@@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack,
841 + #define in_ia32_syscall() true
842 + #else
843 + #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
844 +- current->thread.status & TS_COMPAT)
845 ++ current_thread_info()->status & TS_COMPAT)
846 + #endif
847 +
848 + /*
849 +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
850 +index d33e4a26dc7e..2b8f18ca5874 100644
851 +--- a/arch/x86/include/asm/tlbflush.h
852 ++++ b/arch/x86/include/asm/tlbflush.h
853 +@@ -174,6 +174,8 @@ struct tlb_state {
854 + struct mm_struct *loaded_mm;
855 + u16 loaded_mm_asid;
856 + u16 next_asid;
857 ++ /* last user mm's ctx id */
858 ++ u64 last_ctx_id;
859 +
860 + /*
861 + * We can be in one of several states:
862 +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
863 +index 574dff4d2913..aae77eb8491c 100644
864 +--- a/arch/x86/include/asm/uaccess.h
865 ++++ b/arch/x86/include/asm/uaccess.h
866 +@@ -124,6 +124,11 @@ extern int __get_user_bad(void);
867 +
868 + #define __uaccess_begin() stac()
869 + #define __uaccess_end() clac()
870 ++#define __uaccess_begin_nospec() \
871 ++({ \
872 ++ stac(); \
873 ++ barrier_nospec(); \
874 ++})
875 +
876 + /*
877 + * This is a type: either unsigned long, if the argument fits into
878 +@@ -445,7 +450,7 @@ do { \
879 + ({ \
880 + int __gu_err; \
881 + __inttype(*(ptr)) __gu_val; \
882 +- __uaccess_begin(); \
883 ++ __uaccess_begin_nospec(); \
884 + __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
885 + __uaccess_end(); \
886 + (x) = (__force __typeof__(*(ptr)))__gu_val; \
887 +@@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; };
888 + __uaccess_begin(); \
889 + barrier();
890 +
891 ++#define uaccess_try_nospec do { \
892 ++ current->thread.uaccess_err = 0; \
893 ++ __uaccess_begin_nospec(); \
894 ++
895 + #define uaccess_catch(err) \
896 + __uaccess_end(); \
897 + (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
898 +@@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; };
899 + * get_user_ex(...);
900 + * } get_user_catch(err)
901 + */
902 +-#define get_user_try uaccess_try
903 ++#define get_user_try uaccess_try_nospec
904 + #define get_user_catch(err) uaccess_catch(err)
905 +
906 + #define get_user_ex(x, ptr) do { \
907 +@@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void)
908 + __typeof__(ptr) __uval = (uval); \
909 + __typeof__(*(ptr)) __old = (old); \
910 + __typeof__(*(ptr)) __new = (new); \
911 +- __uaccess_begin(); \
912 ++ __uaccess_begin_nospec(); \
913 + switch (size) { \
914 + case 1: \
915 + { \
916 +diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
917 +index 72950401b223..ba2dc1930630 100644
918 +--- a/arch/x86/include/asm/uaccess_32.h
919 ++++ b/arch/x86/include/asm/uaccess_32.h
920 +@@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
921 + switch (n) {
922 + case 1:
923 + ret = 0;
924 +- __uaccess_begin();
925 ++ __uaccess_begin_nospec();
926 + __get_user_asm_nozero(*(u8 *)to, from, ret,
927 + "b", "b", "=q", 1);
928 + __uaccess_end();
929 + return ret;
930 + case 2:
931 + ret = 0;
932 +- __uaccess_begin();
933 ++ __uaccess_begin_nospec();
934 + __get_user_asm_nozero(*(u16 *)to, from, ret,
935 + "w", "w", "=r", 2);
936 + __uaccess_end();
937 + return ret;
938 + case 4:
939 + ret = 0;
940 +- __uaccess_begin();
941 ++ __uaccess_begin_nospec();
942 + __get_user_asm_nozero(*(u32 *)to, from, ret,
943 + "l", "k", "=r", 4);
944 + __uaccess_end();
945 +diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
946 +index f07ef3c575db..62546b3a398e 100644
947 +--- a/arch/x86/include/asm/uaccess_64.h
948 ++++ b/arch/x86/include/asm/uaccess_64.h
949 +@@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
950 + return copy_user_generic(dst, (__force void *)src, size);
951 + switch (size) {
952 + case 1:
953 +- __uaccess_begin();
954 ++ __uaccess_begin_nospec();
955 + __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
956 + ret, "b", "b", "=q", 1);
957 + __uaccess_end();
958 + return ret;
959 + case 2:
960 +- __uaccess_begin();
961 ++ __uaccess_begin_nospec();
962 + __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
963 + ret, "w", "w", "=r", 2);
964 + __uaccess_end();
965 + return ret;
966 + case 4:
967 +- __uaccess_begin();
968 ++ __uaccess_begin_nospec();
969 + __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
970 + ret, "l", "k", "=r", 4);
971 + __uaccess_end();
972 + return ret;
973 + case 8:
974 +- __uaccess_begin();
975 ++ __uaccess_begin_nospec();
976 + __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
977 + ret, "q", "", "=r", 8);
978 + __uaccess_end();
979 + return ret;
980 + case 10:
981 +- __uaccess_begin();
982 ++ __uaccess_begin_nospec();
983 + __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
984 + ret, "q", "", "=r", 10);
985 + if (likely(!ret))
986 +@@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
987 + __uaccess_end();
988 + return ret;
989 + case 16:
990 +- __uaccess_begin();
991 ++ __uaccess_begin_nospec();
992 + __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
993 + ret, "q", "", "=r", 16);
994 + if (likely(!ret))
995 +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
996 +index 4817d743c263..a481763a3776 100644
997 +--- a/arch/x86/kernel/alternative.c
998 ++++ b/arch/x86/kernel/alternative.c
999 +@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
1000 + }
1001 + __setup("noreplace-smp", setup_noreplace_smp);
1002 +
1003 +-#ifdef CONFIG_PARAVIRT
1004 +-static int __initdata_or_module noreplace_paravirt = 0;
1005 +-
1006 +-static int __init setup_noreplace_paravirt(char *str)
1007 +-{
1008 +- noreplace_paravirt = 1;
1009 +- return 1;
1010 +-}
1011 +-__setup("noreplace-paravirt", setup_noreplace_paravirt);
1012 +-#endif
1013 +-
1014 + #define DPRINTK(fmt, args...) \
1015 + do { \
1016 + if (debug_alternative) \
1017 +@@ -298,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
1018 + tgt_rip = next_rip + o_dspl;
1019 + n_dspl = tgt_rip - orig_insn;
1020 +
1021 +- DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
1022 ++ DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
1023 +
1024 + if (tgt_rip - orig_insn >= 0) {
1025 + if (n_dspl - 2 <= 127)
1026 +@@ -355,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
1027 + add_nops(instr + (a->instrlen - a->padlen), a->padlen);
1028 + local_irq_restore(flags);
1029 +
1030 +- DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
1031 ++ DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
1032 + instr, a->instrlen - a->padlen, a->padlen);
1033 + }
1034 +
1035 +@@ -376,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
1036 + u8 *instr, *replacement;
1037 + u8 insnbuf[MAX_PATCH_LEN];
1038 +
1039 +- DPRINTK("alt table %p -> %p", start, end);
1040 ++ DPRINTK("alt table %px, -> %px", start, end);
1041 + /*
1042 + * The scan order should be from start to end. A later scanned
1043 + * alternative code can overwrite previously scanned alternative code.
1044 +@@ -400,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
1045 + continue;
1046 + }
1047 +
1048 +- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
1049 ++ DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
1050 + a->cpuid >> 5,
1051 + a->cpuid & 0x1f,
1052 + instr, a->instrlen,
1053 + replacement, a->replacementlen, a->padlen);
1054 +
1055 +- DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
1056 +- DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
1057 ++ DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
1058 ++ DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
1059 +
1060 + memcpy(insnbuf, replacement, a->replacementlen);
1061 + insnbuf_sz = a->replacementlen;
1062 +@@ -433,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
1063 + a->instrlen - a->replacementlen);
1064 + insnbuf_sz += a->instrlen - a->replacementlen;
1065 + }
1066 +- DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
1067 ++ DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
1068 +
1069 + text_poke_early(instr, insnbuf, insnbuf_sz);
1070 + }
1071 +@@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
1072 + struct paravirt_patch_site *p;
1073 + char insnbuf[MAX_PATCH_LEN];
1074 +
1075 +- if (noreplace_paravirt)
1076 +- return;
1077 +-
1078 + for (p = start; p < end; p++) {
1079 + unsigned int used;
1080 +
1081 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
1082 +index 390b3dc3d438..71949bf2de5a 100644
1083 +--- a/arch/x86/kernel/cpu/bugs.c
1084 ++++ b/arch/x86/kernel/cpu/bugs.c
1085 +@@ -11,6 +11,7 @@
1086 + #include <linux/init.h>
1087 + #include <linux/utsname.h>
1088 + #include <linux/cpu.h>
1089 ++#include <linux/module.h>
1090 +
1091 + #include <asm/nospec-branch.h>
1092 + #include <asm/cmdline.h>
1093 +@@ -90,20 +91,41 @@ static const char *spectre_v2_strings[] = {
1094 + };
1095 +
1096 + #undef pr_fmt
1097 +-#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
1098 ++#define pr_fmt(fmt) "Spectre V2 : " fmt
1099 +
1100 + static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
1101 +
1102 ++#ifdef RETPOLINE
1103 ++static bool spectre_v2_bad_module;
1104 ++
1105 ++bool retpoline_module_ok(bool has_retpoline)
1106 ++{
1107 ++ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
1108 ++ return true;
1109 ++
1110 ++ pr_err("System may be vulnerable to spectre v2\n");
1111 ++ spectre_v2_bad_module = true;
1112 ++ return false;
1113 ++}
1114 ++
1115 ++static inline const char *spectre_v2_module_string(void)
1116 ++{
1117 ++ return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
1118 ++}
1119 ++#else
1120 ++static inline const char *spectre_v2_module_string(void) { return ""; }
1121 ++#endif
1122 ++
1123 + static void __init spec2_print_if_insecure(const char *reason)
1124 + {
1125 + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1126 +- pr_info("%s\n", reason);
1127 ++ pr_info("%s selected on command line.\n", reason);
1128 + }
1129 +
1130 + static void __init spec2_print_if_secure(const char *reason)
1131 + {
1132 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1133 +- pr_info("%s\n", reason);
1134 ++ pr_info("%s selected on command line.\n", reason);
1135 + }
1136 +
1137 + static inline bool retp_compiler(void)
1138 +@@ -118,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
1139 + return len == arglen && !strncmp(arg, opt, len);
1140 + }
1141 +
1142 ++static const struct {
1143 ++ const char *option;
1144 ++ enum spectre_v2_mitigation_cmd cmd;
1145 ++ bool secure;
1146 ++} mitigation_options[] = {
1147 ++ { "off", SPECTRE_V2_CMD_NONE, false },
1148 ++ { "on", SPECTRE_V2_CMD_FORCE, true },
1149 ++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
1150 ++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
1151 ++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
1152 ++ { "auto", SPECTRE_V2_CMD_AUTO, false },
1153 ++};
1154 ++
1155 + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
1156 + {
1157 + char arg[20];
1158 +- int ret;
1159 +-
1160 +- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
1161 +- sizeof(arg));
1162 +- if (ret > 0) {
1163 +- if (match_option(arg, ret, "off")) {
1164 +- goto disable;
1165 +- } else if (match_option(arg, ret, "on")) {
1166 +- spec2_print_if_secure("force enabled on command line.");
1167 +- return SPECTRE_V2_CMD_FORCE;
1168 +- } else if (match_option(arg, ret, "retpoline")) {
1169 +- spec2_print_if_insecure("retpoline selected on command line.");
1170 +- return SPECTRE_V2_CMD_RETPOLINE;
1171 +- } else if (match_option(arg, ret, "retpoline,amd")) {
1172 +- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
1173 +- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
1174 +- return SPECTRE_V2_CMD_AUTO;
1175 +- }
1176 +- spec2_print_if_insecure("AMD retpoline selected on command line.");
1177 +- return SPECTRE_V2_CMD_RETPOLINE_AMD;
1178 +- } else if (match_option(arg, ret, "retpoline,generic")) {
1179 +- spec2_print_if_insecure("generic retpoline selected on command line.");
1180 +- return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
1181 +- } else if (match_option(arg, ret, "auto")) {
1182 ++ int ret, i;
1183 ++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
1184 ++
1185 ++ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
1186 ++ return SPECTRE_V2_CMD_NONE;
1187 ++ else {
1188 ++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
1189 ++ sizeof(arg));
1190 ++ if (ret < 0)
1191 ++ return SPECTRE_V2_CMD_AUTO;
1192 ++
1193 ++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
1194 ++ if (!match_option(arg, ret, mitigation_options[i].option))
1195 ++ continue;
1196 ++ cmd = mitigation_options[i].cmd;
1197 ++ break;
1198 ++ }
1199 ++
1200 ++ if (i >= ARRAY_SIZE(mitigation_options)) {
1201 ++ pr_err("unknown option (%s). Switching to AUTO select\n",
1202 ++ mitigation_options[i].option);
1203 + return SPECTRE_V2_CMD_AUTO;
1204 + }
1205 + }
1206 +
1207 +- if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
1208 ++ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
1209 ++ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
1210 ++ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
1211 ++ !IS_ENABLED(CONFIG_RETPOLINE)) {
1212 ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
1213 ++ mitigation_options[i].option);
1214 + return SPECTRE_V2_CMD_AUTO;
1215 +-disable:
1216 +- spec2_print_if_insecure("disabled on command line.");
1217 +- return SPECTRE_V2_CMD_NONE;
1218 ++ }
1219 ++
1220 ++ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
1221 ++ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
1222 ++ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
1223 ++ return SPECTRE_V2_CMD_AUTO;
1224 ++ }
1225 ++
1226 ++ if (mitigation_options[i].secure)
1227 ++ spec2_print_if_secure(mitigation_options[i].option);
1228 ++ else
1229 ++ spec2_print_if_insecure(mitigation_options[i].option);
1230 ++
1231 ++ return cmd;
1232 + }
1233 +
1234 + /* Check for Skylake-like CPUs (for RSB handling) */
1235 +@@ -191,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void)
1236 + return;
1237 +
1238 + case SPECTRE_V2_CMD_FORCE:
1239 +- /* FALLTRHU */
1240 + case SPECTRE_V2_CMD_AUTO:
1241 +- goto retpoline_auto;
1242 +-
1243 ++ if (IS_ENABLED(CONFIG_RETPOLINE))
1244 ++ goto retpoline_auto;
1245 ++ break;
1246 + case SPECTRE_V2_CMD_RETPOLINE_AMD:
1247 + if (IS_ENABLED(CONFIG_RETPOLINE))
1248 + goto retpoline_amd;
1249 +@@ -249,6 +297,12 @@ static void __init spectre_v2_select_mitigation(void)
1250 + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
1251 + pr_info("Filling RSB on context switch\n");
1252 + }
1253 ++
1254 ++ /* Initialize Indirect Branch Prediction Barrier if supported */
1255 ++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
1256 ++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
1257 ++ pr_info("Enabling Indirect Branch Prediction Barrier\n");
1258 ++ }
1259 + }
1260 +
1261 + #undef pr_fmt
1262 +@@ -269,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
1263 + {
1264 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
1265 + return sprintf(buf, "Not affected\n");
1266 +- return sprintf(buf, "Vulnerable\n");
1267 ++ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
1268 + }
1269 +
1270 + ssize_t cpu_show_spectre_v2(struct device *dev,
1271 +@@ -278,6 +332,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
1272 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1273 + return sprintf(buf, "Not affected\n");
1274 +
1275 +- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
1276 ++ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
1277 ++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
1278 ++ spectre_v2_module_string());
1279 + }
1280 + #endif
1281 ++
1282 ++void __ibp_barrier(void)
1283 ++{
1284 ++ __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
1285 ++}
1286 ++EXPORT_SYMBOL_GPL(__ibp_barrier);
1287 +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1288 +index ef29ad001991..d63f4b5706e4 100644
1289 +--- a/arch/x86/kernel/cpu/common.c
1290 ++++ b/arch/x86/kernel/cpu/common.c
1291 +@@ -47,6 +47,8 @@
1292 + #include <asm/pat.h>
1293 + #include <asm/microcode.h>
1294 + #include <asm/microcode_intel.h>
1295 ++#include <asm/intel-family.h>
1296 ++#include <asm/cpu_device_id.h>
1297 +
1298 + #ifdef CONFIG_X86_LOCAL_APIC
1299 + #include <asm/uv/uv.h>
1300 +@@ -748,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
1301 + }
1302 + }
1303 +
1304 ++static void init_speculation_control(struct cpuinfo_x86 *c)
1305 ++{
1306 ++ /*
1307 ++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
1308 ++ * and they also have a different bit for STIBP support. Also,
1309 ++ * a hypervisor might have set the individual AMD bits even on
1310 ++ * Intel CPUs, for finer-grained selection of what's available.
1311 ++ *
1312 ++ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
1313 ++ * features, which are visible in /proc/cpuinfo and used by the
1314 ++ * kernel. So set those accordingly from the Intel bits.
1315 ++ */
1316 ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
1317 ++ set_cpu_cap(c, X86_FEATURE_IBRS);
1318 ++ set_cpu_cap(c, X86_FEATURE_IBPB);
1319 ++ }
1320 ++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
1321 ++ set_cpu_cap(c, X86_FEATURE_STIBP);
1322 ++}
1323 ++
1324 + void get_cpu_cap(struct cpuinfo_x86 *c)
1325 + {
1326 + u32 eax, ebx, ecx, edx;
1327 +@@ -769,6 +791,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
1328 + cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
1329 + c->x86_capability[CPUID_7_0_EBX] = ebx;
1330 + c->x86_capability[CPUID_7_ECX] = ecx;
1331 ++ c->x86_capability[CPUID_7_EDX] = edx;
1332 + }
1333 +
1334 + /* Extended state features: level 0x0000000d */
1335 +@@ -841,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
1336 + c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
1337 +
1338 + init_scattered_cpuid_features(c);
1339 ++ init_speculation_control(c);
1340 +
1341 + /*
1342 + * Clear/Set all flags overridden by options, after probe.
1343 +@@ -876,6 +900,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
1344 + #endif
1345 + }
1346 +
1347 ++static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
1348 ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
1349 ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
1350 ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
1351 ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
1352 ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
1353 ++ { X86_VENDOR_CENTAUR, 5 },
1354 ++ { X86_VENDOR_INTEL, 5 },
1355 ++ { X86_VENDOR_NSC, 5 },
1356 ++ { X86_VENDOR_ANY, 4 },
1357 ++ {}
1358 ++};
1359 ++
1360 ++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
1361 ++ { X86_VENDOR_AMD },
1362 ++ {}
1363 ++};
1364 ++
1365 ++static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
1366 ++{
1367 ++ u64 ia32_cap = 0;
1368 ++
1369 ++ if (x86_match_cpu(cpu_no_meltdown))
1370 ++ return false;
1371 ++
1372 ++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
1373 ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
1374 ++
1375 ++ /* Rogue Data Cache Load? No! */
1376 ++ if (ia32_cap & ARCH_CAP_RDCL_NO)
1377 ++ return false;
1378 ++
1379 ++ return true;
1380 ++}
1381 ++
1382 + /*
1383 + * Do minimum CPU detection early.
1384 + * Fields really needed: vendor, cpuid_level, family, model, mask,
1385 +@@ -923,11 +982,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1386 +
1387 + setup_force_cpu_cap(X86_FEATURE_ALWAYS);
1388 +
1389 +- if (c->x86_vendor != X86_VENDOR_AMD)
1390 +- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1391 +-
1392 +- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1393 +- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1394 ++ if (!x86_match_cpu(cpu_no_speculation)) {
1395 ++ if (cpu_vulnerable_to_meltdown(c))
1396 ++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1397 ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1398 ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1399 ++ }
1400 +
1401 + fpu__init_system(c);
1402 +
1403 +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
1404 +index b1af22073e28..319bf989fad1 100644
1405 +--- a/arch/x86/kernel/cpu/intel.c
1406 ++++ b/arch/x86/kernel/cpu/intel.c
1407 +@@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
1408 + ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
1409 + }
1410 +
1411 ++/*
1412 ++ * Early microcode releases for the Spectre v2 mitigation were broken.
1413 ++ * Information taken from;
1414 ++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
1415 ++ * - https://kb.vmware.com/s/article/52345
1416 ++ * - Microcode revisions observed in the wild
1417 ++ * - Release note from 20180108 microcode release
1418 ++ */
1419 ++struct sku_microcode {
1420 ++ u8 model;
1421 ++ u8 stepping;
1422 ++ u32 microcode;
1423 ++};
1424 ++static const struct sku_microcode spectre_bad_microcodes[] = {
1425 ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
1426 ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
1427 ++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
1428 ++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
1429 ++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
1430 ++ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
1431 ++ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
1432 ++ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
1433 ++ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
1434 ++ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
1435 ++ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
1436 ++ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
1437 ++ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
1438 ++ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
1439 ++ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
1440 ++ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
1441 ++ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
1442 ++ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
1443 ++ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
1444 ++ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
1445 ++ /* Updated in the 20180108 release; blacklist until we know otherwise */
1446 ++ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
1447 ++ /* Observed in the wild */
1448 ++ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
1449 ++ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
1450 ++};
1451 ++
1452 ++static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
1453 ++{
1454 ++ int i;
1455 ++
1456 ++ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
1457 ++ if (c->x86_model == spectre_bad_microcodes[i].model &&
1458 ++ c->x86_mask == spectre_bad_microcodes[i].stepping)
1459 ++ return (c->microcode <= spectre_bad_microcodes[i].microcode);
1460 ++ }
1461 ++ return false;
1462 ++}
1463 ++
1464 + static void early_init_intel(struct cpuinfo_x86 *c)
1465 + {
1466 + u64 misc_enable;
1467 +@@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
1468 + if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
1469 + c->microcode = intel_get_microcode_revision();
1470 +
1471 ++ /* Now if any of them are set, check the blacklist and clear the lot */
1472 ++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
1473 ++ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
1474 ++ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
1475 ++ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
1476 ++ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
1477 ++ setup_clear_cpu_cap(X86_FEATURE_IBRS);
1478 ++ setup_clear_cpu_cap(X86_FEATURE_IBPB);
1479 ++ setup_clear_cpu_cap(X86_FEATURE_STIBP);
1480 ++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
1481 ++ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
1482 ++ }
1483 ++
1484 + /*
1485 + * Atom erratum AAE44/AAF40/AAG38/AAH41:
1486 + *
1487 +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
1488 +index d0e69769abfd..df11f5d604be 100644
1489 +--- a/arch/x86/kernel/cpu/scattered.c
1490 ++++ b/arch/x86/kernel/cpu/scattered.c
1491 +@@ -21,8 +21,6 @@ struct cpuid_bit {
1492 + static const struct cpuid_bit cpuid_bits[] = {
1493 + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
1494 + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
1495 +- { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
1496 +- { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
1497 + { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
1498 + { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
1499 + { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
1500 +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
1501 +index c75466232016..9eb448c7859d 100644
1502 +--- a/arch/x86/kernel/process_64.c
1503 ++++ b/arch/x86/kernel/process_64.c
1504 +@@ -557,7 +557,7 @@ static void __set_personality_x32(void)
1505 + * Pretend to come from a x32 execve.
1506 + */
1507 + task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
1508 +- current->thread.status &= ~TS_COMPAT;
1509 ++ current_thread_info()->status &= ~TS_COMPAT;
1510 + #endif
1511 + }
1512 +
1513 +@@ -571,7 +571,7 @@ static void __set_personality_ia32(void)
1514 + current->personality |= force_personality32;
1515 + /* Prepare the first "return" to user space */
1516 + task_pt_regs(current)->orig_ax = __NR_ia32_execve;
1517 +- current->thread.status |= TS_COMPAT;
1518 ++ current_thread_info()->status |= TS_COMPAT;
1519 + #endif
1520 + }
1521 +
1522 +diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
1523 +index f37d18124648..ed5c4cdf0a34 100644
1524 +--- a/arch/x86/kernel/ptrace.c
1525 ++++ b/arch/x86/kernel/ptrace.c
1526 +@@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
1527 + */
1528 + regs->orig_ax = value;
1529 + if (syscall_get_nr(child, regs) >= 0)
1530 +- child->thread.status |= TS_I386_REGS_POKED;
1531 ++ child->thread_info.status |= TS_I386_REGS_POKED;
1532 + break;
1533 +
1534 + case offsetof(struct user32, regs.eflags):
1535 +diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
1536 +index b9e00e8f1c9b..4cdc0b27ec82 100644
1537 +--- a/arch/x86/kernel/signal.c
1538 ++++ b/arch/x86/kernel/signal.c
1539 +@@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
1540 + * than the tracee.
1541 + */
1542 + #ifdef CONFIG_IA32_EMULATION
1543 +- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
1544 ++ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
1545 + return __NR_ia32_restart_syscall;
1546 + #endif
1547 + #ifdef CONFIG_X86_X32_ABI
1548 +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
1549 +index 0099e10eb045..13f5d4217e4f 100644
1550 +--- a/arch/x86/kvm/cpuid.c
1551 ++++ b/arch/x86/kvm/cpuid.c
1552 +@@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void)
1553 +
1554 + #define F(x) bit(X86_FEATURE_##x)
1555 +
1556 +-/* These are scattered features in cpufeatures.h. */
1557 +-#define KVM_CPUID_BIT_AVX512_4VNNIW 2
1558 +-#define KVM_CPUID_BIT_AVX512_4FMAPS 3
1559 ++/* For scattered features from cpufeatures.h; we currently expose none */
1560 + #define KF(x) bit(KVM_CPUID_BIT_##x)
1561 +
1562 + int kvm_update_cpuid(struct kvm_vcpu *vcpu)
1563 +@@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1564 + F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
1565 + 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
1566 +
1567 ++ /* cpuid 0x80000008.ebx */
1568 ++ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
1569 ++ F(IBPB) | F(IBRS);
1570 ++
1571 + /* cpuid 0xC0000001.edx */
1572 + const u32 kvm_cpuid_C000_0001_edx_x86_features =
1573 + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
1574 +@@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1575 +
1576 + /* cpuid 7.0.edx*/
1577 + const u32 kvm_cpuid_7_0_edx_x86_features =
1578 +- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
1579 ++ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
1580 ++ F(ARCH_CAPABILITIES);
1581 +
1582 + /* all calls to cpuid_count() should be made on the same cpu */
1583 + get_cpu();
1584 +@@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1585 + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
1586 + entry->ecx &= ~F(PKU);
1587 + entry->edx &= kvm_cpuid_7_0_edx_x86_features;
1588 +- entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
1589 ++ cpuid_mask(&entry->edx, CPUID_7_EDX);
1590 + } else {
1591 + entry->ebx = 0;
1592 + entry->ecx = 0;
1593 +@@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1594 + if (!g_phys_as)
1595 + g_phys_as = phys_as;
1596 + entry->eax = g_phys_as | (virt_as << 8);
1597 +- entry->ebx = entry->edx = 0;
1598 ++ entry->edx = 0;
1599 ++ /* IBRS and IBPB aren't necessarily present in hardware cpuid */
1600 ++ if (boot_cpu_has(X86_FEATURE_IBPB))
1601 ++ entry->ebx |= F(IBPB);
1602 ++ if (boot_cpu_has(X86_FEATURE_IBRS))
1603 ++ entry->ebx |= F(IBRS);
1604 ++ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
1605 ++ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
1606 + break;
1607 + }
1608 + case 0x80000019:
1609 +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
1610 +index c2cea6651279..9a327d5b6d1f 100644
1611 +--- a/arch/x86/kvm/cpuid.h
1612 ++++ b/arch/x86/kvm/cpuid.h
1613 +@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
1614 + [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
1615 + [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
1616 + [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
1617 ++ [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
1618 + };
1619 +
1620 + static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
1621 +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
1622 +index b514b2b2845a..290ecf711aec 100644
1623 +--- a/arch/x86/kvm/emulate.c
1624 ++++ b/arch/x86/kvm/emulate.c
1625 +@@ -25,6 +25,7 @@
1626 + #include <asm/kvm_emulate.h>
1627 + #include <linux/stringify.h>
1628 + #include <asm/debugreg.h>
1629 ++#include <asm/nospec-branch.h>
1630 +
1631 + #include "x86.h"
1632 + #include "tss.h"
1633 +@@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1634 + void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1635 +
1636 + flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1637 +- asm("push %[flags]; popf; call *%[fastop]"
1638 +- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
1639 ++ asm("push %[flags]; popf; " CALL_NOSPEC
1640 ++ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
1641 + return rc;
1642 + }
1643 +
1644 +@@ -5335,9 +5336,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
1645 + if (!(ctxt->d & ByteOp))
1646 + fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
1647 +
1648 +- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
1649 ++ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
1650 + : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
1651 +- [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
1652 ++ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
1653 + : "c"(ctxt->src2.val));
1654 +
1655 + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
1656 +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
1657 +index f40d0da1f1d3..4e3c79530526 100644
1658 +--- a/arch/x86/kvm/svm.c
1659 ++++ b/arch/x86/kvm/svm.c
1660 +@@ -184,6 +184,8 @@ struct vcpu_svm {
1661 + u64 gs_base;
1662 + } host;
1663 +
1664 ++ u64 spec_ctrl;
1665 ++
1666 + u32 *msrpm;
1667 +
1668 + ulong nmi_iret_rip;
1669 +@@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs {
1670 + { .index = MSR_CSTAR, .always = true },
1671 + { .index = MSR_SYSCALL_MASK, .always = true },
1672 + #endif
1673 ++ { .index = MSR_IA32_SPEC_CTRL, .always = false },
1674 ++ { .index = MSR_IA32_PRED_CMD, .always = false },
1675 + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
1676 + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
1677 + { .index = MSR_IA32_LASTINTFROMIP, .always = false },
1678 +@@ -529,6 +533,7 @@ struct svm_cpu_data {
1679 + struct kvm_ldttss_desc *tss_desc;
1680 +
1681 + struct page *save_area;
1682 ++ struct vmcb *current_vmcb;
1683 + };
1684 +
1685 + static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
1686 +@@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index)
1687 + return false;
1688 + }
1689 +
1690 ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
1691 ++{
1692 ++ u8 bit_write;
1693 ++ unsigned long tmp;
1694 ++ u32 offset;
1695 ++ u32 *msrpm;
1696 ++
1697 ++ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
1698 ++ to_svm(vcpu)->msrpm;
1699 ++
1700 ++ offset = svm_msrpm_offset(msr);
1701 ++ bit_write = 2 * (msr & 0x0f) + 1;
1702 ++ tmp = msrpm[offset];
1703 ++
1704 ++ BUG_ON(offset == MSR_INVALID);
1705 ++
1706 ++ return !!test_bit(bit_write, &tmp);
1707 ++}
1708 ++
1709 + static void set_msr_interception(u32 *msrpm, unsigned msr,
1710 + int read, int write)
1711 + {
1712 +@@ -1582,6 +1606,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
1713 + u32 dummy;
1714 + u32 eax = 1;
1715 +
1716 ++ svm->spec_ctrl = 0;
1717 ++
1718 + if (!init_event) {
1719 + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1720 + MSR_IA32_APICBASE_ENABLE;
1721 +@@ -1703,11 +1729,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1722 + __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1723 + kvm_vcpu_uninit(vcpu);
1724 + kmem_cache_free(kvm_vcpu_cache, svm);
1725 ++ /*
1726 ++ * The vmcb page can be recycled, causing a false negative in
1727 ++ * svm_vcpu_load(). So do a full IBPB now.
1728 ++ */
1729 ++ indirect_branch_prediction_barrier();
1730 + }
1731 +
1732 + static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1733 + {
1734 + struct vcpu_svm *svm = to_svm(vcpu);
1735 ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1736 + int i;
1737 +
1738 + if (unlikely(cpu != vcpu->cpu)) {
1739 +@@ -1736,6 +1768,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1740 + if (static_cpu_has(X86_FEATURE_RDTSCP))
1741 + wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
1742 +
1743 ++ if (sd->current_vmcb != svm->vmcb) {
1744 ++ sd->current_vmcb = svm->vmcb;
1745 ++ indirect_branch_prediction_barrier();
1746 ++ }
1747 + avic_vcpu_load(vcpu, cpu);
1748 + }
1749 +
1750 +@@ -3593,6 +3629,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1751 + case MSR_VM_CR:
1752 + msr_info->data = svm->nested.vm_cr_msr;
1753 + break;
1754 ++ case MSR_IA32_SPEC_CTRL:
1755 ++ if (!msr_info->host_initiated &&
1756 ++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
1757 ++ return 1;
1758 ++
1759 ++ msr_info->data = svm->spec_ctrl;
1760 ++ break;
1761 + case MSR_IA32_UCODE_REV:
1762 + msr_info->data = 0x01000065;
1763 + break;
1764 +@@ -3684,6 +3727,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
1765 + case MSR_IA32_TSC:
1766 + kvm_write_tsc(vcpu, msr);
1767 + break;
1768 ++ case MSR_IA32_SPEC_CTRL:
1769 ++ if (!msr->host_initiated &&
1770 ++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
1771 ++ return 1;
1772 ++
1773 ++ /* The STIBP bit doesn't fault even if it's not advertised */
1774 ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
1775 ++ return 1;
1776 ++
1777 ++ svm->spec_ctrl = data;
1778 ++
1779 ++ if (!data)
1780 ++ break;
1781 ++
1782 ++ /*
1783 ++ * For non-nested:
1784 ++ * When it's written (to non-zero) for the first time, pass
1785 ++ * it through.
1786 ++ *
1787 ++ * For nested:
1788 ++ * The handling of the MSR bitmap for L2 guests is done in
1789 ++ * nested_svm_vmrun_msrpm.
1790 ++ * We update the L1 MSR bit as well since it will end up
1791 ++ * touching the MSR anyway now.
1792 ++ */
1793 ++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
1794 ++ break;
1795 ++ case MSR_IA32_PRED_CMD:
1796 ++ if (!msr->host_initiated &&
1797 ++ !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
1798 ++ return 1;
1799 ++
1800 ++ if (data & ~PRED_CMD_IBPB)
1801 ++ return 1;
1802 ++
1803 ++ if (!data)
1804 ++ break;
1805 ++
1806 ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
1807 ++ if (is_guest_mode(vcpu))
1808 ++ break;
1809 ++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
1810 ++ break;
1811 + case MSR_STAR:
1812 + svm->vmcb->save.star = data;
1813 + break;
1814 +@@ -4936,6 +5022,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
1815 +
1816 + local_irq_enable();
1817 +
1818 ++ /*
1819 ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
1820 ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there
1821 ++ * is no need to worry about the conditional branch over the wrmsr
1822 ++ * being speculatively taken.
1823 ++ */
1824 ++ if (svm->spec_ctrl)
1825 ++ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
1826 ++
1827 + asm volatile (
1828 + "push %%" _ASM_BP "; \n\t"
1829 + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
1830 +@@ -5028,6 +5123,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
1831 + #endif
1832 + );
1833 +
1834 ++ /*
1835 ++ * We do not use IBRS in the kernel. If this vCPU has used the
1836 ++ * SPEC_CTRL MSR it may have left it on; save the value and
1837 ++ * turn it off. This is much more efficient than blindly adding
1838 ++ * it to the atomic save/restore list. Especially as the former
1839 ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
1840 ++ *
1841 ++ * For non-nested case:
1842 ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to
1843 ++ * save it.
1844 ++ *
1845 ++ * For nested case:
1846 ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to
1847 ++ * save it.
1848 ++ */
1849 ++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
1850 ++ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
1851 ++
1852 ++ if (svm->spec_ctrl)
1853 ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
1854 ++
1855 + /* Eliminate branch target predictions from guest mode */
1856 + vmexit_fill_RSB();
1857 +
1858 +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
1859 +index c829d89e2e63..bee4c49f6dd0 100644
1860 +--- a/arch/x86/kvm/vmx.c
1861 ++++ b/arch/x86/kvm/vmx.c
1862 +@@ -34,6 +34,7 @@
1863 + #include <linux/tboot.h>
1864 + #include <linux/hrtimer.h>
1865 + #include <linux/frame.h>
1866 ++#include <linux/nospec.h>
1867 + #include "kvm_cache_regs.h"
1868 + #include "x86.h"
1869 +
1870 +@@ -111,6 +112,14 @@ static u64 __read_mostly host_xss;
1871 + static bool __read_mostly enable_pml = 1;
1872 + module_param_named(pml, enable_pml, bool, S_IRUGO);
1873 +
1874 ++#define MSR_TYPE_R 1
1875 ++#define MSR_TYPE_W 2
1876 ++#define MSR_TYPE_RW 3
1877 ++
1878 ++#define MSR_BITMAP_MODE_X2APIC 1
1879 ++#define MSR_BITMAP_MODE_X2APIC_APICV 2
1880 ++#define MSR_BITMAP_MODE_LM 4
1881 ++
1882 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
1883 +
1884 + /* Guest_tsc -> host_tsc conversion requires 64-bit division. */
1885 +@@ -185,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO);
1886 + extern const ulong vmx_return;
1887 +
1888 + #define NR_AUTOLOAD_MSRS 8
1889 +-#define VMCS02_POOL_SIZE 1
1890 +
1891 + struct vmcs {
1892 + u32 revision_id;
1893 +@@ -210,6 +218,7 @@ struct loaded_vmcs {
1894 + int soft_vnmi_blocked;
1895 + ktime_t entry_time;
1896 + s64 vnmi_blocked_time;
1897 ++ unsigned long *msr_bitmap;
1898 + struct list_head loaded_vmcss_on_cpu_link;
1899 + };
1900 +
1901 +@@ -226,7 +235,7 @@ struct shared_msr_entry {
1902 + * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
1903 + * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
1904 + * More than one of these structures may exist, if L1 runs multiple L2 guests.
1905 +- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
1906 ++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
1907 + * underlying hardware which will be used to run L2.
1908 + * This structure is packed to ensure that its layout is identical across
1909 + * machines (necessary for live migration).
1910 +@@ -409,13 +418,6 @@ struct __packed vmcs12 {
1911 + */
1912 + #define VMCS12_SIZE 0x1000
1913 +
1914 +-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
1915 +-struct vmcs02_list {
1916 +- struct list_head list;
1917 +- gpa_t vmptr;
1918 +- struct loaded_vmcs vmcs02;
1919 +-};
1920 +-
1921 + /*
1922 + * The nested_vmx structure is part of vcpu_vmx, and holds information we need
1923 + * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
1924 +@@ -440,15 +442,15 @@ struct nested_vmx {
1925 + */
1926 + bool sync_shadow_vmcs;
1927 +
1928 +- /* vmcs02_list cache of VMCSs recently used to run L2 guests */
1929 +- struct list_head vmcs02_pool;
1930 +- int vmcs02_num;
1931 + bool change_vmcs01_virtual_x2apic_mode;
1932 + /* L2 must run next, and mustn't decide to exit to L1. */
1933 + bool nested_run_pending;
1934 ++
1935 ++ struct loaded_vmcs vmcs02;
1936 ++
1937 + /*
1938 +- * Guest pages referred to in vmcs02 with host-physical pointers, so
1939 +- * we must keep them pinned while L2 runs.
1940 ++ * Guest pages referred to in the vmcs02 with host-physical
1941 ++ * pointers, so we must keep them pinned while L2 runs.
1942 + */
1943 + struct page *apic_access_page;
1944 + struct page *virtual_apic_page;
1945 +@@ -457,8 +459,6 @@ struct nested_vmx {
1946 + bool pi_pending;
1947 + u16 posted_intr_nv;
1948 +
1949 +- unsigned long *msr_bitmap;
1950 +-
1951 + struct hrtimer preemption_timer;
1952 + bool preemption_timer_expired;
1953 +
1954 +@@ -581,6 +581,7 @@ struct vcpu_vmx {
1955 + struct kvm_vcpu vcpu;
1956 + unsigned long host_rsp;
1957 + u8 fail;
1958 ++ u8 msr_bitmap_mode;
1959 + u32 exit_intr_info;
1960 + u32 idt_vectoring_info;
1961 + ulong rflags;
1962 +@@ -592,6 +593,10 @@ struct vcpu_vmx {
1963 + u64 msr_host_kernel_gs_base;
1964 + u64 msr_guest_kernel_gs_base;
1965 + #endif
1966 ++
1967 ++ u64 arch_capabilities;
1968 ++ u64 spec_ctrl;
1969 ++
1970 + u32 vm_entry_controls_shadow;
1971 + u32 vm_exit_controls_shadow;
1972 + u32 secondary_exec_control;
1973 +@@ -898,21 +903,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
1974 +
1975 + static inline short vmcs_field_to_offset(unsigned long field)
1976 + {
1977 +- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
1978 ++ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
1979 ++ unsigned short offset;
1980 +
1981 +- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
1982 ++ BUILD_BUG_ON(size > SHRT_MAX);
1983 ++ if (field >= size)
1984 + return -ENOENT;
1985 +
1986 +- /*
1987 +- * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
1988 +- * generic mechanism.
1989 +- */
1990 +- asm("lfence");
1991 +-
1992 +- if (vmcs_field_to_offset_table[field] == 0)
1993 ++ field = array_index_nospec(field, size);
1994 ++ offset = vmcs_field_to_offset_table[field];
1995 ++ if (offset == 0)
1996 + return -ENOENT;
1997 +-
1998 +- return vmcs_field_to_offset_table[field];
1999 ++ return offset;
2000 + }
2001 +
2002 + static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
2003 +@@ -935,6 +937,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
2004 + static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
2005 + static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
2006 + u16 error_code);
2007 ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
2008 ++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
2009 ++ u32 msr, int type);
2010 +
2011 + static DEFINE_PER_CPU(struct vmcs *, vmxarea);
2012 + static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
2013 +@@ -954,12 +959,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
2014 + enum {
2015 + VMX_IO_BITMAP_A,
2016 + VMX_IO_BITMAP_B,
2017 +- VMX_MSR_BITMAP_LEGACY,
2018 +- VMX_MSR_BITMAP_LONGMODE,
2019 +- VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
2020 +- VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
2021 +- VMX_MSR_BITMAP_LEGACY_X2APIC,
2022 +- VMX_MSR_BITMAP_LONGMODE_X2APIC,
2023 + VMX_VMREAD_BITMAP,
2024 + VMX_VMWRITE_BITMAP,
2025 + VMX_BITMAP_NR
2026 +@@ -969,12 +968,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
2027 +
2028 + #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
2029 + #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
2030 +-#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
2031 +-#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
2032 +-#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
2033 +-#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
2034 +-#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
2035 +-#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
2036 + #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
2037 + #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
2038 +
2039 +@@ -1918,6 +1911,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
2040 + vmcs_write32(EXCEPTION_BITMAP, eb);
2041 + }
2042 +
2043 ++/*
2044 ++ * Check if MSR is intercepted for currently loaded MSR bitmap.
2045 ++ */
2046 ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
2047 ++{
2048 ++ unsigned long *msr_bitmap;
2049 ++ int f = sizeof(unsigned long);
2050 ++
2051 ++ if (!cpu_has_vmx_msr_bitmap())
2052 ++ return true;
2053 ++
2054 ++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
2055 ++
2056 ++ if (msr <= 0x1fff) {
2057 ++ return !!test_bit(msr, msr_bitmap + 0x800 / f);
2058 ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2059 ++ msr &= 0x1fff;
2060 ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2061 ++ }
2062 ++
2063 ++ return true;
2064 ++}
2065 ++
2066 ++/*
2067 ++ * Check if MSR is intercepted for L01 MSR bitmap.
2068 ++ */
2069 ++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
2070 ++{
2071 ++ unsigned long *msr_bitmap;
2072 ++ int f = sizeof(unsigned long);
2073 ++
2074 ++ if (!cpu_has_vmx_msr_bitmap())
2075 ++ return true;
2076 ++
2077 ++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
2078 ++
2079 ++ if (msr <= 0x1fff) {
2080 ++ return !!test_bit(msr, msr_bitmap + 0x800 / f);
2081 ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2082 ++ msr &= 0x1fff;
2083 ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2084 ++ }
2085 ++
2086 ++ return true;
2087 ++}
2088 ++
2089 + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2090 + unsigned long entry, unsigned long exit)
2091 + {
2092 +@@ -2296,6 +2335,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2093 + if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2094 + per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2095 + vmcs_load(vmx->loaded_vmcs->vmcs);
2096 ++ indirect_branch_prediction_barrier();
2097 + }
2098 +
2099 + if (!already_loaded) {
2100 +@@ -2572,36 +2612,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
2101 + vmx->guest_msrs[from] = tmp;
2102 + }
2103 +
2104 +-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
2105 +-{
2106 +- unsigned long *msr_bitmap;
2107 +-
2108 +- if (is_guest_mode(vcpu))
2109 +- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
2110 +- else if (cpu_has_secondary_exec_ctrls() &&
2111 +- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
2112 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
2113 +- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
2114 +- if (is_long_mode(vcpu))
2115 +- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
2116 +- else
2117 +- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
2118 +- } else {
2119 +- if (is_long_mode(vcpu))
2120 +- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
2121 +- else
2122 +- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
2123 +- }
2124 +- } else {
2125 +- if (is_long_mode(vcpu))
2126 +- msr_bitmap = vmx_msr_bitmap_longmode;
2127 +- else
2128 +- msr_bitmap = vmx_msr_bitmap_legacy;
2129 +- }
2130 +-
2131 +- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
2132 +-}
2133 +-
2134 + /*
2135 + * Set up the vmcs to automatically save and restore system
2136 + * msrs. Don't touch the 64-bit msrs if the guest is in legacy
2137 +@@ -2642,7 +2652,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
2138 + vmx->save_nmsrs = save_nmsrs;
2139 +
2140 + if (cpu_has_vmx_msr_bitmap())
2141 +- vmx_set_msr_bitmap(&vmx->vcpu);
2142 ++ vmx_update_msr_bitmap(&vmx->vcpu);
2143 + }
2144 +
2145 + /*
2146 +@@ -3276,6 +3286,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2147 + case MSR_IA32_TSC:
2148 + msr_info->data = guest_read_tsc(vcpu);
2149 + break;
2150 ++ case MSR_IA32_SPEC_CTRL:
2151 ++ if (!msr_info->host_initiated &&
2152 ++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
2153 ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2154 ++ return 1;
2155 ++
2156 ++ msr_info->data = to_vmx(vcpu)->spec_ctrl;
2157 ++ break;
2158 ++ case MSR_IA32_ARCH_CAPABILITIES:
2159 ++ if (!msr_info->host_initiated &&
2160 ++ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
2161 ++ return 1;
2162 ++ msr_info->data = to_vmx(vcpu)->arch_capabilities;
2163 ++ break;
2164 + case MSR_IA32_SYSENTER_CS:
2165 + msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
2166 + break;
2167 +@@ -3383,6 +3407,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2168 + case MSR_IA32_TSC:
2169 + kvm_write_tsc(vcpu, msr_info);
2170 + break;
2171 ++ case MSR_IA32_SPEC_CTRL:
2172 ++ if (!msr_info->host_initiated &&
2173 ++ !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
2174 ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2175 ++ return 1;
2176 ++
2177 ++ /* The STIBP bit doesn't fault even if it's not advertised */
2178 ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
2179 ++ return 1;
2180 ++
2181 ++ vmx->spec_ctrl = data;
2182 ++
2183 ++ if (!data)
2184 ++ break;
2185 ++
2186 ++ /*
2187 ++ * For non-nested:
2188 ++ * When it's written (to non-zero) for the first time, pass
2189 ++ * it through.
2190 ++ *
2191 ++ * For nested:
2192 ++ * The handling of the MSR bitmap for L2 guests is done in
2193 ++ * nested_vmx_merge_msr_bitmap. We should not touch the
2194 ++ * vmcs02.msr_bitmap here since it gets completely overwritten
2195 ++ * in the merging. We update the vmcs01 here for L1 as well
2196 ++ * since it will end up touching the MSR anyway now.
2197 ++ */
2198 ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
2199 ++ MSR_IA32_SPEC_CTRL,
2200 ++ MSR_TYPE_RW);
2201 ++ break;
2202 ++ case MSR_IA32_PRED_CMD:
2203 ++ if (!msr_info->host_initiated &&
2204 ++ !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
2205 ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2206 ++ return 1;
2207 ++
2208 ++ if (data & ~PRED_CMD_IBPB)
2209 ++ return 1;
2210 ++
2211 ++ if (!data)
2212 ++ break;
2213 ++
2214 ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2215 ++
2216 ++ /*
2217 ++ * For non-nested:
2218 ++ * When it's written (to non-zero) for the first time, pass
2219 ++ * it through.
2220 ++ *
2221 ++ * For nested:
2222 ++ * The handling of the MSR bitmap for L2 guests is done in
2223 ++ * nested_vmx_merge_msr_bitmap. We should not touch the
2224 ++ * vmcs02.msr_bitmap here since it gets completely overwritten
2225 ++ * in the merging.
2226 ++ */
2227 ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
2228 ++ MSR_TYPE_W);
2229 ++ break;
2230 ++ case MSR_IA32_ARCH_CAPABILITIES:
2231 ++ if (!msr_info->host_initiated)
2232 ++ return 1;
2233 ++ vmx->arch_capabilities = data;
2234 ++ break;
2235 + case MSR_IA32_CR_PAT:
2236 + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2237 + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
2238 +@@ -3837,11 +3925,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
2239 + return vmcs;
2240 + }
2241 +
2242 +-static struct vmcs *alloc_vmcs(void)
2243 +-{
2244 +- return alloc_vmcs_cpu(raw_smp_processor_id());
2245 +-}
2246 +-
2247 + static void free_vmcs(struct vmcs *vmcs)
2248 + {
2249 + free_pages((unsigned long)vmcs, vmcs_config.order);
2250 +@@ -3857,9 +3940,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2251 + loaded_vmcs_clear(loaded_vmcs);
2252 + free_vmcs(loaded_vmcs->vmcs);
2253 + loaded_vmcs->vmcs = NULL;
2254 ++ if (loaded_vmcs->msr_bitmap)
2255 ++ free_page((unsigned long)loaded_vmcs->msr_bitmap);
2256 + WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
2257 + }
2258 +
2259 ++static struct vmcs *alloc_vmcs(void)
2260 ++{
2261 ++ return alloc_vmcs_cpu(raw_smp_processor_id());
2262 ++}
2263 ++
2264 ++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2265 ++{
2266 ++ loaded_vmcs->vmcs = alloc_vmcs();
2267 ++ if (!loaded_vmcs->vmcs)
2268 ++ return -ENOMEM;
2269 ++
2270 ++ loaded_vmcs->shadow_vmcs = NULL;
2271 ++ loaded_vmcs_init(loaded_vmcs);
2272 ++
2273 ++ if (cpu_has_vmx_msr_bitmap()) {
2274 ++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
2275 ++ if (!loaded_vmcs->msr_bitmap)
2276 ++ goto out_vmcs;
2277 ++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
2278 ++ }
2279 ++ return 0;
2280 ++
2281 ++out_vmcs:
2282 ++ free_loaded_vmcs(loaded_vmcs);
2283 ++ return -ENOMEM;
2284 ++}
2285 ++
2286 + static void free_kvm_area(void)
2287 + {
2288 + int cpu;
2289 +@@ -4918,10 +5030,8 @@ static void free_vpid(int vpid)
2290 + spin_unlock(&vmx_vpid_lock);
2291 + }
2292 +
2293 +-#define MSR_TYPE_R 1
2294 +-#define MSR_TYPE_W 2
2295 +-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
2296 +- u32 msr, int type)
2297 ++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
2298 ++ u32 msr, int type)
2299 + {
2300 + int f = sizeof(unsigned long);
2301 +
2302 +@@ -4955,6 +5065,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
2303 + }
2304 + }
2305 +
2306 ++static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
2307 ++ u32 msr, int type)
2308 ++{
2309 ++ int f = sizeof(unsigned long);
2310 ++
2311 ++ if (!cpu_has_vmx_msr_bitmap())
2312 ++ return;
2313 ++
2314 ++ /*
2315 ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
2316 ++ * have the write-low and read-high bitmap offsets the wrong way round.
2317 ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
2318 ++ */
2319 ++ if (msr <= 0x1fff) {
2320 ++ if (type & MSR_TYPE_R)
2321 ++ /* read-low */
2322 ++ __set_bit(msr, msr_bitmap + 0x000 / f);
2323 ++
2324 ++ if (type & MSR_TYPE_W)
2325 ++ /* write-low */
2326 ++ __set_bit(msr, msr_bitmap + 0x800 / f);
2327 ++
2328 ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2329 ++ msr &= 0x1fff;
2330 ++ if (type & MSR_TYPE_R)
2331 ++ /* read-high */
2332 ++ __set_bit(msr, msr_bitmap + 0x400 / f);
2333 ++
2334 ++ if (type & MSR_TYPE_W)
2335 ++ /* write-high */
2336 ++ __set_bit(msr, msr_bitmap + 0xc00 / f);
2337 ++
2338 ++ }
2339 ++}
2340 ++
2341 ++static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
2342 ++ u32 msr, int type, bool value)
2343 ++{
2344 ++ if (value)
2345 ++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
2346 ++ else
2347 ++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
2348 ++}
2349 ++
2350 + /*
2351 + * If a msr is allowed by L0, we should check whether it is allowed by L1.
2352 + * The corresponding bit will be cleared unless both of L0 and L1 allow it.
2353 +@@ -5001,30 +5155,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
2354 + }
2355 + }
2356 +
2357 +-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
2358 ++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
2359 + {
2360 +- if (!longmode_only)
2361 +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
2362 +- msr, MSR_TYPE_R | MSR_TYPE_W);
2363 +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
2364 +- msr, MSR_TYPE_R | MSR_TYPE_W);
2365 ++ u8 mode = 0;
2366 ++
2367 ++ if (cpu_has_secondary_exec_ctrls() &&
2368 ++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
2369 ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
2370 ++ mode |= MSR_BITMAP_MODE_X2APIC;
2371 ++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
2372 ++ mode |= MSR_BITMAP_MODE_X2APIC_APICV;
2373 ++ }
2374 ++
2375 ++ if (is_long_mode(vcpu))
2376 ++ mode |= MSR_BITMAP_MODE_LM;
2377 ++
2378 ++ return mode;
2379 + }
2380 +
2381 +-static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
2382 ++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
2383 ++
2384 ++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
2385 ++ u8 mode)
2386 + {
2387 +- if (apicv_active) {
2388 +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
2389 +- msr, type);
2390 +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
2391 +- msr, type);
2392 +- } else {
2393 +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
2394 +- msr, type);
2395 +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
2396 +- msr, type);
2397 ++ int msr;
2398 ++
2399 ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
2400 ++ unsigned word = msr / BITS_PER_LONG;
2401 ++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
2402 ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
2403 ++ }
2404 ++
2405 ++ if (mode & MSR_BITMAP_MODE_X2APIC) {
2406 ++ /*
2407 ++ * TPR reads and writes can be virtualized even if virtual interrupt
2408 ++ * delivery is not in use.
2409 ++ */
2410 ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
2411 ++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
2412 ++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
2413 ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
2414 ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
2415 ++ }
2416 + }
2417 + }
2418 +
2419 ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
2420 ++{
2421 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2422 ++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
2423 ++ u8 mode = vmx_msr_bitmap_mode(vcpu);
2424 ++ u8 changed = mode ^ vmx->msr_bitmap_mode;
2425 ++
2426 ++ if (!changed)
2427 ++ return;
2428 ++
2429 ++ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
2430 ++ !(mode & MSR_BITMAP_MODE_LM));
2431 ++
2432 ++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
2433 ++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
2434 ++
2435 ++ vmx->msr_bitmap_mode = mode;
2436 ++}
2437 ++
2438 + static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
2439 + {
2440 + return enable_apicv;
2441 +@@ -5274,7 +5468,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
2442 + }
2443 +
2444 + if (cpu_has_vmx_msr_bitmap())
2445 +- vmx_set_msr_bitmap(vcpu);
2446 ++ vmx_update_msr_bitmap(vcpu);
2447 + }
2448 +
2449 + static u32 vmx_exec_control(struct vcpu_vmx *vmx)
2450 +@@ -5461,7 +5655,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
2451 + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
2452 + }
2453 + if (cpu_has_vmx_msr_bitmap())
2454 +- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
2455 ++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
2456 +
2457 + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
2458 +
2459 +@@ -5539,6 +5733,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
2460 + ++vmx->nmsrs;
2461 + }
2462 +
2463 ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
2464 ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
2465 +
2466 + vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
2467 +
2468 +@@ -5567,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
2469 + u64 cr0;
2470 +
2471 + vmx->rmode.vm86_active = 0;
2472 ++ vmx->spec_ctrl = 0;
2473 +
2474 + vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
2475 + kvm_set_cr8(vcpu, 0);
2476 +@@ -6744,7 +6941,7 @@ void vmx_enable_tdp(void)
2477 +
2478 + static __init int hardware_setup(void)
2479 + {
2480 +- int r = -ENOMEM, i, msr;
2481 ++ int r = -ENOMEM, i;
2482 +
2483 + rdmsrl_safe(MSR_EFER, &host_efer);
2484 +
2485 +@@ -6764,9 +6961,6 @@ static __init int hardware_setup(void)
2486 +
2487 + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
2488 +
2489 +- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
2490 +- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
2491 +-
2492 + if (setup_vmcs_config(&vmcs_config) < 0) {
2493 + r = -EIO;
2494 + goto out;
2495 +@@ -6835,42 +7029,8 @@ static __init int hardware_setup(void)
2496 + kvm_tsc_scaling_ratio_frac_bits = 48;
2497 + }
2498 +
2499 +- vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
2500 +- vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
2501 +- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
2502 +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
2503 +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
2504 +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
2505 +-
2506 +- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
2507 +- vmx_msr_bitmap_legacy, PAGE_SIZE);
2508 +- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
2509 +- vmx_msr_bitmap_longmode, PAGE_SIZE);
2510 +- memcpy(vmx_msr_bitmap_legacy_x2apic,
2511 +- vmx_msr_bitmap_legacy, PAGE_SIZE);
2512 +- memcpy(vmx_msr_bitmap_longmode_x2apic,
2513 +- vmx_msr_bitmap_longmode, PAGE_SIZE);
2514 +-
2515 + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
2516 +
2517 +- for (msr = 0x800; msr <= 0x8ff; msr++) {
2518 +- if (msr == 0x839 /* TMCCT */)
2519 +- continue;
2520 +- vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
2521 +- }
2522 +-
2523 +- /*
2524 +- * TPR reads and writes can be virtualized even if virtual interrupt
2525 +- * delivery is not in use.
2526 +- */
2527 +- vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
2528 +- vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
2529 +-
2530 +- /* EOI */
2531 +- vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
2532 +- /* SELF-IPI */
2533 +- vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
2534 +-
2535 + if (enable_ept)
2536 + vmx_enable_tdp();
2537 + else
2538 +@@ -6973,94 +7133,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
2539 + return handle_nop(vcpu);
2540 + }
2541 +
2542 +-/*
2543 +- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
2544 +- * We could reuse a single VMCS for all the L2 guests, but we also want the
2545 +- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
2546 +- * allows keeping them loaded on the processor, and in the future will allow
2547 +- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
2548 +- * every entry if they never change.
2549 +- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
2550 +- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
2551 +- *
2552 +- * The following functions allocate and free a vmcs02 in this pool.
2553 +- */
2554 +-
2555 +-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
2556 +-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
2557 +-{
2558 +- struct vmcs02_list *item;
2559 +- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
2560 +- if (item->vmptr == vmx->nested.current_vmptr) {
2561 +- list_move(&item->list, &vmx->nested.vmcs02_pool);
2562 +- return &item->vmcs02;
2563 +- }
2564 +-
2565 +- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
2566 +- /* Recycle the least recently used VMCS. */
2567 +- item = list_last_entry(&vmx->nested.vmcs02_pool,
2568 +- struct vmcs02_list, list);
2569 +- item->vmptr = vmx->nested.current_vmptr;
2570 +- list_move(&item->list, &vmx->nested.vmcs02_pool);
2571 +- return &item->vmcs02;
2572 +- }
2573 +-
2574 +- /* Create a new VMCS */
2575 +- item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
2576 +- if (!item)
2577 +- return NULL;
2578 +- item->vmcs02.vmcs = alloc_vmcs();
2579 +- item->vmcs02.shadow_vmcs = NULL;
2580 +- if (!item->vmcs02.vmcs) {
2581 +- kfree(item);
2582 +- return NULL;
2583 +- }
2584 +- loaded_vmcs_init(&item->vmcs02);
2585 +- item->vmptr = vmx->nested.current_vmptr;
2586 +- list_add(&(item->list), &(vmx->nested.vmcs02_pool));
2587 +- vmx->nested.vmcs02_num++;
2588 +- return &item->vmcs02;
2589 +-}
2590 +-
2591 +-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
2592 +-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
2593 +-{
2594 +- struct vmcs02_list *item;
2595 +- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
2596 +- if (item->vmptr == vmptr) {
2597 +- free_loaded_vmcs(&item->vmcs02);
2598 +- list_del(&item->list);
2599 +- kfree(item);
2600 +- vmx->nested.vmcs02_num--;
2601 +- return;
2602 +- }
2603 +-}
2604 +-
2605 +-/*
2606 +- * Free all VMCSs saved for this vcpu, except the one pointed by
2607 +- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
2608 +- * must be &vmx->vmcs01.
2609 +- */
2610 +-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
2611 +-{
2612 +- struct vmcs02_list *item, *n;
2613 +-
2614 +- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
2615 +- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
2616 +- /*
2617 +- * Something will leak if the above WARN triggers. Better than
2618 +- * a use-after-free.
2619 +- */
2620 +- if (vmx->loaded_vmcs == &item->vmcs02)
2621 +- continue;
2622 +-
2623 +- free_loaded_vmcs(&item->vmcs02);
2624 +- list_del(&item->list);
2625 +- kfree(item);
2626 +- vmx->nested.vmcs02_num--;
2627 +- }
2628 +-}
2629 +-
2630 + /*
2631 + * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
2632 + * set the success or error code of an emulated VMX instruction, as specified
2633 +@@ -7241,13 +7313,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
2634 + {
2635 + struct vcpu_vmx *vmx = to_vmx(vcpu);
2636 + struct vmcs *shadow_vmcs;
2637 ++ int r;
2638 +
2639 +- if (cpu_has_vmx_msr_bitmap()) {
2640 +- vmx->nested.msr_bitmap =
2641 +- (unsigned long *)__get_free_page(GFP_KERNEL);
2642 +- if (!vmx->nested.msr_bitmap)
2643 +- goto out_msr_bitmap;
2644 +- }
2645 ++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
2646 ++ if (r < 0)
2647 ++ goto out_vmcs02;
2648 +
2649 + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
2650 + if (!vmx->nested.cached_vmcs12)
2651 +@@ -7264,9 +7334,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
2652 + vmx->vmcs01.shadow_vmcs = shadow_vmcs;
2653 + }
2654 +
2655 +- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
2656 +- vmx->nested.vmcs02_num = 0;
2657 +-
2658 + hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
2659 + HRTIMER_MODE_REL_PINNED);
2660 + vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
2661 +@@ -7278,9 +7345,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
2662 + kfree(vmx->nested.cached_vmcs12);
2663 +
2664 + out_cached_vmcs12:
2665 +- free_page((unsigned long)vmx->nested.msr_bitmap);
2666 ++ free_loaded_vmcs(&vmx->nested.vmcs02);
2667 +
2668 +-out_msr_bitmap:
2669 ++out_vmcs02:
2670 + return -ENOMEM;
2671 + }
2672 +
2673 +@@ -7423,10 +7490,6 @@ static void free_nested(struct vcpu_vmx *vmx)
2674 + free_vpid(vmx->nested.vpid02);
2675 + vmx->nested.posted_intr_nv = -1;
2676 + vmx->nested.current_vmptr = -1ull;
2677 +- if (vmx->nested.msr_bitmap) {
2678 +- free_page((unsigned long)vmx->nested.msr_bitmap);
2679 +- vmx->nested.msr_bitmap = NULL;
2680 +- }
2681 + if (enable_shadow_vmcs) {
2682 + vmx_disable_shadow_vmcs(vmx);
2683 + vmcs_clear(vmx->vmcs01.shadow_vmcs);
2684 +@@ -7434,7 +7497,7 @@ static void free_nested(struct vcpu_vmx *vmx)
2685 + vmx->vmcs01.shadow_vmcs = NULL;
2686 + }
2687 + kfree(vmx->nested.cached_vmcs12);
2688 +- /* Unpin physical memory we referred to in current vmcs02 */
2689 ++ /* Unpin physical memory we referred to in the vmcs02 */
2690 + if (vmx->nested.apic_access_page) {
2691 + kvm_release_page_dirty(vmx->nested.apic_access_page);
2692 + vmx->nested.apic_access_page = NULL;
2693 +@@ -7450,7 +7513,7 @@ static void free_nested(struct vcpu_vmx *vmx)
2694 + vmx->nested.pi_desc = NULL;
2695 + }
2696 +
2697 +- nested_free_all_saved_vmcss(vmx);
2698 ++ free_loaded_vmcs(&vmx->nested.vmcs02);
2699 + }
2700 +
2701 + /* Emulate the VMXOFF instruction */
2702 +@@ -7493,8 +7556,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
2703 + vmptr + offsetof(struct vmcs12, launch_state),
2704 + &zero, sizeof(zero));
2705 +
2706 +- nested_free_vmcs02(vmx, vmptr);
2707 +-
2708 + nested_vmx_succeed(vcpu);
2709 + return kvm_skip_emulated_instruction(vcpu);
2710 + }
2711 +@@ -8406,10 +8467,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
2712 +
2713 + /*
2714 + * The host physical addresses of some pages of guest memory
2715 +- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
2716 +- * may write to these pages via their host physical address while
2717 +- * L2 is running, bypassing any address-translation-based dirty
2718 +- * tracking (e.g. EPT write protection).
2719 ++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
2720 ++ * Page). The CPU may write to these pages via their host
2721 ++ * physical address while L2 is running, bypassing any
2722 ++ * address-translation-based dirty tracking (e.g. EPT write
2723 ++ * protection).
2724 + *
2725 + * Mark them dirty on every exit from L2 to prevent them from
2726 + * getting out of sync with dirty tracking.
2727 +@@ -8943,7 +9005,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
2728 + }
2729 + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
2730 +
2731 +- vmx_set_msr_bitmap(vcpu);
2732 ++ vmx_update_msr_bitmap(vcpu);
2733 + }
2734 +
2735 + static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
2736 +@@ -9129,14 +9191,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
2737 + #endif
2738 + "pushf\n\t"
2739 + __ASM_SIZE(push) " $%c[cs]\n\t"
2740 +- "call *%[entry]\n\t"
2741 ++ CALL_NOSPEC
2742 + :
2743 + #ifdef CONFIG_X86_64
2744 + [sp]"=&r"(tmp),
2745 + #endif
2746 + ASM_CALL_CONSTRAINT
2747 + :
2748 +- [entry]"r"(entry),
2749 ++ THUNK_TARGET(entry),
2750 + [ss]"i"(__KERNEL_DS),
2751 + [cs]"i"(__KERNEL_CS)
2752 + );
2753 +@@ -9373,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2754 +
2755 + vmx_arm_hv_timer(vcpu);
2756 +
2757 ++ /*
2758 ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
2759 ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there
2760 ++ * is no need to worry about the conditional branch over the wrmsr
2761 ++ * being speculatively taken.
2762 ++ */
2763 ++ if (vmx->spec_ctrl)
2764 ++ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
2765 ++
2766 + vmx->__launched = vmx->loaded_vmcs->launched;
2767 + asm(
2768 + /* Store host registers */
2769 +@@ -9491,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2770 + #endif
2771 + );
2772 +
2773 ++ /*
2774 ++ * We do not use IBRS in the kernel. If this vCPU has used the
2775 ++ * SPEC_CTRL MSR it may have left it on; save the value and
2776 ++ * turn it off. This is much more efficient than blindly adding
2777 ++ * it to the atomic save/restore list. Especially as the former
2778 ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
2779 ++ *
2780 ++ * For non-nested case:
2781 ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to
2782 ++ * save it.
2783 ++ *
2784 ++ * For nested case:
2785 ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to
2786 ++ * save it.
2787 ++ */
2788 ++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
2789 ++ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
2790 ++
2791 ++ if (vmx->spec_ctrl)
2792 ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
2793 ++
2794 + /* Eliminate branch target predictions from guest mode */
2795 + vmexit_fill_RSB();
2796 +
2797 +@@ -9604,6 +9696,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2798 + {
2799 + int err;
2800 + struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2801 ++ unsigned long *msr_bitmap;
2802 + int cpu;
2803 +
2804 + if (!vmx)
2805 +@@ -9636,13 +9729,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2806 + if (!vmx->guest_msrs)
2807 + goto free_pml;
2808 +
2809 +- vmx->loaded_vmcs = &vmx->vmcs01;
2810 +- vmx->loaded_vmcs->vmcs = alloc_vmcs();
2811 +- vmx->loaded_vmcs->shadow_vmcs = NULL;
2812 +- if (!vmx->loaded_vmcs->vmcs)
2813 ++ err = alloc_loaded_vmcs(&vmx->vmcs01);
2814 ++ if (err < 0)
2815 + goto free_msrs;
2816 +- loaded_vmcs_init(vmx->loaded_vmcs);
2817 +
2818 ++ msr_bitmap = vmx->vmcs01.msr_bitmap;
2819 ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
2820 ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
2821 ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
2822 ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
2823 ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
2824 ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
2825 ++ vmx->msr_bitmap_mode = 0;
2826 ++
2827 ++ vmx->loaded_vmcs = &vmx->vmcs01;
2828 + cpu = get_cpu();
2829 + vmx_vcpu_load(&vmx->vcpu, cpu);
2830 + vmx->vcpu.cpu = cpu;
2831 +@@ -10105,10 +10205,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
2832 + int msr;
2833 + struct page *page;
2834 + unsigned long *msr_bitmap_l1;
2835 +- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
2836 ++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
2837 ++ /*
2838 ++ * pred_cmd & spec_ctrl are trying to verify two things:
2839 ++ *
2840 ++ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
2841 ++ * ensures that we do not accidentally generate an L02 MSR bitmap
2842 ++ * from the L12 MSR bitmap that is too permissive.
2843 ++ * 2. That L1 or L2s have actually used the MSR. This avoids
2844 ++ * unnecessarily merging of the bitmap if the MSR is unused. This
2845 ++ * works properly because we only update the L01 MSR bitmap lazily.
2846 ++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
2847 ++ * updated to reflect this when L1 (or its L2s) actually write to
2848 ++ * the MSR.
2849 ++ */
2850 ++ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
2851 ++ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
2852 +
2853 +- /* This shortcut is ok because we support only x2APIC MSRs so far. */
2854 +- if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
2855 ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
2856 ++ !pred_cmd && !spec_ctrl)
2857 + return false;
2858 +
2859 + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
2860 +@@ -10141,6 +10256,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
2861 + MSR_TYPE_W);
2862 + }
2863 + }
2864 ++
2865 ++ if (spec_ctrl)
2866 ++ nested_vmx_disable_intercept_for_msr(
2867 ++ msr_bitmap_l1, msr_bitmap_l0,
2868 ++ MSR_IA32_SPEC_CTRL,
2869 ++ MSR_TYPE_R | MSR_TYPE_W);
2870 ++
2871 ++ if (pred_cmd)
2872 ++ nested_vmx_disable_intercept_for_msr(
2873 ++ msr_bitmap_l1, msr_bitmap_l0,
2874 ++ MSR_IA32_PRED_CMD,
2875 ++ MSR_TYPE_W);
2876 ++
2877 + kunmap(page);
2878 + kvm_release_page_clean(page);
2879 +
2880 +@@ -10682,6 +10810,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
2881 + if (kvm_has_tsc_control)
2882 + decache_tsc_multiplier(vmx);
2883 +
2884 ++ if (cpu_has_vmx_msr_bitmap())
2885 ++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
2886 ++
2887 + if (enable_vpid) {
2888 + /*
2889 + * There is no direct mapping between vpid02 and vpid12, the
2890 +@@ -10903,20 +11034,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
2891 + {
2892 + struct vcpu_vmx *vmx = to_vmx(vcpu);
2893 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2894 +- struct loaded_vmcs *vmcs02;
2895 + u32 msr_entry_idx;
2896 + u32 exit_qual;
2897 +
2898 +- vmcs02 = nested_get_current_vmcs02(vmx);
2899 +- if (!vmcs02)
2900 +- return -ENOMEM;
2901 +-
2902 + enter_guest_mode(vcpu);
2903 +
2904 + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
2905 + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
2906 +
2907 +- vmx_switch_vmcs(vcpu, vmcs02);
2908 ++ vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
2909 + vmx_segment_cache_clear(vmx);
2910 +
2911 + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
2912 +@@ -11485,7 +11611,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
2913 + vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
2914 +
2915 + if (cpu_has_vmx_msr_bitmap())
2916 +- vmx_set_msr_bitmap(vcpu);
2917 ++ vmx_update_msr_bitmap(vcpu);
2918 +
2919 + if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
2920 + vmcs12->vm_exit_msr_load_count))
2921 +@@ -11534,10 +11660,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
2922 + vm_exit_controls_reset_shadow(vmx);
2923 + vmx_segment_cache_clear(vmx);
2924 +
2925 +- /* if no vmcs02 cache requested, remove the one we used */
2926 +- if (VMCS02_POOL_SIZE == 0)
2927 +- nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
2928 +-
2929 + /* Update any VMCS fields that might have changed while L2 ran */
2930 + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
2931 + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
2932 +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
2933 +index c53298dfbf50..ac381437c291 100644
2934 +--- a/arch/x86/kvm/x86.c
2935 ++++ b/arch/x86/kvm/x86.c
2936 +@@ -1009,6 +1009,7 @@ static u32 msrs_to_save[] = {
2937 + #endif
2938 + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
2939 + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
2940 ++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
2941 + };
2942 +
2943 + static unsigned num_msrs_to_save;
2944 +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
2945 +index f23934bbaf4e..69a473919260 100644
2946 +--- a/arch/x86/lib/Makefile
2947 ++++ b/arch/x86/lib/Makefile
2948 +@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
2949 + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
2950 + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
2951 + lib-$(CONFIG_RETPOLINE) += retpoline.o
2952 ++OBJECT_FILES_NON_STANDARD_retpoline.o :=y
2953 +
2954 + obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
2955 +
2956 +diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
2957 +index c97d935a29e8..49b167f73215 100644
2958 +--- a/arch/x86/lib/getuser.S
2959 ++++ b/arch/x86/lib/getuser.S
2960 +@@ -40,6 +40,8 @@ ENTRY(__get_user_1)
2961 + mov PER_CPU_VAR(current_task), %_ASM_DX
2962 + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
2963 + jae bad_get_user
2964 ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
2965 ++ and %_ASM_DX, %_ASM_AX
2966 + ASM_STAC
2967 + 1: movzbl (%_ASM_AX),%edx
2968 + xor %eax,%eax
2969 +@@ -54,6 +56,8 @@ ENTRY(__get_user_2)
2970 + mov PER_CPU_VAR(current_task), %_ASM_DX
2971 + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
2972 + jae bad_get_user
2973 ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
2974 ++ and %_ASM_DX, %_ASM_AX
2975 + ASM_STAC
2976 + 2: movzwl -1(%_ASM_AX),%edx
2977 + xor %eax,%eax
2978 +@@ -68,6 +72,8 @@ ENTRY(__get_user_4)
2979 + mov PER_CPU_VAR(current_task), %_ASM_DX
2980 + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
2981 + jae bad_get_user
2982 ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
2983 ++ and %_ASM_DX, %_ASM_AX
2984 + ASM_STAC
2985 + 3: movl -3(%_ASM_AX),%edx
2986 + xor %eax,%eax
2987 +@@ -83,6 +89,8 @@ ENTRY(__get_user_8)
2988 + mov PER_CPU_VAR(current_task), %_ASM_DX
2989 + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
2990 + jae bad_get_user
2991 ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
2992 ++ and %_ASM_DX, %_ASM_AX
2993 + ASM_STAC
2994 + 4: movq -7(%_ASM_AX),%rdx
2995 + xor %eax,%eax
2996 +@@ -94,6 +102,8 @@ ENTRY(__get_user_8)
2997 + mov PER_CPU_VAR(current_task), %_ASM_DX
2998 + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
2999 + jae bad_get_user_8
3000 ++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
3001 ++ and %_ASM_DX, %_ASM_AX
3002 + ASM_STAC
3003 + 4: movl -7(%_ASM_AX),%edx
3004 + 5: movl -3(%_ASM_AX),%ecx
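
The sbb/and pairs added above are the assembly form of the array_index_mask_nospec() masking the inline comments reference: once cmp/jae has established that the user address is below the task's address limit, sbb copies the resulting carry flag into an all-ones mask, and the and leaves the address untouched on the architectural path while forcing it to zero if the CPU speculated past the jae with an out-of-range address. A minimal userspace sketch of the resulting values (illustrative only, not kernel code; the real sequence deliberately avoids any conditional branch):

#include <stdio.h>

/* All-ones when addr is below limit, zero otherwise.  This reproduces the
 * value that "cmp limit, addr; sbb mask, mask" leaves in the mask register;
 * the kernel derives it from the carry flag precisely so that no branch,
 * and hence no misprediction, is involved. */
static unsigned long below_limit_mask(unsigned long addr, unsigned long limit)
{
        return 0UL - (unsigned long)(addr < limit);
}

int main(void)
{
        unsigned long limit = 0x1000;   /* illustrative access limit */

        /* in range: all-ones mask, the address passes through the "and" */
        printf("%#lx\n", 0x0200UL & below_limit_mask(0x0200UL, limit));
        /* out of range: zero mask, a speculative access is forced to NULL */
        printf("%#lx\n", 0x2000UL & below_limit_mask(0x2000UL, limit));
        return 0;
}
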
3005 +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
3006 +index c909961e678a..480edc3a5e03 100644
3007 +--- a/arch/x86/lib/retpoline.S
3008 ++++ b/arch/x86/lib/retpoline.S
3009 +@@ -7,6 +7,7 @@
3010 + #include <asm/alternative-asm.h>
3011 + #include <asm/export.h>
3012 + #include <asm/nospec-branch.h>
3013 ++#include <asm/bitsperlong.h>
3014 +
3015 + .macro THUNK reg
3016 + .section .text.__x86.indirect_thunk
3017 +@@ -46,3 +47,58 @@ GENERATE_THUNK(r13)
3018 + GENERATE_THUNK(r14)
3019 + GENERATE_THUNK(r15)
3020 + #endif
3021 ++
3022 ++/*
3023 ++ * Fill the CPU return stack buffer.
3024 ++ *
3025 ++ * Each entry in the RSB, if used for a speculative 'ret', contains an
3026 ++ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
3027 ++ *
3028 ++ * This is required in various cases for retpoline and IBRS-based
3029 ++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
3030 ++ * eliminate potentially bogus entries from the RSB, and sometimes
3031 ++ * purely to ensure that it doesn't get empty, which on some CPUs would
3032 ++ * allow predictions from other (unwanted!) sources to be used.
3033 ++ *
3034 ++ * Google experimented with loop-unrolling and this turned out to be
3035 ++ * the optimal version - two calls, each with their own speculation
3036 ++ * trap should their return address end up getting used, in a loop.
3037 ++ */
3038 ++.macro STUFF_RSB nr:req sp:req
3039 ++ mov $(\nr / 2), %_ASM_BX
3040 ++ .align 16
3041 ++771:
3042 ++ call 772f
3043 ++773: /* speculation trap */
3044 ++ pause
3045 ++ lfence
3046 ++ jmp 773b
3047 ++ .align 16
3048 ++772:
3049 ++ call 774f
3050 ++775: /* speculation trap */
3051 ++ pause
3052 ++ lfence
3053 ++ jmp 775b
3054 ++ .align 16
3055 ++774:
3056 ++ dec %_ASM_BX
3057 ++ jnz 771b
3058 ++ add $((BITS_PER_LONG/8) * \nr), \sp
3059 ++.endm
3060 ++
3061 ++#define RSB_FILL_LOOPS 16 /* To avoid underflow */
3062 ++
3063 ++ENTRY(__fill_rsb)
3064 ++ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
3065 ++ ret
3066 ++END(__fill_rsb)
3067 ++EXPORT_SYMBOL_GPL(__fill_rsb)
3068 ++
3069 ++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
3070 ++
3071 ++ENTRY(__clear_rsb)
3072 ++ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
3073 ++ ret
3074 ++END(__clear_rsb)
3075 ++EXPORT_SYMBOL_GPL(__clear_rsb)
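
A note on the stack accounting in STUFF_RSB: each loop iteration performs two calls, so nr is the total number of RSB entries written and must be even; every call pushes one word-sized return address whose would-be return site is a pause/lfence capture loop, and the single add at the end drops all nr words from the stack in one go. A quick arithmetic check of that bookkeeping, assuming a 64-bit build (BITS_PER_LONG == 64); names and values here are illustrative:

#include <stdio.h>

#define BITS_PER_LONG   64      /* x86-64 assumption for this sketch */

/* Bytes the trailing "add" must give back to the stack pointer:
 * nr return addresses of one machine word each. */
static unsigned long stuff_rsb_stack_bytes(unsigned int nr)
{
        return (unsigned long)(BITS_PER_LONG / 8) * nr;
}

int main(void)
{
        printf("__fill_rsb  (16 entries): %lu bytes\n", stuff_rsb_stack_bytes(16));
        printf("__clear_rsb (32 entries): %lu bytes\n", stuff_rsb_stack_bytes(32));
        return 0;
}
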
3076 +diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
3077 +index 1b377f734e64..7add8ba06887 100644
3078 +--- a/arch/x86/lib/usercopy_32.c
3079 ++++ b/arch/x86/lib/usercopy_32.c
3080 +@@ -331,12 +331,12 @@ do { \
3081 +
3082 + unsigned long __copy_user_ll(void *to, const void *from, unsigned long n)
3083 + {
3084 +- stac();
3085 ++ __uaccess_begin_nospec();
3086 + if (movsl_is_ok(to, from, n))
3087 + __copy_user(to, from, n);
3088 + else
3089 + n = __copy_user_intel(to, from, n);
3090 +- clac();
3091 ++ __uaccess_end();
3092 + return n;
3093 + }
3094 + EXPORT_SYMBOL(__copy_user_ll);
3095 +@@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll);
3096 + unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
3097 + unsigned long n)
3098 + {
3099 +- stac();
3100 ++ __uaccess_begin_nospec();
3101 + #ifdef CONFIG_X86_INTEL_USERCOPY
3102 + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
3103 + n = __copy_user_intel_nocache(to, from, n);
3104 +@@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
3105 + #else
3106 + __copy_user(to, from, n);
3107 + #endif
3108 +- clac();
3109 ++ __uaccess_end();
3110 + return n;
3111 + }
3112 + EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
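
Switching these helpers from stac()/clac() to __uaccess_begin_nospec()/__uaccess_end() inserts a speculation barrier between the caller's access_ok()-style checks and the copy loop, so the copy cannot begin speculatively reading through a user pointer that failed the check; on x86 that barrier is effectively an LFENCE patched in via alternatives. A userspace analogue of such a barrier, as an illustrative sketch only (the name and the unconditional LFENCE are assumptions of this sketch, not the kernel's implementation):

/* LFENCE completes only after all earlier instructions have completed
 * locally, and on CPUs where it is used as a speculation barrier, later
 * instructions do not begin execution until it retires. */
static inline void speculation_barrier(void)
{
#if defined(__x86_64__) || defined(__i386__)
        __asm__ __volatile__("lfence" ::: "memory");
#endif
}
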
3113 +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
3114 +index 5bfe61a5e8e3..012d02624848 100644
3115 +--- a/arch/x86/mm/tlb.c
3116 ++++ b/arch/x86/mm/tlb.c
3117 +@@ -6,13 +6,14 @@
3118 + #include <linux/interrupt.h>
3119 + #include <linux/export.h>
3120 + #include <linux/cpu.h>
3121 ++#include <linux/debugfs.h>
3122 +
3123 + #include <asm/tlbflush.h>
3124 + #include <asm/mmu_context.h>
3125 ++#include <asm/nospec-branch.h>
3126 + #include <asm/cache.h>
3127 + #include <asm/apic.h>
3128 + #include <asm/uv/uv.h>
3129 +-#include <linux/debugfs.h>
3130 +
3131 + /*
3132 + * TLB flushing, formerly SMP-only
3133 +@@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
3134 + } else {
3135 + u16 new_asid;
3136 + bool need_flush;
3137 ++ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
3138 ++
3139 ++ /*
3140 ++ * Avoid user/user BTB poisoning by flushing the branch
3141 ++ * predictor when switching between processes. This stops
3142 ++ * one process from doing Spectre-v2 attacks on another.
3143 ++ *
3144 ++ * As an optimization, flush indirect branches only when
3145 ++ * switching into processes that disable dumping. This
3146 ++ * protects high value processes like gpg, without having
3147 ++ * too high performance overhead. IBPB is *expensive*!
3148 ++ *
3149 ++ * This will not flush branches when switching into kernel
3150 ++ * threads. It will also not flush if we switch to idle
3151 ++ * thread and back to the same process. It will flush if we
3152 ++ * switch to a different non-dumpable process.
3153 ++ */
3154 ++ if (tsk && tsk->mm &&
3155 ++ tsk->mm->context.ctx_id != last_ctx_id &&
3156 ++ get_dumpable(tsk->mm) != SUID_DUMP_USER)
3157 ++ indirect_branch_prediction_barrier();
3158 +
3159 + if (IS_ENABLED(CONFIG_VMAP_STACK)) {
3160 + /*
3161 +@@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
3162 + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
3163 + }
3164 +
3165 ++ /*
3166 ++ * Record last user mm's context id, so we can avoid
3167 ++ * flushing branch buffer with IBPB if we switch back
3168 ++ * to the same user.
3169 ++ */
3170 ++ if (next != &init_mm)
3171 ++ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
3172 ++
3173 + this_cpu_write(cpu_tlbstate.loaded_mm, next);
3174 + this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
3175 + }
3176 +@@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void)
3177 + write_cr3(build_cr3(mm->pgd, 0));
3178 +
3179 + /* Reinitialize tlbstate. */
3180 ++ this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
3181 + this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
3182 + this_cpu_write(cpu_tlbstate.next_asid, 1);
3183 + this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
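
The condition guarding indirect_branch_prediction_barrier() above encodes a deliberate trade-off: the barrier is issued only when switching to a different user mm whose dumping is restricted, so kernel threads, switches back to the same process, and ordinary dumpable processes all skip the expensive IBPB. Restated as a standalone predicate, using simplified stand-in types rather than the kernel's own:

#include <stdbool.h>
#include <stdint.h>

#define DUMPABLE_NORMAL 1       /* analogue of SUID_DUMP_USER */

struct mm_info {
        uint64_t ctx_id;        /* per-mm context id */
        int dumpable;           /* DUMPABLE_NORMAL for ordinary processes */
};

/* Mirrors the check added to switch_mm_irqs_off(): flush indirect branch
 * predictions only for a switch to a *different* user mm that has
 * restricted dumping (e.g. setuid or gpg-style processes). */
static bool should_issue_ibpb(const struct mm_info *next_mm,
                              uint64_t last_user_ctx_id)
{
        if (!next_mm)                             /* kernel thread: skip */
                return false;
        if (next_mm->ctx_id == last_user_ctx_id)  /* same user as before */
                return false;
        return next_mm->dumpable != DUMPABLE_NORMAL;
}
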
3184 +diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
3185 +index db040b378224..9180b9bd5821 100644
3186 +--- a/drivers/auxdisplay/img-ascii-lcd.c
3187 ++++ b/drivers/auxdisplay/img-ascii-lcd.c
3188 +@@ -441,3 +441,7 @@ static struct platform_driver img_ascii_lcd_driver = {
3189 + .remove = img_ascii_lcd_remove,
3190 + };
3191 + module_platform_driver(img_ascii_lcd_driver);
3192 ++
3193 ++MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display");
3194 ++MODULE_AUTHOR("Paul Burton <paul.burton@××××.com>");
3195 ++MODULE_LICENSE("GPL");
3196 +diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
3197 +index d9ab7c75b14f..e0c73ceba2ed 100644
3198 +--- a/drivers/fpga/fpga-region.c
3199 ++++ b/drivers/fpga/fpga-region.c
3200 +@@ -147,6 +147,7 @@ static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region)
3201 + mgr_node = of_parse_phandle(np, "fpga-mgr", 0);
3202 + if (mgr_node) {
3203 + mgr = of_fpga_mgr_get(mgr_node);
3204 ++ of_node_put(mgr_node);
3205 + of_node_put(np);
3206 + return mgr;
3207 + }
3208 +@@ -192,10 +193,13 @@ static int fpga_region_get_bridges(struct fpga_region *region,
3209 + parent_br = region_np->parent;
3210 +
3211 + /* If overlay has a list of bridges, use it. */
3212 +- if (of_parse_phandle(overlay, "fpga-bridges", 0))
3213 ++ br = of_parse_phandle(overlay, "fpga-bridges", 0);
3214 ++ if (br) {
3215 ++ of_node_put(br);
3216 + np = overlay;
3217 +- else
3218 ++ } else {
3219 + np = region_np;
3220 ++ }
3221 +
3222 + for (i = 0; ; i++) {
3223 + br = of_parse_phandle(np, "fpga-bridges", i);
3224 +@@ -203,12 +207,15 @@ static int fpga_region_get_bridges(struct fpga_region *region,
3225 + break;
3226 +
3227 + /* If parent bridge is in list, skip it. */
3228 +- if (br == parent_br)
3229 ++ if (br == parent_br) {
3230 ++ of_node_put(br);
3231 + continue;
3232 ++ }
3233 +
3234 + /* If node is a bridge, get it and add to list */
3235 + ret = fpga_bridge_get_to_list(br, region->info,
3236 + &region->bridge_list);
3237 ++ of_node_put(br);
3238 +
3239 + /* If any of the bridges are in use, give up */
3240 + if (ret == -EBUSY) {
3241 +diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c
3242 +index 98fbb628d5bd..38411e1c155b 100644
3243 +--- a/drivers/iio/accel/kxsd9-i2c.c
3244 ++++ b/drivers/iio/accel/kxsd9-i2c.c
3245 +@@ -63,3 +63,6 @@ static struct i2c_driver kxsd9_i2c_driver = {
3246 + .id_table = kxsd9_i2c_id,
3247 + };
3248 + module_i2c_driver(kxsd9_i2c_driver);
3249 ++
3250 ++MODULE_LICENSE("GPL v2");
3251 ++MODULE_DESCRIPTION("KXSD9 accelerometer I2C interface");
3252 +diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c
3253 +index 47d24ae5462f..fe3d7826783c 100644
3254 +--- a/drivers/iio/adc/qcom-vadc-common.c
3255 ++++ b/drivers/iio/adc/qcom-vadc-common.c
3256 +@@ -5,6 +5,7 @@
3257 + #include <linux/math64.h>
3258 + #include <linux/log2.h>
3259 + #include <linux/err.h>
3260 ++#include <linux/module.h>
3261 +
3262 + #include "qcom-vadc-common.h"
3263 +
3264 +@@ -229,3 +230,6 @@ int qcom_vadc_decimation_from_dt(u32 value)
3265 + return __ffs64(value / VADC_DECIMATION_MIN);
3266 + }
3267 + EXPORT_SYMBOL(qcom_vadc_decimation_from_dt);
3268 ++
3269 ++MODULE_LICENSE("GPL v2");
3270 ++MODULE_DESCRIPTION("Qualcomm ADC common functionality");
3271 +diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
3272 +index 866aa3ce1ac9..6cf0006d4c8d 100644
3273 +--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
3274 ++++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
3275 +@@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev)
3276 + return 0;
3277 + }
3278 + EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit);
3279 ++
3280 ++MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@××××.fr>");
3281 ++MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver");
3282 ++MODULE_LICENSE("GPL v2");
3283 +diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
3284 +index 854995e1cae7..7e7e6eb95b0a 100644
3285 +--- a/drivers/tty/serial/serial_core.c
3286 ++++ b/drivers/tty/serial/serial_core.c
3287 +@@ -974,6 +974,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
3288 + }
3289 + } else {
3290 + retval = uart_startup(tty, state, 1);
3291 ++ if (retval == 0)
3292 ++ tty_port_set_initialized(port, true);
3293 + if (retval > 0)
3294 + retval = 0;
3295 + }
3296 +diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
3297 +index 1c65817673db..41615f38bcff 100644
3298 +--- a/include/linux/fdtable.h
3299 ++++ b/include/linux/fdtable.h
3300 +@@ -10,6 +10,7 @@
3301 + #include <linux/compiler.h>
3302 + #include <linux/spinlock.h>
3303 + #include <linux/rcupdate.h>
3304 ++#include <linux/nospec.h>
3305 + #include <linux/types.h>
3306 + #include <linux/init.h>
3307 + #include <linux/fs.h>
3308 +@@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
3309 + {
3310 + struct fdtable *fdt = rcu_dereference_raw(files->fdt);
3311 +
3312 +- if (fd < fdt->max_fds)
3313 ++ if (fd < fdt->max_fds) {
3314 ++ fd = array_index_nospec(fd, fdt->max_fds);
3315 + return rcu_dereference_raw(fdt->fd[fd]);
3316 ++ }
3317 + return NULL;
3318 + }
3319 +
3320 +diff --git a/include/linux/init.h b/include/linux/init.h
3321 +index ea1b31101d9e..506a98151131 100644
3322 +--- a/include/linux/init.h
3323 ++++ b/include/linux/init.h
3324 +@@ -5,6 +5,13 @@
3325 + #include <linux/compiler.h>
3326 + #include <linux/types.h>
3327 +
3328 ++/* Built-in __init functions needn't be compiled with retpoline */
3329 ++#if defined(RETPOLINE) && !defined(MODULE)
3330 ++#define __noretpoline __attribute__((indirect_branch("keep")))
3331 ++#else
3332 ++#define __noretpoline
3333 ++#endif
3334 ++
3335 + /* These macros are used to mark some functions or
3336 + * initialized data (doesn't apply to uninitialized data)
3337 + * as `initialization' functions. The kernel can take this
3338 +@@ -40,7 +47,7 @@
3339 +
3340 + /* These are for everybody (although not all archs will actually
3341 + discard it in modules) */
3342 +-#define __init __section(.init.text) __cold __latent_entropy
3343 ++#define __init __section(.init.text) __cold __latent_entropy __noretpoline
3344 + #define __initdata __section(.init.data)
3345 + #define __initconst __section(.init.rodata)
3346 + #define __exitdata __section(.exit.data)
3347 +diff --git a/include/linux/module.h b/include/linux/module.h
3348 +index c69b49abe877..1d8f245967be 100644
3349 +--- a/include/linux/module.h
3350 ++++ b/include/linux/module.h
3351 +@@ -801,6 +801,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
3352 + static inline void module_bug_cleanup(struct module *mod) {}
3353 + #endif /* CONFIG_GENERIC_BUG */
3354 +
3355 ++#ifdef RETPOLINE
3356 ++extern bool retpoline_module_ok(bool has_retpoline);
3357 ++#else
3358 ++static inline bool retpoline_module_ok(bool has_retpoline)
3359 ++{
3360 ++ return true;
3361 ++}
3362 ++#endif
3363 ++
3364 + #ifdef CONFIG_MODULE_SIG
3365 + static inline bool module_sig_ok(struct module *module)
3366 + {
3367 +diff --git a/include/linux/nospec.h b/include/linux/nospec.h
3368 +new file mode 100644
3369 +index 000000000000..b99bced39ac2
3370 +--- /dev/null
3371 ++++ b/include/linux/nospec.h
3372 +@@ -0,0 +1,72 @@
3373 ++// SPDX-License-Identifier: GPL-2.0
3374 ++// Copyright(c) 2018 Linus Torvalds. All rights reserved.
3375 ++// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
3376 ++// Copyright(c) 2018 Intel Corporation. All rights reserved.
3377 ++
3378 ++#ifndef _LINUX_NOSPEC_H
3379 ++#define _LINUX_NOSPEC_H
3380 ++
3381 ++/**
3382 ++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
3383 ++ * @index: array element index
3384 ++ * @size: number of elements in array
3385 ++ *
3386 ++ * When @index is out of bounds (@index >= @size), the sign bit will be
3387 ++ * set. Extend the sign bit to all bits and invert, giving a result of
3388 ++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
3389 ++ */
3390 ++#ifndef array_index_mask_nospec
3391 ++static inline unsigned long array_index_mask_nospec(unsigned long index,
3392 ++ unsigned long size)
3393 ++{
3394 ++ /*
3395 ++ * Warn developers about inappropriate array_index_nospec() usage.
3396 ++ *
3397 ++ * Even if the CPU speculates past the WARN_ONCE branch, the
3398 ++ * sign bit of @index is taken into account when generating the
3399 ++ * mask.
3400 ++ *
3401 ++ * This warning is compiled out when the compiler can infer that
3402 ++ * @index and @size are less than LONG_MAX.
3403 ++ */
3404 ++ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
3405 ++ "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
3406 ++ return 0;
3407 ++
3408 ++ /*
3409 ++ * Always calculate and emit the mask even if the compiler
3410 ++ * thinks the mask is not needed. The compiler does not take
3411 ++ * into account the value of @index under speculation.
3412 ++ */
3413 ++ OPTIMIZER_HIDE_VAR(index);
3414 ++ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
3415 ++}
3416 ++#endif
3417 ++
3418 ++/*
3419 ++ * array_index_nospec - sanitize an array index after a bounds check
3420 ++ *
3421 ++ * For a code sequence like:
3422 ++ *
3423 ++ * if (index < size) {
3424 ++ * index = array_index_nospec(index, size);
3425 ++ * val = array[index];
3426 ++ * }
3427 ++ *
3428 ++ * ...if the CPU speculates past the bounds check then
3429 ++ * array_index_nospec() will clamp the index within the range of [0,
3430 ++ * size).
3431 ++ */
3432 ++#define array_index_nospec(index, size) \
3433 ++({ \
3434 ++ typeof(index) _i = (index); \
3435 ++ typeof(size) _s = (size); \
3436 ++ unsigned long _mask = array_index_mask_nospec(_i, _s); \
3437 ++ \
3438 ++ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
3439 ++ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
3440 ++ \
3441 ++ _i &= _mask; \
3442 ++ _i; \
3443 ++})
3444 ++#endif /* _LINUX_NOSPEC_H */
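
Taken together, the header gives callers a two-step pattern: perform the ordinary bounds check, then clamp the index with array_index_nospec() before it is used to address memory, so even a mispredicted branch only ever sees an in-range index. A self-contained userspace sketch of that pattern, reusing the generic mask formula above (table contents and names are illustrative):

#include <stdio.h>

#define TABLE_SIZE      8

/* Generic mask from the header: ~0UL when index < size, 0 otherwise,
 * computed without a conditional branch. */
static unsigned long index_mask(unsigned long index, unsigned long size)
{
        return ~(long)(index | (size - 1UL - index)) >> (sizeof(long) * 8 - 1);
}

static int table[TABLE_SIZE] = { 10, 11, 12, 13, 14, 15, 16, 17 };

static int load_entry(unsigned long index)
{
        if (index >= TABLE_SIZE)
                return 0;
        /* Clamp after the check: under misspeculation the mask forces the
         * index to 0, so no out-of-bounds cache line is ever touched. */
        index &= index_mask(index, TABLE_SIZE);
        return table[index];
}

int main(void)
{
        printf("%d\n", load_entry(3));  /* 13 */
        printf("%d\n", load_entry(99)); /* 0: rejected by the bounds check */
        return 0;
}
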
3445 +diff --git a/kernel/module.c b/kernel/module.c
3446 +index dea01ac9cb74..09e48eee4d55 100644
3447 +--- a/kernel/module.c
3448 ++++ b/kernel/module.c
3449 +@@ -2863,6 +2863,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
3450 + }
3451 + #endif /* CONFIG_LIVEPATCH */
3452 +
3453 ++static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
3454 ++{
3455 ++ if (retpoline_module_ok(get_modinfo(info, "retpoline")))
3456 ++ return;
3457 ++
3458 ++ pr_warn("%s: loading module not compiled with retpoline compiler.\n",
3459 ++ mod->name);
3460 ++}
3461 ++
3462 + /* Sets info->hdr and info->len. */
3463 + static int copy_module_from_user(const void __user *umod, unsigned long len,
3464 + struct load_info *info)
3465 +@@ -3029,6 +3038,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
3466 + add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
3467 + }
3468 +
3469 ++ check_modinfo_retpoline(mod, info);
3470 ++
3471 + if (get_modinfo(info, "staging")) {
3472 + add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
3473 + pr_warn("%s: module is from the staging directory, the quality "
3474 +diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
3475 +index 542a4fc0a8d7..4bbcfc1e2d43 100644
3476 +--- a/net/wireless/nl80211.c
3477 ++++ b/net/wireless/nl80211.c
3478 +@@ -16,6 +16,7 @@
3479 + #include <linux/nl80211.h>
3480 + #include <linux/rtnetlink.h>
3481 + #include <linux/netlink.h>
3482 ++#include <linux/nospec.h>
3483 + #include <linux/etherdevice.h>
3484 + #include <net/net_namespace.h>
3485 + #include <net/genetlink.h>
3486 +@@ -2056,20 +2057,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
3487 + static int parse_txq_params(struct nlattr *tb[],
3488 + struct ieee80211_txq_params *txq_params)
3489 + {
3490 ++ u8 ac;
3491 ++
3492 + if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
3493 + !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
3494 + !tb[NL80211_TXQ_ATTR_AIFS])
3495 + return -EINVAL;
3496 +
3497 +- txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
3498 ++ ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
3499 + txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
3500 + txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
3501 + txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
3502 + txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
3503 +
3504 +- if (txq_params->ac >= NL80211_NUM_ACS)
3505 ++ if (ac >= NL80211_NUM_ACS)
3506 + return -EINVAL;
3507 +-
3508 ++ txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
3509 + return 0;
3510 + }
3511 +
3512 +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
3513 +index f51cf977c65b..6510536c06df 100644
3514 +--- a/scripts/mod/modpost.c
3515 ++++ b/scripts/mod/modpost.c
3516 +@@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree)
3517 + buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
3518 + }
3519 +
3520 ++/* Cannot check for assembler */
3521 ++static void add_retpoline(struct buffer *b)
3522 ++{
3523 ++ buf_printf(b, "\n#ifdef RETPOLINE\n");
3524 ++ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
3525 ++ buf_printf(b, "#endif\n");
3526 ++}
3527 ++
3528 + static void add_staging_flag(struct buffer *b, const char *name)
3529 + {
3530 + static const char *staging_dir = "drivers/staging";
3531 +@@ -2506,6 +2514,7 @@ int main(int argc, char **argv)
3532 + err |= check_modname_len(mod);
3533 + add_header(&buf, mod);
3534 + add_intree_flag(&buf, !external_module);
3535 ++ add_retpoline(&buf);
3536 + add_staging_flag(&buf, mod->name);
3537 + err |= add_versions(&buf, mod);
3538 + add_depends(&buf, mod, modules);
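
With add_retpoline() wired into main() above, every generated *.mod.c now carries a retpoline marker that check_modinfo_retpoline() in kernel/module.c (see the hunk earlier in this patch) reads back at load time, warning when a module was built without a retpoline-aware compiler. The emitted fragment is just the MODULE_INFO() guarded by the build's RETPOLINE define, roughly:

/* Fragment appended to a module's generated *.mod.c by add_retpoline();
 * whether it expands depends on RETPOLINE being defined for that build. */
#ifdef RETPOLINE
MODULE_INFO(retpoline, "Y");
#endif
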
3539 +diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c
3540 +index 25c63510ae15..7cdd2dc4fd79 100644
3541 +--- a/sound/soc/codecs/pcm512x-spi.c
3542 ++++ b/sound/soc/codecs/pcm512x-spi.c
3543 +@@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = {
3544 + };
3545 +
3546 + module_spi_driver(pcm512x_spi_driver);
3547 ++
3548 ++MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI");
3549 ++MODULE_AUTHOR("Mark Brown <broonie@××××××.org>");
3550 ++MODULE_LICENSE("GPL v2");
3551 +diff --git a/tools/objtool/check.c b/tools/objtool/check.c
3552 +index f40d46e24bcc..9cd028aa1509 100644
3553 +--- a/tools/objtool/check.c
3554 ++++ b/tools/objtool/check.c
3555 +@@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file)
3556 + dest_off = insn->offset + insn->len + insn->immediate;
3557 + insn->call_dest = find_symbol_by_offset(insn->sec,
3558 + dest_off);
3559 +- /*
3560 +- * FIXME: Thanks to retpolines, it's now considered
3561 +- * normal for a function to call within itself. So
3562 +- * disable this warning for now.
3563 +- */
3564 +-#if 0
3565 +- if (!insn->call_dest) {
3566 +- WARN_FUNC("can't find call dest symbol at offset 0x%lx",
3567 +- insn->sec, insn->offset, dest_off);
3568 ++
3569 ++ if (!insn->call_dest && !insn->ignore) {
3570 ++ WARN_FUNC("unsupported intra-function call",
3571 ++ insn->sec, insn->offset);
3572 ++ WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
3573 + return -1;
3574 + }
3575 +-#endif
3576 ++
3577 + } else if (rela->sym->type == STT_SECTION) {
3578 + insn->call_dest = find_symbol_by_offset(rela->sym->sec,
3579 + rela->addend+4);
3580 +@@ -598,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file,
3581 + struct instruction *orig_insn,
3582 + struct instruction **new_insn)
3583 + {
3584 +- struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
3585 ++ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
3586 + unsigned long dest_off;
3587 +
3588 + last_orig_insn = NULL;
3589 +@@ -614,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file,
3590 + last_orig_insn = insn;
3591 + }
3592 +
3593 +- if (!next_insn_same_sec(file, last_orig_insn)) {
3594 +- WARN("%s: don't know how to handle alternatives at end of section",
3595 +- special_alt->orig_sec->name);
3596 +- return -1;
3597 +- }
3598 +-
3599 +- fake_jump = malloc(sizeof(*fake_jump));
3600 +- if (!fake_jump) {
3601 +- WARN("malloc failed");
3602 +- return -1;
3603 ++ if (next_insn_same_sec(file, last_orig_insn)) {
3604 ++ fake_jump = malloc(sizeof(*fake_jump));
3605 ++ if (!fake_jump) {
3606 ++ WARN("malloc failed");
3607 ++ return -1;
3608 ++ }
3609 ++ memset(fake_jump, 0, sizeof(*fake_jump));
3610 ++ INIT_LIST_HEAD(&fake_jump->alts);
3611 ++ clear_insn_state(&fake_jump->state);
3612 ++
3613 ++ fake_jump->sec = special_alt->new_sec;
3614 ++ fake_jump->offset = -1;
3615 ++ fake_jump->type = INSN_JUMP_UNCONDITIONAL;
3616 ++ fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
3617 ++ fake_jump->ignore = true;
3618 + }
3619 +- memset(fake_jump, 0, sizeof(*fake_jump));
3620 +- INIT_LIST_HEAD(&fake_jump->alts);
3621 +- clear_insn_state(&fake_jump->state);
3622 +-
3623 +- fake_jump->sec = special_alt->new_sec;
3624 +- fake_jump->offset = -1;
3625 +- fake_jump->type = INSN_JUMP_UNCONDITIONAL;
3626 +- fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
3627 +- fake_jump->ignore = true;
3628 +
3629 + if (!special_alt->new_len) {
3630 ++ if (!fake_jump) {
3631 ++ WARN("%s: empty alternative at end of section",
3632 ++ special_alt->orig_sec->name);
3633 ++ return -1;
3634 ++ }
3635 ++
3636 + *new_insn = fake_jump;
3637 + return 0;
3638 + }
3639 +@@ -648,6 +646,8 @@ static int handle_group_alt(struct objtool_file *file,
3640 +
3641 + last_new_insn = insn;
3642 +
3643 ++ insn->ignore = orig_insn->ignore_alts;
3644 ++
3645 + if (insn->type != INSN_JUMP_CONDITIONAL &&
3646 + insn->type != INSN_JUMP_UNCONDITIONAL)
3647 + continue;
3648 +@@ -656,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file,
3649 + continue;
3650 +
3651 + dest_off = insn->offset + insn->len + insn->immediate;
3652 +- if (dest_off == special_alt->new_off + special_alt->new_len)
3653 ++ if (dest_off == special_alt->new_off + special_alt->new_len) {
3654 ++ if (!fake_jump) {
3655 ++ WARN("%s: alternative jump to end of section",
3656 ++ special_alt->orig_sec->name);
3657 ++ return -1;
3658 ++ }
3659 + insn->jump_dest = fake_jump;
3660 ++ }
3661 +
3662 + if (!insn->jump_dest) {
3663 + WARN_FUNC("can't find alternative jump destination",
3664 +@@ -672,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file,
3665 + return -1;
3666 + }
3667 +
3668 +- list_add(&fake_jump->list, &last_new_insn->list);
3669 ++ if (fake_jump)
3670 ++ list_add(&fake_jump->list, &last_new_insn->list);
3671 +
3672 + return 0;
3673 + }
3674 +@@ -729,10 +736,6 @@ static int add_special_section_alts(struct objtool_file *file)
3675 + goto out;
3676 + }
3677 +
3678 +- /* Ignore retpoline alternatives. */
3679 +- if (orig_insn->ignore_alts)
3680 +- continue;
3681 +-
3682 + new_insn = NULL;
3683 + if (!special_alt->group || special_alt->new_len) {
3684 + new_insn = find_insn(file, special_alt->new_sec,
3685 +@@ -1089,11 +1092,11 @@ static int decode_sections(struct objtool_file *file)
3686 + if (ret)
3687 + return ret;
3688 +
3689 +- ret = add_call_destinations(file);
3690 ++ ret = add_special_section_alts(file);
3691 + if (ret)
3692 + return ret;
3693 +
3694 +- ret = add_special_section_alts(file);
3695 ++ ret = add_call_destinations(file);
3696 + if (ret)
3697 + return ret;
3698 +
3699 +@@ -1720,10 +1723,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
3700 +
3701 + insn->visited = true;
3702 +
3703 +- list_for_each_entry(alt, &insn->alts, list) {
3704 +- ret = validate_branch(file, alt->insn, state);
3705 +- if (ret)
3706 +- return 1;
3707 ++ if (!insn->ignore_alts) {
3708 ++ list_for_each_entry(alt, &insn->alts, list) {
3709 ++ ret = validate_branch(file, alt->insn, state);
3710 ++ if (ret)
3711 ++ return 1;
3712 ++ }
3713 + }
3714 +
3715 + switch (insn->type) {
3716 +diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
3717 +index e61fe703197b..18384d9be4e1 100644
3718 +--- a/tools/objtool/orc_gen.c
3719 ++++ b/tools/objtool/orc_gen.c
3720 +@@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
3721 + struct orc_entry *orc;
3722 + struct rela *rela;
3723 +
3724 ++ if (!insn_sec->sym) {
3725 ++ WARN("missing symbol for section %s", insn_sec->name);
3726 ++ return -1;
3727 ++ }
3728 ++
3729 + /* populate ORC data */
3730 + orc = (struct orc_entry *)u_sec->data->d_buf + idx;
3731 + memcpy(orc, o, sizeof(*orc));