Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:4.14 commit in: /
Date: Tue, 01 Nov 2022 19:49:37
Message-Id: 1667332161.b9af451e876d599b098610eed41fe167a13038a7.mpagano@gentoo
1 commit: b9af451e876d599b098610eed41fe167a13038a7
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Tue Nov 1 19:49:21 2022 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Tue Nov 1 19:49:21 2022 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=b9af451e
7
8 Linux patch 4.14.297
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 +
13 1296_linux-4.14.297.patch | 2048 +++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 2052 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index 1d73a888..e3db412f 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -1231,6 +1231,10 @@ Patch: 1295_linux-4.14.296.patch
21 From: https://www.kernel.org
22 Desc: Linux 4.14.296
23
24 +Patch: 1296_linux-4.14.297.patch
25 +From: https://www.kernel.org
26 +Desc: Linux 4.14.297
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1296_linux-4.14.297.patch b/1296_linux-4.14.297.patch
33 new file mode 100644
34 index 00000000..911985c4
35 --- /dev/null
36 +++ b/1296_linux-4.14.297.patch
37 @@ -0,0 +1,2048 @@
38 +diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
39 +index 6bd97cd50d625..7e061ed449aaa 100644
40 +--- a/Documentation/admin-guide/hw-vuln/spectre.rst
41 ++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
42 +@@ -422,6 +422,14 @@ The possible values in this file are:
43 + 'RSB filling' Protection of RSB on context switch enabled
44 + ============= ===========================================
45 +
46 ++ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
47 ++
48 ++ =========================== =======================================================
49 ++ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
50 ++ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
51 ++ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
52 ++ =========================== =======================================================
53 ++
54 + Full mitigation might require a microcode update from the CPU
55 + vendor. When the necessary microcode is not available, the kernel will
56 + report vulnerability.
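The spectre.rst hunk above documents the new PBRSB-eIBRS status strings reported through /sys/devices/system/cpu/vulnerabilities/spectre_v2. As a hedged illustration (not part of the patch), a minimal userspace reader for that file could look like the sketch below; only the sysfs path comes from the documentation, everything else is illustrative.

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Path documented by Documentation/admin-guide/hw-vuln/spectre.rst */
	const char *path = "/sys/devices/system/cpu/vulnerabilities/spectre_v2";
	char line[256] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "..., PBRSB-eIBRS: Not affected" */
	fclose(f);

	/* The PBRSB-eIBRS field only appears on kernels carrying this patch. */
	return strstr(line, "PBRSB-eIBRS") ? 0 : 2;
}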
57 +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
58 +index 681d429c64269..629d7956ddf16 100644
59 +--- a/Documentation/admin-guide/kernel-parameters.txt
60 ++++ b/Documentation/admin-guide/kernel-parameters.txt
61 +@@ -3965,6 +3965,18 @@
62 +
63 + retain_initrd [RAM] Keep initrd memory after extraction
64 +
65 ++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
66 ++ Speculative Code Execution with Return Instructions)
67 ++ vulnerability.
68 ++
69 ++ off - unconditionally disable
70 ++ auto - automatically select a mitigation
71 ++
72 ++ Selecting 'auto' will choose a mitigation method at run
73 ++ time according to the CPU.
74 ++
75 ++ Not specifying this option is equivalent to retbleed=auto.
76 ++
77 + rfkill.default_state=
78 + 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
79 + etc. communication is blocked by default.
80 +@@ -4204,6 +4216,7 @@
81 + eibrs - enhanced IBRS
82 + eibrs,retpoline - enhanced IBRS + Retpolines
83 + eibrs,lfence - enhanced IBRS + LFENCE
84 ++ ibrs - use IBRS to protect kernel
85 +
86 + Not specifying this option is equivalent to
87 + spectre_v2=auto.
88 +diff --git a/Makefile b/Makefile
89 +index b7978fb873d60..a66c65fcb96fb 100644
90 +--- a/Makefile
91 ++++ b/Makefile
92 +@@ -1,7 +1,7 @@
93 + # SPDX-License-Identifier: GPL-2.0
94 + VERSION = 4
95 + PATCHLEVEL = 14
96 +-SUBLEVEL = 296
97 ++SUBLEVEL = 297
98 + EXTRAVERSION =
99 + NAME = Petit Gorille
100 +
101 +diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
102 +index 1dbc62a96b859..ef759951fd0fa 100644
103 +--- a/arch/x86/entry/calling.h
104 ++++ b/arch/x86/entry/calling.h
105 +@@ -6,6 +6,8 @@
106 + #include <asm/percpu.h>
107 + #include <asm/asm-offsets.h>
108 + #include <asm/processor-flags.h>
109 ++#include <asm/msr.h>
110 ++#include <asm/nospec-branch.h>
111 +
112 + /*
113 +
114 +@@ -146,27 +148,19 @@ For 32-bit we have the following conventions - kernel is built with
115 +
116 + .endm
117 +
118 +-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
119 ++.macro POP_REGS pop_rdi=1
120 + popq %r15
121 + popq %r14
122 + popq %r13
123 + popq %r12
124 + popq %rbp
125 + popq %rbx
126 +- .if \skip_r11rcx
127 +- popq %rsi
128 +- .else
129 + popq %r11
130 +- .endif
131 + popq %r10
132 + popq %r9
133 + popq %r8
134 + popq %rax
135 +- .if \skip_r11rcx
136 +- popq %rsi
137 +- .else
138 + popq %rcx
139 +- .endif
140 + popq %rdx
141 + popq %rsi
142 + .if \pop_rdi
143 +@@ -336,6 +330,62 @@ For 32-bit we have the following conventions - kernel is built with
144 +
145 + #endif
146 +
147 ++/*
148 ++ * IBRS kernel mitigation for Spectre_v2.
149 ++ *
150 ++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
151 ++ * the regs it uses (AX, CX, DX). Must be called before the first RET
152 ++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
153 ++ *
154 ++ * The optional argument is used to save/restore the current value,
155 ++ * which is used on the paranoid paths.
156 ++ *
157 ++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
158 ++ */
159 ++.macro IBRS_ENTER save_reg
160 ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
161 ++ movl $MSR_IA32_SPEC_CTRL, %ecx
162 ++
163 ++.ifnb \save_reg
164 ++ rdmsr
165 ++ shl $32, %rdx
166 ++ or %rdx, %rax
167 ++ mov %rax, \save_reg
168 ++ test $SPEC_CTRL_IBRS, %eax
169 ++ jz .Ldo_wrmsr_\@
170 ++ lfence
171 ++ jmp .Lend_\@
172 ++.Ldo_wrmsr_\@:
173 ++.endif
174 ++
175 ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
176 ++ movl %edx, %eax
177 ++ shr $32, %rdx
178 ++ wrmsr
179 ++.Lend_\@:
180 ++.endm
181 ++
182 ++/*
183 ++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
184 ++ * regs. Must be called after the last RET.
185 ++ */
186 ++.macro IBRS_EXIT save_reg
187 ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
188 ++ movl $MSR_IA32_SPEC_CTRL, %ecx
189 ++
190 ++.ifnb \save_reg
191 ++ mov \save_reg, %rdx
192 ++.else
193 ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
194 ++ andl $(~SPEC_CTRL_IBRS), %edx
195 ++.endif
196 ++
197 ++ movl %edx, %eax
198 ++ shr $32, %rdx
199 ++ wrmsr
200 ++.Lend_\@:
201 ++.endm
202 ++
203 + /*
204 + * Mitigate Spectre v1 for conditional swapgs code paths.
205 + *
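The IBRS_ENTER/IBRS_EXIT macros above split and rebuild the 64-bit MSR_IA32_SPEC_CTRL value around rdmsr/wrmsr, which always take the low half in EAX and the high half in EDX. The standalone C sketch below illustrates that split only; SPEC_CTRL_IBRS being bit 0 matches the msr-index.h define, the rest is illustrative.

#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS (1ULL << 0)	/* bit 0 of MSR_IA32_SPEC_CTRL */

int main(void)
{
	uint64_t val = SPEC_CTRL_IBRS;		/* value a wrmsr would write */

	/* wrmsr consumes the low half in EAX and the high half in EDX ... */
	uint32_t eax = (uint32_t)val;
	uint32_t edx = (uint32_t)(val >> 32);

	/* ... and IBRS_ENTER's save path rebuilds the 64-bit value with
	 * "shl $32, %rdx; or %rdx, %rax", i.e.: */
	uint64_t rebuilt = ((uint64_t)edx << 32) | eax;

	printf("eax=%#x edx=%#x rebuilt=%#llx\n",
	       (unsigned)eax, (unsigned)edx, (unsigned long long)rebuilt);
	return rebuilt == val ? 0 : 1;
}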
206 +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
207 +index c19974a493784..dbcea4281c309 100644
208 +--- a/arch/x86/entry/entry_32.S
209 ++++ b/arch/x86/entry/entry_32.S
210 +@@ -245,7 +245,6 @@ ENTRY(__switch_to_asm)
211 + movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
212 + #endif
213 +
214 +-#ifdef CONFIG_RETPOLINE
215 + /*
216 + * When switching from a shallower to a deeper call stack
217 + * the RSB may either underflow or use entries populated
218 +@@ -254,7 +253,6 @@ ENTRY(__switch_to_asm)
219 + * speculative execution to prevent attack.
220 + */
221 + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
222 +-#endif
223 +
224 + /* restore callee-saved registers */
225 + popfl
226 +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
227 +index ac389ffb1822b..637a23d404e95 100644
228 +--- a/arch/x86/entry/entry_64.S
229 ++++ b/arch/x86/entry/entry_64.S
230 +@@ -230,6 +230,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
231 +
232 + /* IRQs are off. */
233 + movq %rsp, %rdi
234 ++
235 ++ /* clobbers %rax, make sure it is after saving the syscall nr */
236 ++ IBRS_ENTER
237 ++
238 + call do_syscall_64 /* returns with IRQs disabled */
239 +
240 + TRACE_IRQS_IRETQ /* we're about to change IF */
241 +@@ -301,8 +305,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
242 + * perf profiles. Nothing jumps here.
243 + */
244 + syscall_return_via_sysret:
245 +- /* rcx and r11 are already restored (see code above) */
246 +- POP_REGS pop_rdi=0 skip_r11rcx=1
247 ++ IBRS_EXIT
248 ++ POP_REGS pop_rdi=0
249 +
250 + /*
251 + * Now all regs are restored except RSP and RDI.
252 +@@ -353,7 +357,6 @@ ENTRY(__switch_to_asm)
253 + movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
254 + #endif
255 +
256 +-#ifdef CONFIG_RETPOLINE
257 + /*
258 + * When switching from a shallower to a deeper call stack
259 + * the RSB may either underflow or use entries populated
260 +@@ -362,7 +365,6 @@ ENTRY(__switch_to_asm)
261 + * speculative execution to prevent attack.
262 + */
263 + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
264 +-#endif
265 +
266 + /* restore callee-saved registers */
267 + popfq
268 +@@ -591,6 +593,7 @@ GLOBAL(retint_user)
269 + TRACE_IRQS_IRETQ
270 +
271 + GLOBAL(swapgs_restore_regs_and_return_to_usermode)
272 ++ IBRS_EXIT
273 + #ifdef CONFIG_DEBUG_ENTRY
274 + /* Assert that pt_regs indicates user mode. */
275 + testb $3, CS(%rsp)
276 +@@ -1134,6 +1137,9 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
277 + * Save all registers in pt_regs, and switch gs if needed.
278 + * Use slow, but surefire "are we in kernel?" check.
279 + * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
280 ++ *
281 ++ * R14 - old CR3
282 ++ * R15 - old SPEC_CTRL
283 + */
284 + ENTRY(paranoid_entry)
285 + UNWIND_HINT_FUNC
286 +@@ -1157,6 +1163,12 @@ ENTRY(paranoid_entry)
287 + */
288 + FENCE_SWAPGS_KERNEL_ENTRY
289 +
290 ++ /*
291 ++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
292 ++ * CR3 above, keep the old value in a callee saved register.
293 ++ */
294 ++ IBRS_ENTER save_reg=%r15
295 ++
296 + ret
297 + END(paranoid_entry)
298 +
299 +@@ -1171,9 +1183,19 @@ END(paranoid_entry)
300 + * to try to handle preemption here.
301 + *
302 + * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
303 ++ *
304 ++ * R14 - old CR3
305 ++ * R15 - old SPEC_CTRL
306 + */
307 + ENTRY(paranoid_exit)
308 + UNWIND_HINT_REGS
309 ++
310 ++ /*
311 ++ * Must restore IBRS state before both CR3 and %GS since we need access
312 ++ * to the per-CPU x86_spec_ctrl_shadow variable.
313 ++ */
314 ++ IBRS_EXIT save_reg=%r15
315 ++
316 + DISABLE_INTERRUPTS(CLBR_ANY)
317 + TRACE_IRQS_OFF_DEBUG
318 + testl %ebx, %ebx /* swapgs needed? */
319 +@@ -1208,8 +1230,10 @@ ENTRY(error_entry)
320 + FENCE_SWAPGS_USER_ENTRY
321 + /* We have user CR3. Change to kernel CR3. */
322 + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
323 ++ IBRS_ENTER
324 +
325 + .Lerror_entry_from_usermode_after_swapgs:
326 ++
327 + /* Put us onto the real thread stack. */
328 + popq %r12 /* save return addr in %12 */
329 + movq %rsp, %rdi /* arg0 = pt_regs pointer */
330 +@@ -1272,6 +1296,7 @@ ENTRY(error_entry)
331 + SWAPGS
332 + FENCE_SWAPGS_USER_ENTRY
333 + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
334 ++ IBRS_ENTER
335 +
336 + /*
337 + * Pretend that the exception came from user mode: set up pt_regs
338 +@@ -1377,6 +1402,8 @@ ENTRY(nmi)
339 + PUSH_AND_CLEAR_REGS rdx=(%rdx)
340 + ENCODE_FRAME_POINTER
341 +
342 ++ IBRS_ENTER
343 ++
344 + /*
345 + * At this point we no longer need to worry about stack damage
346 + * due to nesting -- we're on the normal thread stack and we're
347 +@@ -1600,6 +1627,9 @@ end_repeat_nmi:
348 + movq $-1, %rsi
349 + call do_nmi
350 +
351 ++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
352 ++ IBRS_EXIT save_reg=%r15
353 ++
354 + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
355 +
356 + testl %ebx, %ebx /* swapgs needed? */
357 +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
358 +index 304e3daf82dd2..8cdc3af68c538 100644
359 +--- a/arch/x86/entry/entry_64_compat.S
360 ++++ b/arch/x86/entry/entry_64_compat.S
361 +@@ -4,7 +4,6 @@
362 + *
363 + * Copyright 2000-2002 Andi Kleen, SuSE Labs.
364 + */
365 +-#include "calling.h"
366 + #include <asm/asm-offsets.h>
367 + #include <asm/current.h>
368 + #include <asm/errno.h>
369 +@@ -17,6 +16,8 @@
370 + #include <linux/linkage.h>
371 + #include <linux/err.h>
372 +
373 ++#include "calling.h"
374 ++
375 + .section .entry.text, "ax"
376 +
377 + /*
378 +@@ -106,6 +107,8 @@ ENTRY(entry_SYSENTER_compat)
379 + xorl %r15d, %r15d /* nospec r15 */
380 + cld
381 +
382 ++ IBRS_ENTER
383 ++
384 + /*
385 + * SYSENTER doesn't filter flags, so we need to clear NT and AC
386 + * ourselves. To save a few cycles, we can check whether
387 +@@ -250,6 +253,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
388 + */
389 + TRACE_IRQS_OFF
390 +
391 ++ IBRS_ENTER
392 ++
393 + movq %rsp, %rdi
394 + call do_fast_syscall_32
395 + /* XEN PV guests always use IRET path */
396 +@@ -259,6 +264,9 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
397 + /* Opportunistic SYSRET */
398 + sysret32_from_system_call:
399 + TRACE_IRQS_ON /* User mode traces as IRQs on. */
400 ++
401 ++ IBRS_EXIT
402 ++
403 + movq RBX(%rsp), %rbx /* pt_regs->rbx */
404 + movq RBP(%rsp), %rbp /* pt_regs->rbp */
405 + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
406 +@@ -385,6 +393,8 @@ ENTRY(entry_INT80_compat)
407 + */
408 + TRACE_IRQS_OFF
409 +
410 ++ IBRS_ENTER
411 ++
412 + movq %rsp, %rdi
413 + call do_int80_syscall_32
414 + .Lsyscall_32_done:
415 +diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
416 +index 8844665929437..e54babe529c7b 100644
417 +--- a/arch/x86/include/asm/cpu_device_id.h
418 ++++ b/arch/x86/include/asm/cpu_device_id.h
419 +@@ -1,13 +1,172 @@
420 + /* SPDX-License-Identifier: GPL-2.0 */
421 +-#ifndef _CPU_DEVICE_ID
422 +-#define _CPU_DEVICE_ID 1
423 ++#ifndef _ASM_X86_CPU_DEVICE_ID
424 ++#define _ASM_X86_CPU_DEVICE_ID
425 +
426 + /*
427 + * Declare drivers belonging to specific x86 CPUs
428 + * Similar in spirit to pci_device_id and related PCI functions
429 ++ *
430 ++ * The wildcard initializers are in mod_devicetable.h because
431 ++ * file2alias needs them. Sigh.
432 + */
433 +-
434 + #include <linux/mod_devicetable.h>
435 ++/* Get the INTEL_FAM* model defines */
436 ++#include <asm/intel-family.h>
437 ++/* And the X86_VENDOR_* ones */
438 ++#include <asm/processor.h>
439 ++
440 ++/* Centaur FAM6 models */
441 ++#define X86_CENTAUR_FAM6_C7_A 0xa
442 ++#define X86_CENTAUR_FAM6_C7_D 0xd
443 ++#define X86_CENTAUR_FAM6_NANO 0xf
444 ++
445 ++/**
446 ++ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching
447 ++ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
448 ++ * The name is expanded to X86_VENDOR_@_vendor
449 ++ * @_family: The family number or X86_FAMILY_ANY
450 ++ * @_model: The model number, model constant or X86_MODEL_ANY
451 ++ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
452 ++ * @_data: Driver specific data or NULL. The internal storage
453 ++ * format is unsigned long. The supplied value, pointer
454 ++ * etc. is casted to unsigned long internally.
455 ++ *
456 ++ * Use only if you need all selectors. Otherwise use one of the shorter
457 ++ * macros of the X86_MATCH_* family. If there is no matching shorthand
458 ++ * macro, consider to add one. If you really need to wrap one of the macros
459 ++ * into another macro at the usage site for good reasons, then please
460 ++ * start this local macro with X86_MATCH to allow easy grepping.
461 ++ */
462 ++#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \
463 ++ _feature, _data) { \
464 ++ .vendor = X86_VENDOR_##_vendor, \
465 ++ .family = _family, \
466 ++ .model = _model, \
467 ++ .feature = _feature, \
468 ++ .driver_data = (unsigned long) _data \
469 ++}
470 ++
471 ++/**
472 ++ * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
473 ++ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
474 ++ * The name is expanded to X86_VENDOR_@vendor
475 ++ * @family: The family number or X86_FAMILY_ANY
476 ++ * @feature: A X86_FEATURE bit
477 ++ * @data: Driver specific data or NULL. The internal storage
478 ++ * format is unsigned long. The supplied value, pointer
479 ++ * etc. is casted to unsigned long internally.
480 ++ *
481 ++ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
482 ++ * set to wildcards.
483 ++ */
484 ++#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
485 ++ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \
486 ++ X86_MODEL_ANY, feature, data)
487 ++
488 ++/**
489 ++ * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
490 ++ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
491 ++ * The name is expanded to X86_VENDOR_@vendor
492 ++ * @feature: A X86_FEATURE bit
493 ++ * @data: Driver specific data or NULL. The internal storage
494 ++ * format is unsigned long. The supplied value, pointer
495 ++ * etc. is casted to unsigned long internally.
496 ++ *
497 ++ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
498 ++ * set to wildcards.
499 ++ */
500 ++#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
501 ++ X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data)
502 ++
503 ++/**
504 ++ * X86_MATCH_FEATURE - Macro for matching a CPU feature
505 ++ * @feature: A X86_FEATURE bit
506 ++ * @data: Driver specific data or NULL. The internal storage
507 ++ * format is unsigned long. The supplied value, pointer
508 ++ * etc. is casted to unsigned long internally.
509 ++ *
510 ++ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
511 ++ * set to wildcards.
512 ++ */
513 ++#define X86_MATCH_FEATURE(feature, data) \
514 ++ X86_MATCH_VENDOR_FEATURE(ANY, feature, data)
515 ++
516 ++/* Transitional to keep the existing code working */
517 ++#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL)
518 ++
519 ++/**
520 ++ * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
521 ++ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
522 ++ * The name is expanded to X86_VENDOR_@vendor
523 ++ * @family: The family number or X86_FAMILY_ANY
524 ++ * @model: The model number, model constant or X86_MODEL_ANY
525 ++ * @data: Driver specific data or NULL. The internal storage
526 ++ * format is unsigned long. The supplied value, pointer
527 ++ * etc. is casted to unsigned long internally.
528 ++ *
529 ++ * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
530 ++ * set to wildcards.
531 ++ */
532 ++#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
533 ++ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \
534 ++ X86_FEATURE_ANY, data)
535 ++
536 ++/**
537 ++ * X86_MATCH_VENDOR_FAM - Match vendor and family
538 ++ * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
539 ++ * The name is expanded to X86_VENDOR_@vendor
540 ++ * @family: The family number or X86_FAMILY_ANY
541 ++ * @data: Driver specific data or NULL. The internal storage
542 ++ * format is unsigned long. The supplied value, pointer
543 ++ * etc. is casted to unsigned long internally.
544 ++ *
545 ++ * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
546 ++ * set to wildcards.
547 ++ */
548 ++#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
549 ++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
550 ++
551 ++/**
552 ++ * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
553 ++ * @model: The model name without the INTEL_FAM6_ prefix or ANY
554 ++ * The model name is expanded to INTEL_FAM6_@model internally
555 ++ * @data: Driver specific data or NULL. The internal storage
556 ++ * format is unsigned long. The supplied value, pointer
557 ++ * etc. is casted to unsigned long internally.
558 ++ *
559 ++ * The vendor is set to INTEL, the family to 6 and all other missing
560 ++ * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
561 ++ *
562 ++ * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
563 ++ */
564 ++#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
565 ++ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
566 ++
567 ++/*
568 ++ * Match specific microcode revisions.
569 ++ *
570 ++ * vendor/family/model/stepping must be all set.
571 ++ *
572 ++ * Only checks against the boot CPU. When mixed-stepping configs are
573 ++ * valid for a CPU model, add a quirk for every valid stepping and
574 ++ * do the fine-tuning in the quirk handler.
575 ++ */
576 ++
577 ++struct x86_cpu_desc {
578 ++ u8 x86_family;
579 ++ u8 x86_vendor;
580 ++ u8 x86_model;
581 ++ u8 x86_stepping;
582 ++ u32 x86_microcode_rev;
583 ++};
584 ++
585 ++#define INTEL_CPU_DESC(model, stepping, revision) { \
586 ++ .x86_family = 6, \
587 ++ .x86_vendor = X86_VENDOR_INTEL, \
588 ++ .x86_model = (model), \
589 ++ .x86_stepping = (stepping), \
590 ++ .x86_microcode_rev = (revision), \
591 ++}
592 +
593 + #define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
594 +
595 +@@ -37,5 +196,6 @@
596 + }
597 +
598 + extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
599 ++extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
600 +
601 +-#endif
602 ++#endif /* _ASM_X86_CPU_DEVICE_ID */
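The X86_MATCH_* helpers added above all funnel into one designated-initializer block. The standalone sketch below shows what an X86_MATCH_INTEL_FAM6_MODEL() entry boils down to; the struct and constants here are simplified stand-ins for illustration, not the kernel's real definitions.

#include <stdio.h>

struct cpu_id {				/* simplified stand-in for struct x86_cpu_id */
	unsigned short vendor, family, model, feature;
	unsigned long driver_data;
};

#define X86_VENDOR_INTEL	0	/* illustrative constants */
#define X86_FEATURE_ANY		0
#define INTEL_FAM6_NEHALEM	0x1e

/* Mirrors X86_MATCH_VENDOR_FAM_MODEL_FEATURE() from the hunk above. */
#define MATCH(_vendor, _family, _model, _feature, _data) {	\
	.vendor = X86_VENDOR_##_vendor,				\
	.family = _family,					\
	.model = _model,					\
	.feature = _feature,					\
	.driver_data = (unsigned long)(_data)			\
}

/* X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, NULL) expands, via the FAM/MODEL
 * wrappers, to vendor=INTEL, family=6, model=INTEL_FAM6_NEHALEM,
 * feature=X86_FEATURE_ANY: */
static const struct cpu_id table[] = {
	MATCH(INTEL, 6, INTEL_FAM6_NEHALEM, X86_FEATURE_ANY, 0),
	{}					/* terminating empty entry */
};

int main(void)
{
	printf("family=%u model=%#x\n",
	       (unsigned)table[0].family, (unsigned)table[0].model);
	return 0;
}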
603 +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
604 +index d56634d6b10cf..840d8981567e7 100644
605 +--- a/arch/x86/include/asm/cpufeatures.h
606 ++++ b/arch/x86/include/asm/cpufeatures.h
607 +@@ -202,8 +202,8 @@
608 + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
609 + #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
610 + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
611 +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
612 +-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
613 ++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
614 ++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
615 + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
616 + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
617 + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
618 +@@ -283,6 +283,15 @@
619 + #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
620 + #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
621 + #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
622 ++/* FREE! (11*32+ 6) */
623 ++/* FREE! (11*32+ 7) */
624 ++/* FREE! (11*32+ 8) */
625 ++/* FREE! (11*32+ 9) */
626 ++/* FREE! (11*32+10) */
627 ++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
628 ++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
629 ++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
630 ++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
631 +
632 + /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
633 + #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
634 +@@ -295,6 +304,7 @@
635 + #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
636 + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
637 + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
638 ++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
639 +
640 + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
641 + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
642 +@@ -395,5 +405,7 @@
643 + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
644 + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
645 + #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
646 ++#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
647 ++#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
648 +
649 + #endif /* _ASM_X86_CPUFEATURES_H */
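Each define in the cpufeatures.h hunk above encodes a feature as (word * 32 + bit) into the capability bitmap; the new X86_FEATURE_RSB_VMEXIT_LITE, for instance, is word 11, bit 17. A tiny hedged illustration of packing and unpacking that encoding:

#include <stdio.h>

#define FEATURE(word, bit)	((word) * 32 + (bit))
#define FEATURE_WORD(f)		((f) / 32)
#define FEATURE_BIT(f)		((f) % 32)

int main(void)
{
	int rsb_vmexit_lite = FEATURE(11, 17);	/* X86_FEATURE_RSB_VMEXIT_LITE */

	printf("feature %d -> word %d, bit %d\n", rsb_vmexit_lite,
	       FEATURE_WORD(rsb_vmexit_lite), FEATURE_BIT(rsb_vmexit_lite));
	return 0;
}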
650 +diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
651 +index 05d2d7169ab8f..7811d42e78ef7 100644
652 +--- a/arch/x86/include/asm/intel-family.h
653 ++++ b/arch/x86/include/asm/intel-family.h
654 +@@ -16,6 +16,9 @@
655 + * that group keep the CPUID for the variants sorted by model number.
656 + */
657 +
658 ++/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
659 ++#define INTEL_FAM6_ANY X86_MODEL_ANY
660 ++
661 + #define INTEL_FAM6_CORE_YONAH 0x0E
662 +
663 + #define INTEL_FAM6_CORE2_MEROM 0x0F
664 +@@ -103,4 +106,7 @@
665 + #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
666 + #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
667 +
668 ++/* Family 5 */
669 ++#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
670 ++
671 + #endif /* _ASM_X86_INTEL_FAMILY_H */
672 +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
673 +index c090d8e8fbb3f..92c6054f0a00e 100644
674 +--- a/arch/x86/include/asm/msr-index.h
675 ++++ b/arch/x86/include/asm/msr-index.h
676 +@@ -47,6 +47,8 @@
677 + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
678 + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
679 + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
680 ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
681 ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
682 +
683 + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
684 + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
685 +@@ -73,6 +75,7 @@
686 + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
687 + #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
688 + #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
689 ++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
690 + #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
691 + #define ARCH_CAP_SSB_NO BIT(4) /*
692 + * Not susceptible to Speculative Store Bypass
693 +@@ -120,6 +123,17 @@
694 + * bit available to control VERW
695 + * behavior.
696 + */
697 ++#define ARCH_CAP_RRSBA BIT(19) /*
698 ++ * Indicates RET may use predictors
699 ++ * other than the RSB. With eIBRS
700 ++ * enabled predictions in kernel mode
701 ++ * are restricted to targets in
702 ++ * kernel.
703 ++ */
704 ++#define ARCH_CAP_PBRSB_NO BIT(24) /*
705 ++ * Not susceptible to Post-Barrier
706 ++ * Return Stack Buffer Predictions.
707 ++ */
708 +
709 + #define MSR_IA32_FLUSH_CMD 0x0000010b
710 + #define L1D_FLUSH BIT(0) /*
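The msr-index.h hunk above adds ARCH_CAP_RRSBA (bit 19) and ARCH_CAP_PBRSB_NO (bit 24) to the MSR_IA32_ARCH_CAPABILITIES decoding. A minimal sketch of testing those bits follows; the sample ia32_cap value is made up, and on real hardware it would come from rdmsr (or /dev/cpu/N/msr from userspace).

#include <stdint.h>
#include <stdio.h>

#define BIT(n)			(1ULL << (n))
#define ARCH_CAP_RRSBA		BIT(19)	/* RET may use non-RSB predictors */
#define ARCH_CAP_PBRSB_NO	BIT(24)	/* not susceptible to PBRSB */

int main(void)
{
	uint64_t ia32_cap = ARCH_CAP_PBRSB_NO;	/* hypothetical sample value */

	printf("RRSBA behaviour: %s\n",
	       (ia32_cap & ARCH_CAP_RRSBA) ? "possible" : "not reported");
	printf("PBRSB:           %s\n",
	       (ia32_cap & ARCH_CAP_PBRSB_NO) ? "not affected" : "potentially affected");
	return 0;
}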
711 +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
712 +index 8a618fbf569f0..118441f533991 100644
713 +--- a/arch/x86/include/asm/nospec-branch.h
714 ++++ b/arch/x86/include/asm/nospec-branch.h
715 +@@ -4,11 +4,14 @@
716 + #define _ASM_X86_NOSPEC_BRANCH_H_
717 +
718 + #include <linux/static_key.h>
719 ++#include <linux/frame.h>
720 +
721 + #include <asm/alternative.h>
722 + #include <asm/alternative-asm.h>
723 + #include <asm/cpufeatures.h>
724 + #include <asm/msr-index.h>
725 ++#include <asm/unwind_hints.h>
726 ++#include <asm/percpu.h>
727 +
728 + /*
729 + * Fill the CPU return stack buffer.
730 +@@ -50,9 +53,18 @@
731 + lfence; \
732 + jmp 775b; \
733 + 774: \
734 ++ add $(BITS_PER_LONG/8) * 2, sp; \
735 + dec reg; \
736 + jnz 771b; \
737 +- add $(BITS_PER_LONG/8) * nr, sp;
738 ++ /* barrier for jnz misprediction */ \
739 ++ lfence;
740 ++
741 ++#define ISSUE_UNBALANCED_RET_GUARD(sp) \
742 ++ call 992f; \
743 ++ int3; \
744 ++992: \
745 ++ add $(BITS_PER_LONG/8), sp; \
746 ++ lfence;
747 +
748 + #ifdef __ASSEMBLY__
749 +
750 +@@ -141,13 +153,9 @@
751 + * monstrosity above, manually.
752 + */
753 + .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
754 +-#ifdef CONFIG_RETPOLINE
755 +- ANNOTATE_NOSPEC_ALTERNATIVE
756 +- ALTERNATIVE "jmp .Lskip_rsb_\@", \
757 +- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
758 +- \ftr
759 ++ ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
760 ++ __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
761 + .Lskip_rsb_\@:
762 +-#endif
763 + .endm
764 +
765 + #else /* __ASSEMBLY__ */
766 +@@ -228,6 +236,7 @@ enum spectre_v2_mitigation {
767 + SPECTRE_V2_EIBRS,
768 + SPECTRE_V2_EIBRS_RETPOLINE,
769 + SPECTRE_V2_EIBRS_LFENCE,
770 ++ SPECTRE_V2_IBRS,
771 + };
772 +
773 + /* The indirect branch speculation control variants */
774 +@@ -256,19 +265,19 @@ extern char __indirect_thunk_end[];
775 + * retpoline and IBRS mitigations for Spectre v2 need this; only on future
776 + * CPUs with IBRS_ALL *might* it be avoided.
777 + */
778 +-static inline void vmexit_fill_RSB(void)
779 ++static __always_inline void vmexit_fill_RSB(void)
780 + {
781 +-#ifdef CONFIG_RETPOLINE
782 + unsigned long loops;
783 +
784 + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
785 +- ALTERNATIVE("jmp 910f",
786 +- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
787 +- X86_FEATURE_RETPOLINE)
788 ++ ALTERNATIVE_2("jmp 910f", "", X86_FEATURE_RSB_VMEXIT,
789 ++ "jmp 911f", X86_FEATURE_RSB_VMEXIT_LITE)
790 ++ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1))
791 ++ "911:"
792 ++ __stringify(ISSUE_UNBALANCED_RET_GUARD(%1))
793 + "910:"
794 + : "=r" (loops), ASM_CALL_CONSTRAINT
795 + : : "memory" );
796 +-#endif
797 + }
798 +
799 + static __always_inline
800 +@@ -291,6 +300,9 @@ static inline void indirect_branch_prediction_barrier(void)
801 +
802 + /* The Intel SPEC CTRL MSR base value cache */
803 + extern u64 x86_spec_ctrl_base;
804 ++DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
805 ++extern void write_spec_ctrl_current(u64 val, bool force);
806 ++extern u64 spec_ctrl_current(void);
807 +
808 + /*
809 + * With retpoline, we must use IBRS to restrict branch prediction
810 +@@ -300,18 +312,16 @@ extern u64 x86_spec_ctrl_base;
811 + */
812 + #define firmware_restrict_branch_speculation_start() \
813 + do { \
814 +- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
815 +- \
816 + preempt_disable(); \
817 +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
818 ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
819 ++ spec_ctrl_current() | SPEC_CTRL_IBRS, \
820 + X86_FEATURE_USE_IBRS_FW); \
821 + } while (0)
822 +
823 + #define firmware_restrict_branch_speculation_end() \
824 + do { \
825 +- u64 val = x86_spec_ctrl_base; \
826 +- \
827 +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
828 ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
829 ++ spec_ctrl_current(), \
830 + X86_FEATURE_USE_IBRS_FW); \
831 + preempt_enable(); \
832 + } while (0)
833 +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
834 +index 3914f9218a6bc..0ccd74d37aad7 100644
835 +--- a/arch/x86/kernel/cpu/amd.c
836 ++++ b/arch/x86/kernel/cpu/amd.c
837 +@@ -857,12 +857,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
838 + {
839 + set_cpu_cap(c, X86_FEATURE_ZEN);
840 +
841 +- /*
842 +- * Fix erratum 1076: CPB feature bit not being set in CPUID.
843 +- * Always set it, except when running under a hypervisor.
844 +- */
845 +- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
846 +- set_cpu_cap(c, X86_FEATURE_CPB);
847 ++ /* Fix up CPUID bits, but only if not virtualised. */
848 ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
849 ++
850 ++ /* Erratum 1076: CPB feature bit not being set in CPUID. */
851 ++ if (!cpu_has(c, X86_FEATURE_CPB))
852 ++ set_cpu_cap(c, X86_FEATURE_CPB);
853 ++
854 ++ /*
855 ++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
856 ++ * Branch Type Confusion, but predate the allocation of the
857 ++ * BTC_NO bit.
858 ++ */
859 ++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
860 ++ set_cpu_cap(c, X86_FEATURE_BTC_NO);
861 ++ }
862 + }
863 +
864 + static void init_amd(struct cpuinfo_x86 *c)
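The init_amd_zn() change above keys the BTC_NO fixup on family 0x19 (Zen 3). Purely as an illustration, the displayed family can be recomputed from CPUID leaf 1 the same way cpuinfo_x86.x86 is derived; the sketch assumes a GCC/Clang toolchain for <cpuid.h>.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Displayed family = base family, plus extended family when base == 0xf. */
	unsigned int base = (eax >> 8) & 0xf;
	unsigned int ext  = (eax >> 20) & 0xff;
	unsigned int family = (base == 0xf) ? base + ext : base;

	printf("x86 family: %#x%s\n", family,
	       family == 0x19 ? " (the Zen 3 range the hunk above checks for)" : "");
	return 0;
}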
865 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
866 +index 68056ee5dff9f..05dcdb419abdd 100644
867 +--- a/arch/x86/kernel/cpu/bugs.c
868 ++++ b/arch/x86/kernel/cpu/bugs.c
869 +@@ -37,6 +37,8 @@
870 +
871 + static void __init spectre_v1_select_mitigation(void);
872 + static void __init spectre_v2_select_mitigation(void);
873 ++static void __init retbleed_select_mitigation(void);
874 ++static void __init spectre_v2_user_select_mitigation(void);
875 + static void __init ssb_select_mitigation(void);
876 + static void __init l1tf_select_mitigation(void);
877 + static void __init mds_select_mitigation(void);
878 +@@ -46,16 +48,40 @@ static void __init taa_select_mitigation(void);
879 + static void __init mmio_select_mitigation(void);
880 + static void __init srbds_select_mitigation(void);
881 +
882 +-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
883 ++/* The base value of the SPEC_CTRL MSR without task-specific bits set */
884 + u64 x86_spec_ctrl_base;
885 + EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
886 ++
887 ++/* The current value of the SPEC_CTRL MSR with task-specific bits set */
888 ++DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
889 ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
890 ++
891 + static DEFINE_MUTEX(spec_ctrl_mutex);
892 +
893 + /*
894 +- * The vendor and possibly platform specific bits which can be modified in
895 +- * x86_spec_ctrl_base.
896 ++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
897 ++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
898 + */
899 +-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
900 ++void write_spec_ctrl_current(u64 val, bool force)
901 ++{
902 ++ if (this_cpu_read(x86_spec_ctrl_current) == val)
903 ++ return;
904 ++
905 ++ this_cpu_write(x86_spec_ctrl_current, val);
906 ++
907 ++ /*
908 ++ * When KERNEL_IBRS this MSR is written on return-to-user, unless
909 ++ * forced the update can be delayed until that time.
910 ++ */
911 ++ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
912 ++ wrmsrl(MSR_IA32_SPEC_CTRL, val);
913 ++}
914 ++
915 ++u64 spec_ctrl_current(void)
916 ++{
917 ++ return this_cpu_read(x86_spec_ctrl_current);
918 ++}
919 ++EXPORT_SYMBOL_GPL(spec_ctrl_current);
920 +
921 + /*
922 + * AMD specific MSR info for Speculative Store Bypass control.
923 +@@ -105,13 +131,21 @@ void __init check_bugs(void)
924 + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
925 + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
926 +
927 +- /* Allow STIBP in MSR_SPEC_CTRL if supported */
928 +- if (boot_cpu_has(X86_FEATURE_STIBP))
929 +- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
930 +-
931 + /* Select the proper CPU mitigations before patching alternatives: */
932 + spectre_v1_select_mitigation();
933 + spectre_v2_select_mitigation();
934 ++ /*
935 ++ * retbleed_select_mitigation() relies on the state set by
936 ++ * spectre_v2_select_mitigation(); specifically it wants to know about
937 ++ * spectre_v2=ibrs.
938 ++ */
939 ++ retbleed_select_mitigation();
940 ++ /*
941 ++ * spectre_v2_user_select_mitigation() relies on the state set by
942 ++ * retbleed_select_mitigation(); specifically the STIBP selection is
943 ++ * forced for UNRET.
944 ++ */
945 ++ spectre_v2_user_select_mitigation();
946 + ssb_select_mitigation();
947 + l1tf_select_mitigation();
948 + md_clear_select_mitigation();
949 +@@ -151,31 +185,17 @@ void __init check_bugs(void)
950 + #endif
951 + }
952 +
953 ++/*
954 ++ * NOTE: For VMX, this function is not called in the vmexit path.
955 ++ * It uses vmx_spec_ctrl_restore_host() instead.
956 ++ */
957 + void
958 + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
959 + {
960 +- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
961 ++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
962 + struct thread_info *ti = current_thread_info();
963 +
964 +- /* Is MSR_SPEC_CTRL implemented ? */
965 + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
966 +- /*
967 +- * Restrict guest_spec_ctrl to supported values. Clear the
968 +- * modifiable bits in the host base value and or the
969 +- * modifiable bits from the guest value.
970 +- */
971 +- guestval = hostval & ~x86_spec_ctrl_mask;
972 +- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
973 +-
974 +- /* SSBD controlled in MSR_SPEC_CTRL */
975 +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
976 +- static_cpu_has(X86_FEATURE_AMD_SSBD))
977 +- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
978 +-
979 +- /* Conditional STIBP enabled? */
980 +- if (static_branch_unlikely(&switch_to_cond_stibp))
981 +- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
982 +-
983 + if (hostval != guestval) {
984 + msrval = setguest ? guestval : hostval;
985 + wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
986 +@@ -705,12 +725,101 @@ static int __init nospectre_v1_cmdline(char *str)
987 + }
988 + early_param("nospectre_v1", nospectre_v1_cmdline);
989 +
990 +-#undef pr_fmt
991 +-#define pr_fmt(fmt) "Spectre V2 : " fmt
992 +-
993 + static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
994 + SPECTRE_V2_NONE;
995 +
996 ++#undef pr_fmt
997 ++#define pr_fmt(fmt) "RETBleed: " fmt
998 ++
999 ++enum retbleed_mitigation {
1000 ++ RETBLEED_MITIGATION_NONE,
1001 ++ RETBLEED_MITIGATION_IBRS,
1002 ++ RETBLEED_MITIGATION_EIBRS,
1003 ++};
1004 ++
1005 ++enum retbleed_mitigation_cmd {
1006 ++ RETBLEED_CMD_OFF,
1007 ++ RETBLEED_CMD_AUTO
1008 ++};
1009 ++
1010 ++const char * const retbleed_strings[] = {
1011 ++ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
1012 ++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
1013 ++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
1014 ++};
1015 ++
1016 ++static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
1017 ++ RETBLEED_MITIGATION_NONE;
1018 ++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
1019 ++ RETBLEED_CMD_AUTO;
1020 ++
1021 ++static int __init retbleed_parse_cmdline(char *str)
1022 ++{
1023 ++ if (!str)
1024 ++ return -EINVAL;
1025 ++
1026 ++ if (!strcmp(str, "off"))
1027 ++ retbleed_cmd = RETBLEED_CMD_OFF;
1028 ++ else if (!strcmp(str, "auto"))
1029 ++ retbleed_cmd = RETBLEED_CMD_AUTO;
1030 ++ else
1031 ++ pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str);
1032 ++
1033 ++ return 0;
1034 ++}
1035 ++early_param("retbleed", retbleed_parse_cmdline);
1036 ++
1037 ++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
1038 ++
1039 ++static void __init retbleed_select_mitigation(void)
1040 ++{
1041 ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
1042 ++ return;
1043 ++
1044 ++ switch (retbleed_cmd) {
1045 ++ case RETBLEED_CMD_OFF:
1046 ++ return;
1047 ++
1048 ++ case RETBLEED_CMD_AUTO:
1049 ++ default:
1050 ++ /*
1051 ++ * The Intel mitigation (IBRS) was already selected in
1052 ++ * spectre_v2_select_mitigation().
1053 ++ */
1054 ++
1055 ++ break;
1056 ++ }
1057 ++
1058 ++ switch (retbleed_mitigation) {
1059 ++ default:
1060 ++ break;
1061 ++ }
1062 ++
1063 ++ /*
1064 ++ * Let IBRS trump all on Intel without affecting the effects of the
1065 ++ * retbleed= cmdline option.
1066 ++ */
1067 ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
1068 ++ switch (spectre_v2_enabled) {
1069 ++ case SPECTRE_V2_IBRS:
1070 ++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
1071 ++ break;
1072 ++ case SPECTRE_V2_EIBRS:
1073 ++ case SPECTRE_V2_EIBRS_RETPOLINE:
1074 ++ case SPECTRE_V2_EIBRS_LFENCE:
1075 ++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
1076 ++ break;
1077 ++ default:
1078 ++ pr_err(RETBLEED_INTEL_MSG);
1079 ++ }
1080 ++ }
1081 ++
1082 ++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
1083 ++}
1084 ++
1085 ++#undef pr_fmt
1086 ++#define pr_fmt(fmt) "Spectre V2 : " fmt
1087 ++
1088 + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
1089 + SPECTRE_V2_USER_NONE;
1090 + static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
1091 +@@ -740,6 +849,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
1092 + #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
1093 + #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
1094 + #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
1095 ++#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
1096 +
1097 + #ifdef CONFIG_BPF_SYSCALL
1098 + void unpriv_ebpf_notify(int new_state)
1099 +@@ -781,6 +891,7 @@ enum spectre_v2_mitigation_cmd {
1100 + SPECTRE_V2_CMD_EIBRS,
1101 + SPECTRE_V2_CMD_EIBRS_RETPOLINE,
1102 + SPECTRE_V2_CMD_EIBRS_LFENCE,
1103 ++ SPECTRE_V2_CMD_IBRS,
1104 + };
1105 +
1106 + enum spectre_v2_user_cmd {
1107 +@@ -821,13 +932,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
1108 + pr_info("spectre_v2_user=%s forced on command line.\n", reason);
1109 + }
1110 +
1111 ++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
1112 ++
1113 + static enum spectre_v2_user_cmd __init
1114 +-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
1115 ++spectre_v2_parse_user_cmdline(void)
1116 + {
1117 + char arg[20];
1118 + int ret, i;
1119 +
1120 +- switch (v2_cmd) {
1121 ++ switch (spectre_v2_cmd) {
1122 + case SPECTRE_V2_CMD_NONE:
1123 + return SPECTRE_V2_USER_CMD_NONE;
1124 + case SPECTRE_V2_CMD_FORCE:
1125 +@@ -853,15 +966,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
1126 + return SPECTRE_V2_USER_CMD_AUTO;
1127 + }
1128 +
1129 +-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
1130 ++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
1131 + {
1132 +- return (mode == SPECTRE_V2_EIBRS ||
1133 +- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
1134 +- mode == SPECTRE_V2_EIBRS_LFENCE);
1135 ++ return mode == SPECTRE_V2_IBRS ||
1136 ++ mode == SPECTRE_V2_EIBRS ||
1137 ++ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
1138 ++ mode == SPECTRE_V2_EIBRS_LFENCE;
1139 + }
1140 +
1141 + static void __init
1142 +-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
1143 ++spectre_v2_user_select_mitigation(void)
1144 + {
1145 + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
1146 + bool smt_possible = IS_ENABLED(CONFIG_SMP);
1147 +@@ -874,7 +988,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
1148 + cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
1149 + smt_possible = false;
1150 +
1151 +- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
1152 ++ cmd = spectre_v2_parse_user_cmdline();
1153 + switch (cmd) {
1154 + case SPECTRE_V2_USER_CMD_NONE:
1155 + goto set_mode;
1156 +@@ -922,12 +1036,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
1157 + }
1158 +
1159 + /*
1160 +- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
1161 +- * required.
1162 ++ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
1163 ++ * STIBP is not required.
1164 + */
1165 + if (!boot_cpu_has(X86_FEATURE_STIBP) ||
1166 + !smt_possible ||
1167 +- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
1168 ++ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
1169 + return;
1170 +
1171 + /*
1172 +@@ -952,6 +1066,7 @@ static const char * const spectre_v2_strings[] = {
1173 + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
1174 + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
1175 + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
1176 ++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
1177 + };
1178 +
1179 + static const struct {
1180 +@@ -969,6 +1084,7 @@ static const struct {
1181 + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
1182 + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
1183 + { "auto", SPECTRE_V2_CMD_AUTO, false },
1184 ++ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
1185 + };
1186 +
1187 + static void __init spec_v2_print_cond(const char *reason, bool secure)
1188 +@@ -1031,6 +1147,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
1189 + return SPECTRE_V2_CMD_AUTO;
1190 + }
1191 +
1192 ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
1193 ++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
1194 ++ mitigation_options[i].option);
1195 ++ return SPECTRE_V2_CMD_AUTO;
1196 ++ }
1197 ++
1198 ++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
1199 ++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
1200 ++ mitigation_options[i].option);
1201 ++ return SPECTRE_V2_CMD_AUTO;
1202 ++ }
1203 ++
1204 ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
1205 ++ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
1206 ++ mitigation_options[i].option);
1207 ++ return SPECTRE_V2_CMD_AUTO;
1208 ++ }
1209 ++
1210 + spec_v2_print_cond(mitigation_options[i].option,
1211 + mitigation_options[i].secure);
1212 + return cmd;
1213 +@@ -1046,6 +1180,70 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
1214 + return SPECTRE_V2_RETPOLINE;
1215 + }
1216 +
1217 ++/* Disable in-kernel use of non-RSB RET predictors */
1218 ++static void __init spec_ctrl_disable_kernel_rrsba(void)
1219 ++{
1220 ++ u64 ia32_cap;
1221 ++
1222 ++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
1223 ++ return;
1224 ++
1225 ++ ia32_cap = x86_read_arch_cap_msr();
1226 ++
1227 ++ if (ia32_cap & ARCH_CAP_RRSBA) {
1228 ++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
1229 ++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
1230 ++ }
1231 ++}
1232 ++
1233 ++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
1234 ++{
1235 ++ /*
1236 ++ * Similar to context switches, there are two types of RSB attacks
1237 ++ * after VM exit:
1238 ++ *
1239 ++ * 1) RSB underflow
1240 ++ *
1241 ++ * 2) Poisoned RSB entry
1242 ++ *
1243 ++ * When retpoline is enabled, both are mitigated by filling/clearing
1244 ++ * the RSB.
1245 ++ *
1246 ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
1247 ++ * prediction isolation protections, RSB still needs to be cleared
1248 ++ * because of #2. Note that SMEP provides no protection here, unlike
1249 ++ * user-space-poisoned RSB entries.
1250 ++ *
1251 ++ * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
1252 ++ * bug is present then a LITE version of RSB protection is required,
1253 ++ * just a single call needs to retire before a RET is executed.
1254 ++ */
1255 ++ switch (mode) {
1256 ++ case SPECTRE_V2_NONE:
1257 ++ return;
1258 ++
1259 ++ case SPECTRE_V2_EIBRS_LFENCE:
1260 ++ case SPECTRE_V2_EIBRS:
1261 ++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB) &&
1262 ++ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
1263 ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
1264 ++ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
1265 ++ }
1266 ++ return;
1267 ++
1268 ++ case SPECTRE_V2_EIBRS_RETPOLINE:
1269 ++ case SPECTRE_V2_RETPOLINE:
1270 ++ case SPECTRE_V2_LFENCE:
1271 ++ case SPECTRE_V2_IBRS:
1272 ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
1273 ++ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
1274 ++ return;
1275 ++ }
1276 ++
1277 ++ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
1278 ++ dump_stack();
1279 ++}
1280 ++
1281 + static void __init spectre_v2_select_mitigation(void)
1282 + {
1283 + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
1284 +@@ -1070,6 +1268,14 @@ static void __init spectre_v2_select_mitigation(void)
1285 + break;
1286 + }
1287 +
1288 ++ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
1289 ++ retbleed_cmd != RETBLEED_CMD_OFF &&
1290 ++ boot_cpu_has(X86_FEATURE_IBRS) &&
1291 ++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
1292 ++ mode = SPECTRE_V2_IBRS;
1293 ++ break;
1294 ++ }
1295 ++
1296 + mode = spectre_v2_select_retpoline();
1297 + break;
1298 +
1299 +@@ -1086,6 +1292,10 @@ static void __init spectre_v2_select_mitigation(void)
1300 + mode = spectre_v2_select_retpoline();
1301 + break;
1302 +
1303 ++ case SPECTRE_V2_CMD_IBRS:
1304 ++ mode = SPECTRE_V2_IBRS;
1305 ++ break;
1306 ++
1307 + case SPECTRE_V2_CMD_EIBRS:
1308 + mode = SPECTRE_V2_EIBRS;
1309 + break;
1310 +@@ -1102,10 +1312,9 @@ static void __init spectre_v2_select_mitigation(void)
1311 + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
1312 + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
1313 +
1314 +- if (spectre_v2_in_eibrs_mode(mode)) {
1315 +- /* Force it so VMEXIT will restore correctly */
1316 ++ if (spectre_v2_in_ibrs_mode(mode)) {
1317 + x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
1318 +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
1319 ++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
1320 + }
1321 +
1322 + switch (mode) {
1323 +@@ -1113,6 +1322,12 @@ static void __init spectre_v2_select_mitigation(void)
1324 + case SPECTRE_V2_EIBRS:
1325 + break;
1326 +
1327 ++ case SPECTRE_V2_IBRS:
1328 ++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
1329 ++ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
1330 ++ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
1331 ++ break;
1332 ++
1333 + case SPECTRE_V2_LFENCE:
1334 + case SPECTRE_V2_EIBRS_LFENCE:
1335 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
1336 +@@ -1124,43 +1339,86 @@ static void __init spectre_v2_select_mitigation(void)
1337 + break;
1338 + }
1339 +
1340 ++ /*
1341 ++ * Disable alternate RSB predictions in kernel when indirect CALLs and
1342 ++ * JMPs gets protection against BHI and Intramode-BTI, but RET
1343 ++ * prediction from a non-RSB predictor is still a risk.
1344 ++ */
1345 ++ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
1346 ++ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
1347 ++ mode == SPECTRE_V2_RETPOLINE)
1348 ++ spec_ctrl_disable_kernel_rrsba();
1349 ++
1350 + spectre_v2_enabled = mode;
1351 + pr_info("%s\n", spectre_v2_strings[mode]);
1352 +
1353 + /*
1354 +- * If spectre v2 protection has been enabled, unconditionally fill
1355 +- * RSB during a context switch; this protects against two independent
1356 +- * issues:
1357 ++ * If Spectre v2 protection has been enabled, fill the RSB during a
1358 ++ * context switch. In general there are two types of RSB attacks
1359 ++ * across context switches, for which the CALLs/RETs may be unbalanced.
1360 ++ *
1361 ++ * 1) RSB underflow
1362 ++ *
1363 ++ * Some Intel parts have "bottomless RSB". When the RSB is empty,
1364 ++ * speculated return targets may come from the branch predictor,
1365 ++ * which could have a user-poisoned BTB or BHB entry.
1366 ++ *
1367 ++ * AMD has it even worse: *all* returns are speculated from the BTB,
1368 ++ * regardless of the state of the RSB.
1369 ++ *
1370 ++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
1371 ++ * scenario is mitigated by the IBRS branch prediction isolation
1372 ++ * properties, so the RSB buffer filling wouldn't be necessary to
1373 ++ * protect against this type of attack.
1374 ++ *
1375 ++ * The "user -> user" attack scenario is mitigated by RSB filling.
1376 ++ *
1377 ++ * 2) Poisoned RSB entry
1378 ++ *
1379 ++ * If the 'next' in-kernel return stack is shorter than 'prev',
1380 ++ * 'next' could be tricked into speculating with a user-poisoned RSB
1381 ++ * entry.
1382 ++ *
1383 ++ * The "user -> kernel" attack scenario is mitigated by SMEP and
1384 ++ * eIBRS.
1385 + *
1386 +- * - RSB underflow (and switch to BTB) on Skylake+
1387 +- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
1388 ++ * The "user -> user" scenario, also known as SpectreBHB, requires
1389 ++ * RSB clearing.
1390 ++ *
1391 ++ * So to mitigate all cases, unconditionally fill RSB on context
1392 ++ * switches.
1393 ++ *
1394 ++ * FIXME: Is this pointless for retbleed-affected AMD?
1395 + */
1396 + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
1397 + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
1398 +
1399 ++ spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
1400 ++
1401 + /*
1402 +- * Retpoline means the kernel is safe because it has no indirect
1403 +- * branches. Enhanced IBRS protects firmware too, so, enable restricted
1404 +- * speculation around firmware calls only when Enhanced IBRS isn't
1405 +- * supported.
1406 ++ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
1407 ++ * and Enhanced IBRS protect firmware too, so enable IBRS around
1408 ++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
1409 ++ * enabled.
1410 + *
1411 + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
1412 + * the user might select retpoline on the kernel command line and if
1413 + * the CPU supports Enhanced IBRS, kernel might un-intentionally not
1414 + * enable IBRS around firmware calls.
1415 + */
1416 +- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
1417 ++ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
1418 + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
1419 + pr_info("Enabling Restricted Speculation for firmware calls\n");
1420 + }
1421 +
1422 + /* Set up IBPB and STIBP depending on the general spectre V2 command */
1423 +- spectre_v2_user_select_mitigation(cmd);
1424 ++ spectre_v2_cmd = cmd;
1425 + }
1426 +
1427 + static void update_stibp_msr(void * __unused)
1428 + {
1429 +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
1430 ++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
1431 ++ write_spec_ctrl_current(val, true);
1432 + }
1433 +
1434 + /* Update x86_spec_ctrl_base in case SMT state changed. */
1435 +@@ -1376,16 +1634,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
1436 + break;
1437 + }
1438 +
1439 +- /*
1440 +- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
1441 +- * bit in the mask to allow guests to use the mitigation even in the
1442 +- * case where the host does not enable it.
1443 +- */
1444 +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
1445 +- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
1446 +- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
1447 +- }
1448 +-
1449 + /*
1450 + * We have three CPU feature flags that are in play here:
1451 + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
1452 +@@ -1403,7 +1651,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
1453 + x86_amd_ssb_disable();
1454 + } else {
1455 + x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
1456 +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
1457 ++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
1458 + }
1459 + }
1460 +
1461 +@@ -1608,7 +1856,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
1462 + void x86_spec_ctrl_setup_ap(void)
1463 + {
1464 + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
1465 +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
1466 ++ write_spec_ctrl_current(x86_spec_ctrl_base, true);
1467 +
1468 + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
1469 + x86_amd_ssb_disable();
1470 +@@ -1843,7 +2091,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
1471 +
1472 + static char *stibp_state(void)
1473 + {
1474 +- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
1475 ++ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
1476 + return "";
1477 +
1478 + switch (spectre_v2_user_stibp) {
1479 +@@ -1873,6 +2121,19 @@ static char *ibpb_state(void)
1480 + return "";
1481 + }
1482 +
1483 ++static char *pbrsb_eibrs_state(void)
1484 ++{
1485 ++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
1486 ++ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
1487 ++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
1488 ++ return ", PBRSB-eIBRS: SW sequence";
1489 ++ else
1490 ++ return ", PBRSB-eIBRS: Vulnerable";
1491 ++ } else {
1492 ++ return ", PBRSB-eIBRS: Not affected";
1493 ++ }
1494 ++}
1495 ++
1496 + static ssize_t spectre_v2_show_state(char *buf)
1497 + {
1498 + if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
1499 +@@ -1885,12 +2146,13 @@ static ssize_t spectre_v2_show_state(char *buf)
1500 + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
1501 + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
1502 +
1503 +- return sprintf(buf, "%s%s%s%s%s%s\n",
1504 ++ return sprintf(buf, "%s%s%s%s%s%s%s\n",
1505 + spectre_v2_strings[spectre_v2_enabled],
1506 + ibpb_state(),
1507 + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
1508 + stibp_state(),
1509 + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
1510 ++ pbrsb_eibrs_state(),
1511 + spectre_v2_module_string());
1512 + }
1513 +
1514 +@@ -1899,6 +2161,11 @@ static ssize_t srbds_show_state(char *buf)
1515 + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
1516 + }
1517 +
1518 ++static ssize_t retbleed_show_state(char *buf)
1519 ++{
1520 ++ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
1521 ++}
1522 ++
1523 + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
1524 + char *buf, unsigned int bug)
1525 + {
1526 +@@ -1942,6 +2209,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
1527 + case X86_BUG_MMIO_UNKNOWN:
1528 + return mmio_stale_data_show_state(buf);
1529 +
1530 ++ case X86_BUG_RETBLEED:
1531 ++ return retbleed_show_state(buf);
1532 ++
1533 + default:
1534 + break;
1535 + }
1536 +@@ -2001,4 +2271,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at
1537 + else
1538 + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
1539 + }
1540 ++
1541 ++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
1542 ++{
1543 ++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
1544 ++}
1545 + #endif
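The cpu_show_retbleed() handler and the extended spectre_v2 string added above surface through the usual sysfs vulnerability files. A small userspace sketch that prints them, assuming the standard /sys/devices/system/cpu/vulnerabilities/ location (the file names match the DEVICE_ATTR entries added later in this patch):

	/* Read the mitigation strings produced by cpu_show_spectre_v2()
	 * and cpu_show_retbleed(). */
	#include <stdio.h>

	static void show(const char *name)
	{
		char path[128], line[256];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/vulnerabilities/%s", name);
		f = fopen(path, "r");
		if (!f) {
			printf("%-12s <not exposed by this kernel>\n", name);
			return;
		}
		if (fgets(line, sizeof(line), f))
			printf("%-12s %s", name, line);	/* line keeps its '\n' */
		fclose(f);
	}

	int main(void)
	{
		show("spectre_v2");	/* now carries the ", PBRSB-eIBRS: ..." suffix */
		show("retbleed");	/* new file added by this patch */
		return 0;
	}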
1546 +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1547 +index e72a21c207724..2ad6d3b02a380 100644
1548 +--- a/arch/x86/kernel/cpu/common.c
1549 ++++ b/arch/x86/kernel/cpu/common.c
1550 +@@ -906,6 +906,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
1551 + #define NO_SWAPGS BIT(6)
1552 + #define NO_ITLB_MULTIHIT BIT(7)
1553 + #define NO_MMIO BIT(8)
1554 ++#define NO_EIBRS_PBRSB BIT(9)
1555 +
1556 + #define VULNWL(_vendor, _family, _model, _whitelist) \
1557 + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
1558 +@@ -947,7 +948,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
1559 +
1560 + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1561 + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1562 +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1563 ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
1564 +
1565 + /*
1566 + * Technically, swapgs isn't serializing on AMD (despite it previously
1567 +@@ -957,7 +958,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
1568 + * good enough for our purposes.
1569 + */
1570 +
1571 +- VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT),
1572 ++ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
1573 ++ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
1574 ++ VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
1575 +
1576 + /* AMD Family 0xf - 0x12 */
1577 + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1578 +@@ -970,48 +973,55 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
1579 + {}
1580 + };
1581 +
1582 ++#define VULNBL(vendor, family, model, blacklist) \
1583 ++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
1584 ++
1585 + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
1586 + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
1587 + INTEL_FAM6_##model, steppings, \
1588 + X86_FEATURE_ANY, issues)
1589 +
1590 ++#define VULNBL_AMD(family, blacklist) \
1591 ++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
1592 ++
1593 + #define SRBDS BIT(0)
1594 + /* CPU is affected by X86_BUG_MMIO_STALE_DATA */
1595 + #define MMIO BIT(1)
1596 + /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
1597 + #define MMIO_SBDS BIT(2)
1598 ++/* CPU is affected by RETbleed, speculating where you would not expect it */
1599 ++#define RETBLEED BIT(3)
1600 +
1601 + static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
1602 + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
1603 + VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS),
1604 + VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS),
1605 + VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS),
1606 +- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
1607 +- VULNBL_INTEL_STEPPINGS(BROADWELL_XEON_D,X86_STEPPINGS(0x3, 0x5), MMIO),
1608 ++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
1609 ++ VULNBL_INTEL_STEPPINGS(BROADWELL_XEON_D,X86_STEPPING_ANY, MMIO),
1610 + VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS),
1611 + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
1612 + VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
1613 +- VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
1614 +- VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS),
1615 +- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
1616 +- BIT(7) | BIT(0xB), MMIO),
1617 +- VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
1618 +- VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS),
1619 +- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
1620 +- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0x8), SRBDS),
1621 +- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
1622 +- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0x8), SRBDS),
1623 +- VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
1624 +- VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPINGS(0x1, 0x1), MMIO),
1625 +- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
1626 +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
1627 +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
1628 +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
1629 +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
1630 +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
1631 +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
1632 ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1633 ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
1634 ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1635 ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1636 ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1637 ++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_MOBILE,X86_STEPPING_ANY, RETBLEED),
1638 ++ VULNBL_INTEL_STEPPINGS(ICELAKE_MOBILE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1639 ++ VULNBL_INTEL_STEPPINGS(ICELAKE_XEON_D, X86_STEPPING_ANY, MMIO),
1640 ++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
1641 ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1642 ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1643 ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1644 ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
1645 ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
1646 + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_X, X86_STEPPING_ANY, MMIO),
1647 +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
1648 ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
1649 ++
1650 ++ VULNBL_AMD(0x15, RETBLEED),
1651 ++ VULNBL_AMD(0x16, RETBLEED),
1652 ++ VULNBL_AMD(0x17, RETBLEED),
1653 + {}
1654 + };
1655 +
1656 +@@ -1117,6 +1127,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
1657 + setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
1658 + }
1659 +
1660 ++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
1661 ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
1662 ++ setup_force_cpu_bug(X86_BUG_RETBLEED);
1663 ++ }
1664 ++
1665 ++ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
1666 ++ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
1667 ++ !(ia32_cap & ARCH_CAP_PBRSB_NO))
1668 ++ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
1669 ++
1670 + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
1671 + return;
1672 +
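The cpu_vuln_blacklist[] rework above keeps one entry per model and encodes every issue (SRBDS, MMIO, RETBLEED, ...) as a bit in the entry's data word, which cpu_set_bug_bits() then tests with cpu_matches(..., RETBLEED) and friends. A standalone sketch of that table-plus-bitmask lookup; the struct, values and helper here are illustrative stand-ins, not the kernel's x86_cpu_id machinery:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SRBDS		(1u << 0)
	#define MMIO		(1u << 1)
	#define RETBLEED	(1u << 3)

	struct vuln_entry {
		uint16_t family;
		uint16_t model;		/* 0 means "any model" in this sketch */
		uint32_t issues;	/* plays the role of x86_cpu_id::driver_data */
	};

	static const struct vuln_entry blacklist[] = {
		{ 6,    0x8e, SRBDS | MMIO | RETBLEED },	/* e.g. KABYLAKE_MOBILE */
		{ 0x17, 0,    RETBLEED },			/* e.g. AMD family 0x17 */
		{ 0, 0, 0 }					/* terminator */
	};

	static bool cpu_matches(const struct vuln_entry *table,
				uint16_t family, uint16_t model, uint32_t which)
	{
		for (; table->issues; table++) {
			if (table->family != family)
				continue;
			if (table->model && table->model != model)
				continue;
			return (table->issues & which) != 0;
		}
		return false;
	}

	int main(void)
	{
		/* A Kaby Lake mobile part is flagged for RETBLEED by the table. */
		printf("retbleed affected: %d\n",
		       cpu_matches(blacklist, 6, 0x8e, RETBLEED));
		return 0;
	}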
1673 +diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
1674 +index 751e590574660..ad6776081e60d 100644
1675 +--- a/arch/x86/kernel/cpu/match.c
1676 ++++ b/arch/x86/kernel/cpu/match.c
1677 +@@ -16,12 +16,17 @@
1678 + * respective wildcard entries.
1679 + *
1680 + * A typical table entry would be to match a specific CPU
1681 +- * { X86_VENDOR_INTEL, 6, 0x12 }
1682 +- * or to match a specific CPU feature
1683 +- * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
1684 ++ *
1685 ++ * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL,
1686 ++ * X86_FEATURE_ANY, NULL);
1687 + *
1688 + * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
1689 +- * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
1690 ++ * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor)
1691 ++ *
1692 ++ * asm/cpu_device_id.h contains a set of useful macros which are shortcuts
1693 ++ * for various common selections. The above can be shortened to:
1694 ++ *
1695 ++ * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL);
1696 + *
1697 + * Arrays used to match for this should also be declared using
1698 + * MODULE_DEVICE_TABLE(x86cpu, ...)
1699 +@@ -53,3 +58,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
1700 + return NULL;
1701 + }
1702 + EXPORT_SYMBOL(x86_match_cpu);
1703 ++
1704 ++static const struct x86_cpu_desc *
1705 ++x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
1706 ++{
1707 ++ struct cpuinfo_x86 *c = &boot_cpu_data;
1708 ++ const struct x86_cpu_desc *m;
1709 ++
1710 ++ for (m = match; m->x86_family | m->x86_model; m++) {
1711 ++ if (c->x86_vendor != m->x86_vendor)
1712 ++ continue;
1713 ++ if (c->x86 != m->x86_family)
1714 ++ continue;
1715 ++ if (c->x86_model != m->x86_model)
1716 ++ continue;
1717 ++ if (c->x86_stepping != m->x86_stepping)
1718 ++ continue;
1719 ++ return m;
1720 ++ }
1721 ++ return NULL;
1722 ++}
1723 ++
1724 ++bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table)
1725 ++{
1726 ++ const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table);
1727 ++
1728 ++ if (!res || res->x86_microcode_rev > boot_cpu_data.microcode)
1729 ++ return false;
1730 ++
1731 ++ return true;
1732 ++}
1733 ++EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev);
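x86_cpu_has_min_microcode_rev() above walks a stepping-exact table and compares the listed revision against boot_cpu_data.microcode. A hedged usage sketch, assuming struct x86_cpu_desc is declared in <asm/cpu_device_id.h> as it is upstream; the model, stepping and revision values are made up for illustration:

	#include <linux/types.h>
	#include <asm/cpu_device_id.h>
	#include <asm/intel-family.h>
	#include <asm/processor.h>

	static const struct x86_cpu_desc example_min_ucode[] = {
		{
			.x86_vendor	   = X86_VENDOR_INTEL,
			.x86_family	   = 6,
			.x86_model	   = INTEL_FAM6_SKYLAKE_X,
			.x86_stepping	   = 0x4,
			.x86_microcode_rev = 0x02000065,	/* hypothetical revision */
		},
		{}	/* terminator: family and model both zero end the walk */
	};

	static bool example_workaround_usable(void)
	{
		/* True only when the boot CPU matches an entry exactly (vendor,
		 * family, model, stepping) and its microcode is at least as new
		 * as the revision listed for that entry. */
		return x86_cpu_has_min_microcode_rev(example_min_ucode);
	}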
1734 +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
1735 +index 0b9c7150cb23f..efdb1decf0347 100644
1736 +--- a/arch/x86/kernel/cpu/scattered.c
1737 ++++ b/arch/x86/kernel/cpu/scattered.c
1738 +@@ -21,6 +21,7 @@ struct cpuid_bit {
1739 + static const struct cpuid_bit cpuid_bits[] = {
1740 + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
1741 + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
1742 ++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
1743 + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
1744 + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
1745 + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
1746 +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
1747 +index a07b09f68e7ee..baa9254149e7a 100644
1748 +--- a/arch/x86/kernel/process.c
1749 ++++ b/arch/x86/kernel/process.c
1750 +@@ -435,7 +435,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
1751 + }
1752 +
1753 + if (updmsr)
1754 +- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
1755 ++ write_spec_ctrl_current(msr, false);
1756 + }
1757 +
1758 + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
1759 +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
1760 +index 69056af43a981..7d86708962039 100644
1761 +--- a/arch/x86/kvm/svm.c
1762 ++++ b/arch/x86/kvm/svm.c
1763 +@@ -47,6 +47,7 @@
1764 + #include <asm/irq_remapping.h>
1765 + #include <asm/microcode.h>
1766 + #include <asm/spec-ctrl.h>
1767 ++#include <asm/cpu_device_id.h>
1768 +
1769 + #include <asm/virtext.h>
1770 + #include "trace.h"
1771 +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
1772 +index 27d99928a10e7..aea4c497da3f2 100644
1773 +--- a/arch/x86/kvm/vmx.c
1774 ++++ b/arch/x86/kvm/vmx.c
1775 +@@ -40,6 +40,7 @@
1776 + #include "x86.h"
1777 +
1778 + #include <asm/cpu.h>
1779 ++#include <asm/cpu_device_id.h>
1780 + #include <asm/io.h>
1781 + #include <asm/desc.h>
1782 + #include <asm/vmx.h>
1783 +@@ -9769,10 +9770,36 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
1784 + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
1785 + }
1786 +
1787 ++u64 __always_inline vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx)
1788 ++{
1789 ++ u64 guestval, hostval = this_cpu_read(x86_spec_ctrl_current);
1790 ++
1791 ++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
1792 ++ return 0;
1793 ++
1794 ++ guestval = __rdmsr(MSR_IA32_SPEC_CTRL);
1795 ++
1796 ++ /*
1797 ++ * If the guest/host SPEC_CTRL values differ, restore the host value.
1798 ++ *
1799 ++ * For legacy IBRS, the IBRS bit always needs to be written after
1800 ++ * transitioning from a less privileged predictor mode, regardless of
1801 ++ * whether the guest/host values differ.
1802 ++ */
1803 ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
1804 ++ guestval != hostval)
1805 ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
1806 ++
1807 ++ barrier_nospec();
1808 ++
1809 ++ return guestval;
1810 ++}
1811 ++
1812 + static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1813 + {
1814 + struct vcpu_vmx *vmx = to_vmx(vcpu);
1815 + unsigned long debugctlmsr, cr3, cr4;
1816 ++ u64 spec_ctrl;
1817 +
1818 + /* Record the guest's net vcpu time for enforced NMI injections. */
1819 + if (unlikely(!cpu_has_virtual_nmis() &&
1820 +@@ -9966,6 +9993,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1821 + , "eax", "ebx", "edi", "esi"
1822 + #endif
1823 + );
1824 ++ /*
1825 ++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
1826 ++ * the first unbalanced RET after vmexit!
1827 ++ *
1828 ++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
1829 ++ * entries and (in some cases) RSB underflow.
1830 ++ *
1831 ++ * eIBRS has its own protection against poisoned RSB, so it doesn't
1832 ++ * need the RSB filling sequence. But it does need to be enabled, and a
1833 ++ * single call to retire, before the first unbalanced RET.
1834 ++ *
1835 ++ * So no RETs before vmx_spec_ctrl_restore_host() below.
1836 ++ */
1837 ++ vmexit_fill_RSB();
1838 ++
1839 ++ /* Save this for below */
1840 ++ spec_ctrl = vmx_spec_ctrl_restore_host(vmx);
1841 +
1842 + vmx_enable_fb_clear(vmx);
1843 +
1844 +@@ -9985,12 +10029,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1845 + * save it.
1846 + */
1847 + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
1848 +- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
1849 +-
1850 +- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
1851 +-
1852 +- /* Eliminate branch target predictions from guest mode */
1853 +- vmexit_fill_RSB();
1854 ++ vmx->spec_ctrl = spec_ctrl;
1855 +
1856 + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
1857 + if (debugctlmsr)
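The comment block in vmx_spec_ctrl_restore_host() above folds two rules into one condition: legacy (kernel) IBRS must always rewrite SPEC_CTRL after leaving the guest's less privileged predictor mode, while otherwise the write is needed only when the guest and host values differ. A standalone sketch that spells the decision out case by case (illustrative only; the real code also reads the MSR and issues barrier_nospec()):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool must_write_spec_ctrl(bool legacy_kernel_ibrs,
					 uint64_t guestval, uint64_t hostval)
	{
		/* Legacy IBRS: always rewrite after the guest ran, even if the
		 * value is unchanged. Otherwise: write only when the values
		 * actually differ. */
		return legacy_kernel_ibrs || guestval != hostval;
	}

	int main(void)
	{
		printf("eIBRS,  same value -> wrmsr: %d\n", must_write_spec_ctrl(false, 0x1, 0x1));
		printf("eIBRS,  differs    -> wrmsr: %d\n", must_write_spec_ctrl(false, 0x0, 0x1));
		printf("legacy, same value -> wrmsr: %d\n", must_write_spec_ctrl(true,  0x1, 0x1));
		return 0;
	}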
1858 +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
1859 +index ba4e7732e2c70..9ae1531243808 100644
1860 +--- a/drivers/base/cpu.c
1861 ++++ b/drivers/base/cpu.c
1862 +@@ -564,6 +564,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
1863 + return sysfs_emit(buf, "Not affected\n");
1864 + }
1865 +
1866 ++ssize_t __weak cpu_show_retbleed(struct device *dev,
1867 ++ struct device_attribute *attr, char *buf)
1868 ++{
1869 ++ return sysfs_emit(buf, "Not affected\n");
1870 ++}
1871 ++
1872 + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
1873 + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
1874 + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
1875 +@@ -574,6 +580,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
1876 + static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
1877 + static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
1878 + static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
1879 ++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
1880 +
1881 + static struct attribute *cpu_root_vulnerabilities_attrs[] = {
1882 + &dev_attr_meltdown.attr,
1883 +@@ -586,6 +593,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
1884 + &dev_attr_itlb_multihit.attr,
1885 + &dev_attr_srbds.attr,
1886 + &dev_attr_mmio_stale_data.attr,
1887 ++ &dev_attr_retbleed.attr,
1888 + NULL
1889 + };
1890 +
1891 +diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
1892 +index 8a199b4047c23..6b53041775757 100644
1893 +--- a/drivers/cpufreq/acpi-cpufreq.c
1894 ++++ b/drivers/cpufreq/acpi-cpufreq.c
1895 +@@ -47,6 +47,7 @@
1896 + #include <asm/msr.h>
1897 + #include <asm/processor.h>
1898 + #include <asm/cpufeature.h>
1899 ++#include <asm/cpu_device_id.h>
1900 +
1901 + MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
1902 + MODULE_DESCRIPTION("ACPI Processor P-States Driver");
1903 +diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
1904 +index 042023bbbf621..b7692861b2f72 100644
1905 +--- a/drivers/cpufreq/amd_freq_sensitivity.c
1906 ++++ b/drivers/cpufreq/amd_freq_sensitivity.c
1907 +@@ -20,6 +20,7 @@
1908 +
1909 + #include <asm/msr.h>
1910 + #include <asm/cpufeature.h>
1911 ++#include <asm/cpu_device_id.h>
1912 +
1913 + #include "cpufreq_ondemand.h"
1914 +
1915 +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
1916 +index 31f54a334b586..51cc492c2e35f 100644
1917 +--- a/drivers/idle/intel_idle.c
1918 ++++ b/drivers/idle/intel_idle.c
1919 +@@ -58,11 +58,13 @@
1920 + #include <linux/tick.h>
1921 + #include <trace/events/power.h>
1922 + #include <linux/sched.h>
1923 ++#include <linux/sched/smt.h>
1924 + #include <linux/notifier.h>
1925 + #include <linux/cpu.h>
1926 + #include <linux/moduleparam.h>
1927 + #include <asm/cpu_device_id.h>
1928 + #include <asm/intel-family.h>
1929 ++#include <asm/nospec-branch.h>
1930 + #include <asm/mwait.h>
1931 + #include <asm/msr.h>
1932 +
1933 +@@ -97,6 +99,8 @@ static const struct idle_cpu *icpu;
1934 + static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
1935 + static int intel_idle(struct cpuidle_device *dev,
1936 + struct cpuidle_driver *drv, int index);
1937 ++static int intel_idle_ibrs(struct cpuidle_device *dev,
1938 ++ struct cpuidle_driver *drv, int index);
1939 + static void intel_idle_s2idle(struct cpuidle_device *dev,
1940 + struct cpuidle_driver *drv, int index);
1941 + static struct cpuidle_state *cpuidle_state_table;
1942 +@@ -109,6 +113,12 @@ static struct cpuidle_state *cpuidle_state_table;
1943 + */
1944 + #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
1945 +
1946 ++/*
1947 ++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
1948 ++ * above.
1949 ++ */
1950 ++#define CPUIDLE_FLAG_IBRS BIT(16)
1951 ++
1952 + /*
1953 + * MWAIT takes an 8-bit "hint" in EAX "suggesting"
1954 + * the C-state (top nibble) and sub-state (bottom nibble)
1955 +@@ -617,7 +627,7 @@ static struct cpuidle_state skl_cstates[] = {
1956 + {
1957 + .name = "C6",
1958 + .desc = "MWAIT 0x20",
1959 +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1960 ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
1961 + .exit_latency = 85,
1962 + .target_residency = 200,
1963 + .enter = &intel_idle,
1964 +@@ -625,7 +635,7 @@ static struct cpuidle_state skl_cstates[] = {
1965 + {
1966 + .name = "C7s",
1967 + .desc = "MWAIT 0x33",
1968 +- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
1969 ++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
1970 + .exit_latency = 124,
1971 + .target_residency = 800,
1972 + .enter = &intel_idle,
1973 +@@ -633,7 +643,7 @@ static struct cpuidle_state skl_cstates[] = {
1974 + {
1975 + .name = "C8",
1976 + .desc = "MWAIT 0x40",
1977 +- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1978 ++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
1979 + .exit_latency = 200,
1980 + .target_residency = 800,
1981 + .enter = &intel_idle,
1982 +@@ -641,7 +651,7 @@ static struct cpuidle_state skl_cstates[] = {
1983 + {
1984 + .name = "C9",
1985 + .desc = "MWAIT 0x50",
1986 +- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1987 ++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
1988 + .exit_latency = 480,
1989 + .target_residency = 5000,
1990 + .enter = &intel_idle,
1991 +@@ -649,7 +659,7 @@ static struct cpuidle_state skl_cstates[] = {
1992 + {
1993 + .name = "C10",
1994 + .desc = "MWAIT 0x60",
1995 +- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1996 ++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
1997 + .exit_latency = 890,
1998 + .target_residency = 5000,
1999 + .enter = &intel_idle,
2000 +@@ -678,7 +688,7 @@ static struct cpuidle_state skx_cstates[] = {
2001 + {
2002 + .name = "C6",
2003 + .desc = "MWAIT 0x20",
2004 +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
2005 ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
2006 + .exit_latency = 133,
2007 + .target_residency = 600,
2008 + .enter = &intel_idle,
2009 +@@ -935,6 +945,24 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
2010 + return index;
2011 + }
2012 +
2013 ++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
2014 ++ struct cpuidle_driver *drv, int index)
2015 ++{
2016 ++ bool smt_active = sched_smt_active();
2017 ++ u64 spec_ctrl = spec_ctrl_current();
2018 ++ int ret;
2019 ++
2020 ++ if (smt_active)
2021 ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
2022 ++
2023 ++ ret = intel_idle(dev, drv, index);
2024 ++
2025 ++ if (smt_active)
2026 ++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
2027 ++
2028 ++ return ret;
2029 ++}
2030 ++
2031 + /**
2032 + * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
2033 + * @dev: cpuidle_device
2034 +@@ -1375,6 +1403,11 @@ static void __init intel_idle_cpuidle_driver_init(void)
2035 + mark_tsc_unstable("TSC halts in idle"
2036 + " states deeper than C2");
2037 +
2038 ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
2039 ++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
2040 ++ drv->states[drv->state_count].enter = intel_idle_ibrs;
2041 ++ }
2042 ++
2043 + drv->states[drv->state_count] = /* structure copy */
2044 + cpuidle_state_table[cstate];
2045 +
2046 +diff --git a/include/linux/cpu.h b/include/linux/cpu.h
2047 +index f958ecc82de99..8c4d21e717749 100644
2048 +--- a/include/linux/cpu.h
2049 ++++ b/include/linux/cpu.h
2050 +@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr,
2051 + extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
2052 + struct device_attribute *attr,
2053 + char *buf);
2054 ++extern ssize_t cpu_show_retbleed(struct device *dev,
2055 ++ struct device_attribute *attr, char *buf);
2056 +
2057 + extern __printf(4, 5)
2058 + struct device *cpu_device_create(struct device *parent, void *drvdata,
2059 +diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
2060 +index 6f8eb1238235f..97794823eabd6 100644
2061 +--- a/include/linux/mod_devicetable.h
2062 ++++ b/include/linux/mod_devicetable.h
2063 +@@ -604,9 +604,7 @@ struct x86_cpu_id {
2064 + __u16 steppings;
2065 + };
2066 +
2067 +-#define X86_FEATURE_MATCH(x) \
2068 +- { X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x }
2069 +-
2070 ++/* Wild cards for x86_cpu_id::vendor, family, model and feature */
2071 + #define X86_VENDOR_ANY 0xffff
2072 + #define X86_FAMILY_ANY 0
2073 + #define X86_MODEL_ANY 0
2074 +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
2075 +index bb5861adb5a0a..8fd46c8793483 100644
2076 +--- a/tools/arch/x86/include/asm/cpufeatures.h
2077 ++++ b/tools/arch/x86/include/asm/cpufeatures.h
2078 +@@ -270,6 +270,7 @@
2079 +
2080 + /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
2081 + #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
2082 ++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
2083 +
2084 + /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
2085 + #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */