commit: 7aaaf24efa9606d739c15aa28d8118b093cc315e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Aug 11 12:32:45 2022 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Aug 11 12:32:45 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=7aaaf24e

Linux patch 5.18.17

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README              |    4 +
 1016_linux-5.18.17.patch | 1418 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1422 insertions(+)

diff --git a/0000_README b/0000_README
index efa0b25e..e0f23579 100644
--- a/0000_README
+++ b/0000_README
@@ -107,6 +107,10 @@ Patch: 1015_linux-5.18.16.patch
From: http://www.kernel.org
Desc: Linux 5.18.16

+Patch: 1016_linux-5.18.17.patch
+From: http://www.kernel.org
+Desc: Linux 5.18.17
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1016_linux-5.18.17.patch b/1016_linux-5.18.17.patch
new file mode 100644
index 00000000..94fc8829
--- /dev/null
+++ b/1016_linux-5.18.17.patch
@@ -0,0 +1,1418 @@
+diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
+index 9e9556826450b..2ce2a38cdd556 100644
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -422,6 +422,14 @@ The possible values in this file are:
+ 'RSB filling' Protection of RSB on context switch enabled
+ ============= ===========================================
+
++ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
++
++ =========================== =======================================================
++ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
++ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
++ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
++ =========================== =======================================================
++
+ Full mitigation might require a microcode update from the CPU
+ vendor. When the necessary microcode is not available, the kernel will
+ report vulnerability.
+diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+index 5aac094fd2172..58ecafc1b7f90 100644
+--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
++++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+@@ -23,6 +23,7 @@ properties:
+ - brcm,bcm4345c5
+ - brcm,bcm43540-bt
+ - brcm,bcm4335a0
++ - brcm,bcm4349-bt
+
+ shutdown-gpios:
+ maxItems: 1
+diff --git a/Makefile b/Makefile
+index 18bcbcd037f0a..ef8c18e5c161c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 18
+-SUBLEVEL = 16
++SUBLEVEL = 17
+ EXTRAVERSION =
+ NAME = Superb Owl
+
+diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
+index 9c3d86e397bf3..1fae18ba11ed1 100644
+--- a/arch/arm64/crypto/poly1305-glue.c
++++ b/arch/arm64/crypto/poly1305-glue.c
+@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ {
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+- poly1305_init_arch(dctx, src);
++ poly1305_init_arm64(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
+index 96dc0f7da258d..a971d462f531c 100644
+--- a/arch/arm64/include/asm/kernel-pgtable.h
++++ b/arch/arm64/include/asm/kernel-pgtable.h
+@@ -103,8 +103,8 @@
+ /*
+ * Initial memory map attributes.
+ */
+-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
++#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
++#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
+
+ #if ARM64_KERNEL_USES_PMD_MAPS
+ #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
+index 6a98f1a38c29a..8a93a0a7489b2 100644
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
+ subs x1, x1, #64
+ b.ne 1b
+
+- mov x7, SWAPPER_MM_MMUFLAGS
++ mov_q x7, SWAPPER_MM_MMUFLAGS
+
+ /*
+ * Create the identity mapping.
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 4d1d87f76a74f..ce1f5a876cfea 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -2469,7 +2469,7 @@ config RETPOLINE
+ config RETHUNK
+ bool "Enable return-thunks"
+ depends on RETPOLINE && CC_HAS_RETURN_THUNK
+- default y
++ default y if X86_64
+ help
+ Compile the kernel with the return-thunks compiler option to guard
+ against kernel-to-user data leaks by avoiding return speculation.
+@@ -2478,21 +2478,21 @@ config RETHUNK
+
+ config CPU_UNRET_ENTRY
+ bool "Enable UNRET on kernel entry"
+- depends on CPU_SUP_AMD && RETHUNK
++ depends on CPU_SUP_AMD && RETHUNK && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=unret mitigation.
+
+ config CPU_IBPB_ENTRY
+ bool "Enable IBPB on kernel entry"
+- depends on CPU_SUP_AMD
++ depends on CPU_SUP_AMD && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=ibpb mitigation.
+
+ config CPU_IBRS_ENTRY
+ bool "Enable IBRS on kernel entry"
+- depends on CPU_SUP_INTEL
++ depends on CPU_SUP_INTEL && X86_64
+ default y
+ help
+ Compile the kernel with support for the spectre_v2=ibrs mitigation.
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 49889f171e860..e82da174d28c3 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -302,6 +302,7 @@
+ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+@@ -453,5 +454,6 @@
+ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+ #define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 4ff36610af6ab..9fdaa847d4b66 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -651,6 +651,7 @@ struct kvm_vcpu_arch {
+ u64 ia32_misc_enable_msr;
+ u64 smbase;
+ u64 smi_count;
++ bool at_instruction_boundary;
+ bool tpr_access_reporting;
+ bool xsaves_enabled;
+ bool xfd_no_write_intercept;
+@@ -1289,6 +1290,8 @@ struct kvm_vcpu_stat {
+ u64 nested_run;
+ u64 directed_yield_attempted;
+ u64 directed_yield_successful;
++ u64 preemption_reported;
++ u64 preemption_other;
+ u64 guest_mode;
+ };
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index ad084326f24c2..f951147cc7fdc 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -148,6 +148,10 @@
+ * are restricted to targets in
+ * kernel.
+ */
++#define ARCH_CAP_PBRSB_NO BIT(24) /*
++ * Not susceptible to Post-Barrier
++ * Return Stack Buffer Predictions.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 38a3e86e665ef..d3a3cc6772ee1 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -60,7 +60,9 @@
+ 774: \
+ add $(BITS_PER_LONG/8) * 2, sp; \
+ dec reg; \
+- jnz 771b;
++ jnz 771b; \
++ /* barrier for jnz misprediction */ \
++ lfence;
+
+ #ifdef __ASSEMBLY__
+
+@@ -118,13 +120,28 @@
+ #endif
+ .endm
+
++.macro ISSUE_UNBALANCED_RET_GUARD
++ ANNOTATE_INTRA_FUNCTION_CALL
++ call .Lunbalanced_ret_guard_\@
++ int3
++.Lunbalanced_ret_guard_\@:
++ add $(BITS_PER_LONG/8), %_ASM_SP
++ lfence
++.endm
++
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+.ifb \ftr2
+ ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+.else
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+.endif
+ __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+.Lunbalanced_\@:
+ ISSUE_UNBALANCED_RET_GUARD
+ .Lskip_rsb_\@:
+ .endm
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index fd986a8ba2bd7..fa625b2a8a939 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
+ }
+ }
+
++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
++{
++ /*
++ * Similar to context switches, there are two types of RSB attacks
++ * after VM exit:
++ *
++ * 1) RSB underflow
++ *
++ * 2) Poisoned RSB entry
++ *
++ * When retpoline is enabled, both are mitigated by filling/clearing
++ * the RSB.
++ *
++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++ * prediction isolation protections, RSB still needs to be cleared
++ * because of #2. Note that SMEP provides no protection here, unlike
++ * user-space-poisoned RSB entries.
++ *
++ * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
++ * bug is present then a LITE version of RSB protection is required,
++ * just a single call needs to retire before a RET is executed.
++ */
++ switch (mode) {
++ case SPECTRE_V2_NONE:
++ return;
++
++ case SPECTRE_V2_EIBRS_LFENCE:
++ case SPECTRE_V2_EIBRS:
++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
++ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
++ }
++ return;
++
++ case SPECTRE_V2_EIBRS_RETPOLINE:
++ case SPECTRE_V2_RETPOLINE:
++ case SPECTRE_V2_LFENCE:
++ case SPECTRE_V2_IBRS:
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
++ return;
++ }
++
++ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
++ dump_stack();
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void)
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
+- /*
+- * Similar to context switches, there are two types of RSB attacks
+- * after vmexit:
+- *
+- * 1) RSB underflow
+- *
+- * 2) Poisoned RSB entry
+- *
+- * When retpoline is enabled, both are mitigated by filling/clearing
+- * the RSB.
+- *
+- * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+- * prediction isolation protections, RSB still needs to be cleared
+- * because of #2. Note that SMEP provides no protection here, unlike
+- * user-space-poisoned RSB entries.
+- *
+- * eIBRS, on the other hand, has RSB-poisoning protections, so it
+- * doesn't need RSB clearing after vmexit.
+- */
+- if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
+- boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
+- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++ spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+
+ /*
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+@@ -2285,6 +2311,19 @@ static char *ibpb_state(void)
+ return "";
+ }
+
++static char *pbrsb_eibrs_state(void)
++{
++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
++ return ", PBRSB-eIBRS: SW sequence";
++ else
++ return ", PBRSB-eIBRS: Vulnerable";
++ } else {
++ return ", PBRSB-eIBRS: Not affected";
++ }
++}
++
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+@@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf)
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+- return sprintf(buf, "%s%s%s%s%s%s\n",
++ return sprintf(buf, "%s%s%s%s%s%s%s\n",
+ spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ stibp_state(),
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
++ pbrsb_eibrs_state(),
+ spectre_v2_module_string());
+ }
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 1f43ddf2ffc36..d47e20e305cd2 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1161,6 +1161,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ #define NO_SWAPGS BIT(6)
+ #define NO_ITLB_MULTIHIT BIT(7)
+ #define NO_SPECTRE_V2 BIT(8)
++#define NO_EIBRS_PBRSB BIT(9)
+
+ #define VULNWL(vendor, family, model, whitelist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1203,7 +1204,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /*
+ * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1213,7 +1214,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+ * good enough for our purposes.
+ */
+
+- VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
++ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /* AMD Family 0xf - 0x12 */
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+@@ -1391,6 +1394,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+ }
+
++ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
++ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
++ !(ia32_cap & ARCH_CAP_PBRSB_NO))
++ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
++
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ return;
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index 6d3b3e5a5533b..ee4802d7b36cd 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -145,6 +145,15 @@ static bool try_step_up(struct tdp_iter *iter)
+ return true;
+ }
+
++/*
++ * Step the iterator back up a level in the paging structure. Should only be
++ * used when the iterator is below the root level.
++ */
++void tdp_iter_step_up(struct tdp_iter *iter)
++{
++ WARN_ON(!try_step_up(iter));
++}
++
+ /*
+ * Step to the next SPTE in a pre-order traversal of the paging structure.
+ * To get to the next SPTE, the iterator either steps down towards the goal
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index f0af385c56e03..adfca0cf94d3a 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
+ int min_level, gfn_t next_last_level_gfn);
+ void tdp_iter_next(struct tdp_iter *iter);
+ void tdp_iter_restart(struct tdp_iter *iter);
++void tdp_iter_step_up(struct tdp_iter *iter);
+
+ #endif /* __KVM_X86_MMU_TDP_ITER_H */
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 922b06bf4b948..b61a11d462ccb 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1748,12 +1748,12 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ gfn_t start = slot->base_gfn;
+ gfn_t end = start + slot->npages;
+ struct tdp_iter iter;
++ int max_mapping_level;
+ kvm_pfn_t pfn;
+
+ rcu_read_lock();
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+-retry:
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ continue;
+
+@@ -1761,15 +1761,41 @@ retry:
+ !is_last_spte(iter.old_spte, iter.level))
+ continue;
+
++ /*
++ * This is a leaf SPTE. Check if the PFN it maps can
++ * be mapped at a higher level.
++ */
+ pfn = spte_to_pfn(iter.old_spte);
+- if (kvm_is_reserved_pfn(pfn) ||
+- iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
+- pfn, PG_LEVEL_NUM))
++
++ if (kvm_is_reserved_pfn(pfn))
+ continue;
+
++ max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
++ iter.gfn, pfn, PG_LEVEL_NUM);
++
++ WARN_ON(max_mapping_level < iter.level);
++
++ /*
++ * If this page is already mapped at the highest
++ * viable level, there's nothing more to do.
++ */
++ if (max_mapping_level == iter.level)
++ continue;
++
++ /*
++ * The page can be remapped at a higher level, so step
++ * up to zap the parent SPTE.
++ */
++ while (max_mapping_level > iter.level)
++ tdp_iter_step_up(&iter);
++
+ /* Note, a successful atomic zap also does a remote TLB flush. */
+- if (tdp_mmu_zap_spte_atomic(kvm, &iter))
+- goto retry;
++ tdp_mmu_zap_spte_atomic(kvm, &iter);
++
++ /*
++ * If the atomic zap fails, the iter will recurse back into
++ * the same subtree to retry.
++ */
+ }
+
+ rcu_read_unlock();
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index 76e9e6eb71d63..7aa1ce34a5204 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+
+ /* If source buffer is not aligned then use an intermediate buffer */
+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
+- src_tpage = alloc_page(GFP_KERNEL);
++ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!src_tpage)
+ return -ENOMEM;
+
+@@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+- dst_tpage = alloc_page(GFP_KERNEL);
++ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 6bfb0b0e66bd3..c667214c630b1 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4166,6 +4166,8 @@ out:
+
+ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+ {
++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
+index 4182c7ffc9091..6de96b9438044 100644
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -227,11 +227,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+- * need the RSB filling sequence. But it does need to be enabled
+- * before the first unbalanced RET.
++ * need the RSB filling sequence. But it does need to be enabled, and a
++ * single call to retire, before the first unbalanced RET.
+ */
+
+- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
++ X86_FEATURE_RSB_VMEXIT_LITE
++
+
+ pop %_ASM_ARG2 /* @flags */
+ pop %_ASM_ARG1 /* @vmx */
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 4b6a0268c78e3..597c3c08da501 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6630,6 +6630,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+ return;
+
+ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 53b6fdf30c99b..65b0ec28bd52b 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -291,6 +291,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ STATS_DESC_COUNTER(VCPU, nested_run),
+ STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
+ STATS_DESC_COUNTER(VCPU, directed_yield_successful),
++ STATS_DESC_COUNTER(VCPU, preemption_reported),
++ STATS_DESC_COUNTER(VCPU, preemption_other),
+ STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ };
+
+@@ -4607,6 +4609,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ struct kvm_memslots *slots;
+ static const u8 preempted = KVM_VCPU_PREEMPTED;
+
++ /*
++ * The vCPU can be marked preempted if and only if the VM-Exit was on
++ * an instruction boundary and will not trigger guest emulation of any
++ * kind (see vcpu_run). Vendor specific code controls (conservatively)
++ * when this is true, for example allowing the vCPU to be marked
++ * preempted if and only if the VM-Exit was due to a host interrupt.
++ */
++ if (!vcpu->arch.at_instruction_boundary) {
++ vcpu->stat.preemption_other++;
++ return;
++ }
++
++ vcpu->stat.preemption_reported++;
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+@@ -4636,19 +4651,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+ int idx;
+
+- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
+- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
++ if (vcpu->preempted) {
++ if (!vcpu->arch.guest_state_protected)
++ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+
+- /*
+- * Take the srcu lock as memslots will be accessed to check the gfn
+- * cache generation against the memslots generation.
+- */
+- idx = srcu_read_lock(&vcpu->kvm->srcu);
+- if (kvm_xen_msr_enabled(vcpu->kvm))
+- kvm_xen_runstate_set_preempted(vcpu);
+- else
+- kvm_steal_time_set_preempted(vcpu);
+- srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ /*
++ * Take the srcu lock as memslots will be accessed to check the gfn
++ * cache generation against the memslots generation.
++ */
++ idx = srcu_read_lock(&vcpu->kvm->srcu);
++ if (kvm_xen_msr_enabled(vcpu->kvm))
++ kvm_xen_runstate_set_preempted(vcpu);
++ else
++ kvm_steal_time_set_preempted(vcpu);
++ srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ }
+
+ static_call(kvm_x86_vcpu_put)(vcpu);
+ vcpu->arch.last_host_tsc = rdtsc();
+@@ -9767,6 +9784,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ return;
+
+ down_read(&vcpu->kvm->arch.apicv_update_lock);
++ preempt_disable();
+
+ activate = kvm_apicv_activated(vcpu->kvm);
+ if (vcpu->arch.apicv_active == activate)
+@@ -9786,6 +9804,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ out:
++ preempt_enable();
+ up_read(&vcpu->kvm->arch.apicv_update_lock);
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
+@@ -10363,6 +10382,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
+ vcpu->arch.l1tf_flush_l1d = true;
+
+ for (;;) {
++ /*
++ * If another guest vCPU requests a PV TLB flush in the middle
++ * of instruction emulation, the rest of the emulation could
++ * use a stale page translation. Assume that any code after
++ * this point can start executing an instruction.
++ */
++ vcpu->arch.at_instruction_boundary = false;
+ if (kvm_vcpu_running(vcpu)) {
+ r = vcpu_enter_guest(vcpu);
+ } else {
+diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
+index adbcc9ed59dbc..fda1413f8af95 100644
+--- a/arch/x86/kvm/xen.h
++++ b/arch/x86/kvm/xen.h
+@@ -103,8 +103,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+ * behalf of the vCPU. Only if the VMM does actually block
+ * does it need to enter RUNSTATE_blocked.
+ */
+- if (vcpu->preempted)
+- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
++ if (WARN_ON_ONCE(!vcpu->preempted))
++ return;
++
++ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+ }
+
+ /* 32-bit compatibility definitions, also used natively in 32-bit build */
+diff --git a/block/blk-ioc.c b/block/blk-ioc.c
+index df9cfe4ca5328..63fc020424082 100644
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -247,6 +247,8 @@ static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
+ INIT_HLIST_HEAD(&ioc->icq_list);
+ INIT_WORK(&ioc->release_work, ioc_release_fn);
+ #endif
++ ioc->ioprio = IOPRIO_DEFAULT;
++
+ return ioc;
+ }
+
+diff --git a/block/ioprio.c b/block/ioprio.c
+index 2fe068fcaad58..2a34cbca18aed 100644
+--- a/block/ioprio.c
++++ b/block/ioprio.c
+@@ -157,9 +157,9 @@ out:
+ int ioprio_best(unsigned short aprio, unsigned short bprio)
+ {
+ if (!ioprio_valid(aprio))
+- aprio = IOPRIO_DEFAULT;
++ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
+ if (!ioprio_valid(bprio))
+- bprio = IOPRIO_DEFAULT;
++ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
+
+ return min(aprio, bprio);
+ }
+diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
+index 598fd19b65fa4..45973aa6e06d4 100644
+--- a/drivers/acpi/apei/bert.c
++++ b/drivers/acpi/apei/bert.c
+@@ -29,16 +29,26 @@
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "BERT: " fmt
++
++#define ACPI_BERT_PRINT_MAX_RECORDS 5
+ #define ACPI_BERT_PRINT_MAX_LEN 1024
+
+ static int bert_disable;
+
++/*
++ * Print "all" the error records in the BERT table, but avoid huge spam to
++ * the console if the BIOS included oversize records, or too many records.
++ * Skipping some records here does not lose anything because the full
++ * data is available to user tools in:
++ * /sys/firmware/acpi/tables/data/BERT
++ */
+ static void __init bert_print_all(struct acpi_bert_region *region,
+ unsigned int region_len)
+ {
+ struct acpi_hest_generic_status *estatus =
+ (struct acpi_hest_generic_status *)region;
+ int remain = region_len;
++ int printed = 0, skipped = 0;
+ u32 estatus_len;
+
+ while (remain >= sizeof(struct acpi_bert_region)) {
+@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
+ if (remain < estatus_len) {
+ pr_err(FW_BUG "Truncated status block (length: %u).\n",
+ estatus_len);
+- return;
++ break;
+ }
+
+ /* No more error records. */
+ if (!estatus->block_status)
+- return;
++ break;
+
+ if (cper_estatus_check(estatus)) {
+ pr_err(FW_BUG "Invalid error record.\n");
+- return;
++ break;
+ }
+
+- pr_info_once("Error records from previous boot:\n");
+- if (region_len < ACPI_BERT_PRINT_MAX_LEN)
++ if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
++ printed < ACPI_BERT_PRINT_MAX_RECORDS) {
++ pr_info_once("Error records from previous boot:\n");
+ cper_estatus_print(KERN_INFO HW_ERR, estatus);
+- else
+- pr_info_once("Max print length exceeded, table data is available at:\n"
+- "/sys/firmware/acpi/tables/data/BERT");
++ printed++;
++ } else {
++ skipped++;
++ }
+
+ /*
+ * Because the boot error source is "one-time polled" type,
+@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region,
+ estatus = (void *)estatus + estatus_len;
+ remain -= estatus_len;
+ }
++
++ if (skipped)
++ pr_info(HW_ERR "Skipped %d error records\n", skipped);
+ }
+
+ static int __init setup_bert_disable(char *str)
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index becc198e4c224..6615f59ab7fd2 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -430,7 +430,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ },
+@@ -438,59 +437,75 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xRU",
++ .ident = "Clevo NL5xNU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+- DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
++ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ },
++ /*
++ * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10,
++ * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo
++ * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description
++ * above.
++ */
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xRU",
++ .ident = "TongFang PF5PU1G",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+- DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
++ DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xNU",
++ .ident = "TongFang PF4NU1F",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF4NU1F",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xNU",
++ .ident = "TongFang PF5NU1G",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xNU",
++ .ident = "TongFang PF5NU1G",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF5LUXG",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"),
+ },
+ },
+-
+ /*
+ * Desktops which falsely report a backlight and which our heuristics
+ * for this do not catch.
+diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
+index d9ceca7a7935c..a18f289d73466 100644
+--- a/drivers/bluetooth/btbcm.c
++++ b/drivers/bluetooth/btbcm.c
+@@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
+ { 0x6606, "BCM4345C5" }, /* 003.006.006 */
+ { 0x230f, "BCM4356A2" }, /* 001.003.015 */
+ { 0x220e, "BCM20702A1" }, /* 001.002.014 */
++ { 0x420d, "BCM4349B1" }, /* 002.002.013 */
++ { 0x420e, "BCM4349B1" }, /* 002.002.014 */
+ { 0x4217, "BCM4329B1" }, /* 002.002.023 */
+ { 0x6106, "BCM4359C0" }, /* 003.001.006 */
+ { 0x4106, "BCM4335A0" }, /* 002.001.006 */
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index e48c3ad069bb4..d789c077d95dc 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -422,6 +422,18 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
+
++ /* Realtek 8852CE Bluetooth devices */
++ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++
+ /* Realtek Bluetooth devices */
+ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
+ .driver_info = BTUSB_REALTEK },
+@@ -469,6 +481,9 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
+
+ /* Additional Realtek 8723AE Bluetooth devices */
+ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
+diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
+index 785f445dd60d5..49bed66b8c84e 100644
+--- a/drivers/bluetooth/hci_bcm.c
++++ b/drivers/bluetooth/hci_bcm.c
+@@ -1544,8 +1544,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = {
+ { .compatible = "brcm,bcm43430a0-bt" },
+ { .compatible = "brcm,bcm43430a1-bt" },
+ { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
++ { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data },
+ { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
+ { .compatible = "brcm,bcm4335a0" },
++ { .compatible = "infineon,cyw55572-bt" },
+ { },
+ };
+ MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index eab34e24d9446..8df11016fd51b 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -1588,7 +1588,7 @@ static bool qca_wakeup(struct hci_dev *hdev)
+ wakeup = device_may_wakeup(hu->serdev->ctrl->dev.parent);
+ bt_dev_dbg(hu->hdev, "wakeup status : %d", wakeup);
+
+- return !wakeup;
++ return wakeup;
+ }
+
+ static int qca_regulator_init(struct hci_uart *hu)
+diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
+index 73b3961890397..afb0942ccc293 100644
+--- a/drivers/macintosh/adb.c
++++ b/drivers/macintosh/adb.c
+@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req)
+
+ switch(req->data[1]) {
+ case ADB_QUERY_GETDEVINFO:
+- if (req->nbytes < 3)
++ if (req->nbytes < 3 || req->data[2] >= 16)
+ break;
+ mutex_lock(&adb_handler_mutex);
+ req->reply[0] = adb_handler[req->data[2]].original_address;
+diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
+index 19db5693175fe..2a0ead57db71c 100644
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -104,6 +104,7 @@ struct btrfs_block_group {
+ unsigned int relocating_repair:1;
+ unsigned int chunk_item_inserted:1;
+ unsigned int zone_is_active:1;
++ unsigned int zoned_data_reloc_ongoing:1;
+
+ int disk_cache_state;
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 6aa92f84f4654..f45ecd939a2cb 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3836,7 +3836,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ block_group->start == fs_info->data_reloc_bg ||
+ fs_info->data_reloc_bg == 0);
+
+- if (block_group->ro) {
++ if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
+ ret = 1;
+ goto out;
+ }
+@@ -3898,8 +3898,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ out:
+ if (ret && ffe_ctl->for_treelog)
+ fs_info->treelog_bg = 0;
+- if (ret && ffe_ctl->for_data_reloc)
++ if (ret && ffe_ctl->for_data_reloc &&
++ fs_info->data_reloc_bg == block_group->start) {
++ /*
++ * Do not allow further allocations from this block group.
++ * Compared to increasing the ->ro, setting the
++ * ->zoned_data_reloc_ongoing flag still allows nocow
++ * writers to come in. See btrfs_inc_nocow_writers().
++ *
++ * We need to disable an allocation to avoid an allocation of
++ * regular (non-relocation data) extent. With mix of relocation
++ * extents and regular extents, we can dispatch WRITE commands
++ * (for relocation extents) and ZONE APPEND commands (for
++ * regular extents) at the same time to the same zone, which
++ * easily break the write pointer.
++ */
++ block_group->zoned_data_reloc_ongoing = 1;
+ fs_info->data_reloc_bg = 0;
++ }
+ spin_unlock(&fs_info->relocation_bg_lock);
+ spin_unlock(&fs_info->treelog_bg_lock);
+ spin_unlock(&block_group->lock);
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index a23a42ba88cae..68ddd90685d9d 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5214,13 +5214,14 @@ int extent_writepages(struct address_space *mapping,
+ */
+ btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
+ ret = extent_write_cache_pages(mapping, wbc, &epd);
+- btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ ASSERT(ret <= 0);
+ if (ret < 0) {
++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ return ret;
+ }
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 9ae79342631a8..5d15e374d0326 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3102,6 +3102,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
+ ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ logical_len);
++ btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
++ ordered_extent->disk_num_bytes);
+ } else {
+ BUG_ON(root == fs_info->tree_root);
+ ret = insert_ordered_extent_file_extent(trans, ordered_extent);
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 5091d679a602c..84b6d39509bd3 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2005,6 +2005,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+ struct btrfs_device *device;
+ u64 min_alloc_bytes;
+ u64 physical;
++ int i;
+
+ if (!btrfs_is_zoned(fs_info))
+ return;
+@@ -2039,13 +2040,25 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+ spin_unlock(&block_group->lock);
+
+ map = block_group->physical_map;
+- device = map->stripes[0].dev;
+- physical = map->stripes[0].physical;
++ for (i = 0; i < map->num_stripes; i++) {
++ int ret;
+
+- if (!device->zone_info->max_active_zones)
+- goto out;
++ device = map->stripes[i].dev;
++ physical = map->stripes[i].physical;
++
++ if (device->zone_info->max_active_zones == 0)
++ continue;
+
+- btrfs_dev_clear_active_zone(device, physical);
++ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
++ physical >> SECTOR_SHIFT,
++ device->zone_info->zone_size >> SECTOR_SHIFT,
++ GFP_NOFS);
++
++ if (ret)
++ return;
++
++ btrfs_dev_clear_active_zone(device, physical);
++ }
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ ASSERT(!list_empty(&block_group->active_bg_list));
+@@ -2116,3 +2129,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
++
++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
++ u64 length)
++{
++ struct btrfs_block_group *block_group;
++
++ if (!btrfs_is_zoned(fs_info))
++ return;
++
++ block_group = btrfs_lookup_block_group(fs_info, logical);
++ /* It should be called on a previous data relocation block group. */
++ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
++
++ spin_lock(&block_group->lock);
++ if (!block_group->zoned_data_reloc_ongoing)
++ goto out;
++
++ /* All relocation extents are written. */
++ if (block_group->start + block_group->alloc_offset == logical + length) {
++ /* Now, release this block group for further allocations. */
++ block_group->zoned_data_reloc_ongoing = 0;
++ }
++
++out:
++ spin_unlock(&block_group->lock);
++ btrfs_put_block_group(block_group);
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 2d898970aec5f..cf6320feef464 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -80,6 +80,8 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb);
+ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
+ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
++ u64 length);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+ struct blk_zone *zone)
+@@ -241,6 +243,9 @@ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
+
+ static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
++
++static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
++ u64 logical, u64 length) { }
+ #endif
+
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
+index 3f53bc27a19bf..3d088a88f8320 100644
+--- a/include/linux/ioprio.h
++++ b/include/linux/ioprio.h
+@@ -11,7 +11,7 @@
+ /*
+ * Default IO priority.
+ */
+-#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
++#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
+
+ /*
+ * Check that a priority value has a valid class.
+diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
+index 9d09f489b60e0..2e0f75bcb7fd1 100644
+--- a/kernel/entry/kvm.c
++++ b/kernel/entry/kvm.c
+@@ -9,12 +9,6 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
+ int ret;
+
+ if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
+- clear_notify_signal();
+- if (task_work_pending(current))
+- task_work_run();
+- }
+-
+- if (ti_work & _TIF_SIGPENDING) {
+ kvm_handle_signal_exit(vcpu);
+ return -EINTR;
+ }
+diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
+index 5d09ded0c491f..04b7e3654ff77 100644
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -301,6 +301,7 @@
+ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
+index ad084326f24c2..f951147cc7fdc 100644
+--- a/tools/arch/x86/include/asm/msr-index.h
++++ b/tools/arch/x86/include/asm/msr-index.h
+@@ -148,6 +148,10 @@
+ * are restricted to targets in
+ * kernel.
+ */
++#define ARCH_CAP_PBRSB_NO BIT(24) /*
++ * Not susceptible to Post-Barrier
++ * Return Stack Buffer Predictions.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
+index 5a5bd74f55bd5..9c366b3a676db 100755
+--- a/tools/kvm/kvm_stat/kvm_stat
++++ b/tools/kvm/kvm_stat/kvm_stat
+@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
+ .format(values))
+ if len(pids) > 1:
+ sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
+- ' to specify the desired pid'.format(" ".join(pids)))
++ ' to specify the desired pid'
++ .format(" ".join(map(str, pids))))
+ namespace.pid = pids[0]
+
+ argparser = argparse.ArgumentParser(description=description_text,
+diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+index e0b0164e9af85..be1d9728c4cea 100644
+--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
++++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
+
+ void ucall(uint64_t cmd, int nargs, ...)
+ {
+- struct ucall uc = {
+- .cmd = cmd,
+- };
++ struct ucall uc = {};
+ va_list va;
+ int i;
+
++ WRITE_ONCE(uc.cmd, cmd);
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+- uc.args[i] = va_arg(va, uint64_t);
++ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
++ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
+ }
+
+ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
+index 722df3a28791c..ddd68ba0c99fc 100644
+--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
++++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
+@@ -110,6 +110,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
++ uint64_t region_end_gfn;
+ int i;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+@@ -144,18 +145,29 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+
+ pta->vm = vm;
+
++ /* Put the test region at the top guest physical memory. */
++ region_end_gfn = vm_get_max_gfn(vm) + 1;
++
++#ifdef __x86_64__
++ /*
++ * When running vCPUs in L2, restrict the test region to 48 bits to
++ * avoid needing 5-level page tables to identity map L2.
++ */
++ if (pta->nested)
++ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
++#endif
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
++ TEST_ASSERT(guest_num_pages < region_end_gfn,
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " vcpus: %d wss: %" PRIx64 "]\n",
+- guest_num_pages, vm_get_max_gfn(vm), vcpus,
++ guest_num_pages, region_end_gfn - 1, vcpus,
+ vcpu_memory_bytes);
+
+- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
++ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = align_down(pta->gpa, backing_src_pagesz);
+ #ifdef __s390x__
+ /* Align to 1M (segment size) */
+diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+index e0b2bb1339b16..3330fb183c680 100644
+--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
++++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+@@ -44,7 +44,7 @@ static inline void nop_loop(void)
+ {
+ int i;
+
+- for (i = 0; i < 1000000; i++)
++ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+ }
+
+@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+- /* First, check MSR-based clocksource */
++ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+ tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+- /* First, check MSR-based clocksource */
++ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
+index 9b68658b6bb85..5b98f3ee58a58 100644
+--- a/tools/vm/slabinfo.c
++++ b/tools/vm/slabinfo.c
+@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+ return l;
+ }
+
++static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
++{
++ char x[128];
++ FILE *f;
++ size_t l;
++
++ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
++ f = fopen(x, "r");
++ if (!f) {
++ buffer[0] = 0;
++ l = 0;
++ } else {
++ l = fread(buffer, 1, sizeof(buffer), f);
++ buffer[l] = 0;
++ fclose(f);
++ }
++ return l;
++}
+
+ /*
+ * Put a size string together
+@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
+ {
+ printf("\n%s: Kernel object allocation\n", s->name);
+ printf("-----------------------------------------------------------------------\n");
+- if (read_slab_obj(s, "alloc_calls"))
++ if (read_debug_slab_obj(s, "alloc_traces"))
++ printf("%s", buffer);
++ else if (read_slab_obj(s, "alloc_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+
+ printf("\n%s: Kernel object freeing\n", s->name);
+ printf("------------------------------------------------------------------------\n");
+- if (read_slab_obj(s, "free_calls"))
++ if (read_debug_slab_obj(s, "free_traces"))
++ printf("%s", buffer);
++ else if (read_slab_obj(s, "free_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 24cb37d19c638..7f1d19689701b 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -3327,9 +3327,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
+
+ vcpu->stat.generic.blocking = 1;
+
++ preempt_disable();
+ kvm_arch_vcpu_blocking(vcpu);
+-
+ prepare_to_rcuwait(wait);
++ preempt_enable();
++
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+@@ -3339,9 +3341,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
+ waited = true;
+ schedule();
+ }
+- finish_rcuwait(wait);
+
++ preempt_disable();
++ finish_rcuwait(wait);
+ kvm_arch_vcpu_unblocking(vcpu);
++ preempt_enable();
+
+ vcpu->stat.generic.blocking = 0;
+