From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.18 commit in: /
Date: Thu, 11 Aug 2022 12:33:09
Message-Id: 1660221165.7aaaf24efa9606d739c15aa28d8118b093cc315e.mpagano@gentoo
commit: 7aaaf24efa9606d739c15aa28d8118b093cc315e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Aug 11 12:32:45 2022 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Aug 11 12:32:45 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=7aaaf24e

Linux patch 5.18.17

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

0000_README | 4 +
1016_linux-5.18.17.patch | 1418 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1422 insertions(+)

16 diff --git a/0000_README b/0000_README
17 index efa0b25e..e0f23579 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -107,6 +107,10 @@ Patch: 1015_linux-5.18.16.patch
21 From: http://www.kernel.org
22 Desc: Linux 5.18.16
23
24 +Patch: 1016_linux-5.18.17.patch
25 +From: http://www.kernel.org
26 +Desc: Linux 5.18.17
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1016_linux-5.18.17.patch b/1016_linux-5.18.17.patch
33 new file mode 100644
34 index 00000000..94fc8829
35 --- /dev/null
36 +++ b/1016_linux-5.18.17.patch
37 @@ -0,0 +1,1418 @@
38 +diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
39 +index 9e9556826450b..2ce2a38cdd556 100644
40 +--- a/Documentation/admin-guide/hw-vuln/spectre.rst
41 ++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
42 +@@ -422,6 +422,14 @@ The possible values in this file are:
43 + 'RSB filling' Protection of RSB on context switch enabled
44 + ============= ===========================================
45 +
46 ++ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
47 ++
48 ++ =========================== =======================================================
49 ++ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
50 ++ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
51 ++ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
52 ++ =========================== =======================================================
53 ++
54 + Full mitigation might require a microcode update from the CPU
55 + vendor. When the necessary microcode is not available, the kernel will
56 + report vulnerability.
57 +diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
58 +index 5aac094fd2172..58ecafc1b7f90 100644
59 +--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
60 ++++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
61 +@@ -23,6 +23,7 @@ properties:
62 + - brcm,bcm4345c5
63 + - brcm,bcm43540-bt
64 + - brcm,bcm4335a0
65 ++ - brcm,bcm4349-bt
66 +
67 + shutdown-gpios:
68 + maxItems: 1
69 +diff --git a/Makefile b/Makefile
70 +index 18bcbcd037f0a..ef8c18e5c161c 100644
71 +--- a/Makefile
72 ++++ b/Makefile
73 +@@ -1,7 +1,7 @@
74 + # SPDX-License-Identifier: GPL-2.0
75 + VERSION = 5
76 + PATCHLEVEL = 18
77 +-SUBLEVEL = 16
78 ++SUBLEVEL = 17
79 + EXTRAVERSION =
80 + NAME = Superb Owl
81 +
82 +diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
83 +index 9c3d86e397bf3..1fae18ba11ed1 100644
84 +--- a/arch/arm64/crypto/poly1305-glue.c
85 ++++ b/arch/arm64/crypto/poly1305-glue.c
86 +@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
87 + {
88 + if (unlikely(!dctx->sset)) {
89 + if (!dctx->rset) {
90 +- poly1305_init_arch(dctx, src);
91 ++ poly1305_init_arm64(&dctx->h, src);
92 + src += POLY1305_BLOCK_SIZE;
93 + len -= POLY1305_BLOCK_SIZE;
94 + dctx->rset = 1;
95 +diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
96 +index 96dc0f7da258d..a971d462f531c 100644
97 +--- a/arch/arm64/include/asm/kernel-pgtable.h
98 ++++ b/arch/arm64/include/asm/kernel-pgtable.h
99 +@@ -103,8 +103,8 @@
100 + /*
101 + * Initial memory map attributes.
102 + */
103 +-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
104 +-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
105 ++#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
106 ++#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
107 +
108 + #if ARM64_KERNEL_USES_PMD_MAPS
109 + #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
110 +diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
111 +index 6a98f1a38c29a..8a93a0a7489b2 100644
112 +--- a/arch/arm64/kernel/head.S
113 ++++ b/arch/arm64/kernel/head.S
114 +@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
115 + subs x1, x1, #64
116 + b.ne 1b
117 +
118 +- mov x7, SWAPPER_MM_MMUFLAGS
119 ++ mov_q x7, SWAPPER_MM_MMUFLAGS
120 +
121 + /*
122 + * Create the identity mapping.
123 +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
124 +index 4d1d87f76a74f..ce1f5a876cfea 100644
125 +--- a/arch/x86/Kconfig
126 ++++ b/arch/x86/Kconfig
127 +@@ -2469,7 +2469,7 @@ config RETPOLINE
128 + config RETHUNK
129 + bool "Enable return-thunks"
130 + depends on RETPOLINE && CC_HAS_RETURN_THUNK
131 +- default y
132 ++ default y if X86_64
133 + help
134 + Compile the kernel with the return-thunks compiler option to guard
135 + against kernel-to-user data leaks by avoiding return speculation.
136 +@@ -2478,21 +2478,21 @@ config RETHUNK
137 +
138 + config CPU_UNRET_ENTRY
139 + bool "Enable UNRET on kernel entry"
140 +- depends on CPU_SUP_AMD && RETHUNK
141 ++ depends on CPU_SUP_AMD && RETHUNK && X86_64
142 + default y
143 + help
144 + Compile the kernel with support for the retbleed=unret mitigation.
145 +
146 + config CPU_IBPB_ENTRY
147 + bool "Enable IBPB on kernel entry"
148 +- depends on CPU_SUP_AMD
149 ++ depends on CPU_SUP_AMD && X86_64
150 + default y
151 + help
152 + Compile the kernel with support for the retbleed=ibpb mitigation.
153 +
154 + config CPU_IBRS_ENTRY
155 + bool "Enable IBRS on kernel entry"
156 +- depends on CPU_SUP_INTEL
157 ++ depends on CPU_SUP_INTEL && X86_64
158 + default y
159 + help
160 + Compile the kernel with support for the spectre_v2=ibrs mitigation.
161 +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
162 +index 49889f171e860..e82da174d28c3 100644
163 +--- a/arch/x86/include/asm/cpufeatures.h
164 ++++ b/arch/x86/include/asm/cpufeatures.h
165 +@@ -302,6 +302,7 @@
166 + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
167 + #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
168 + #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
169 ++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
170 +
171 + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
172 + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
173 +@@ -453,5 +454,6 @@
174 + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
175 + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
176 + #define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
177 ++#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
178 +
179 + #endif /* _ASM_X86_CPUFEATURES_H */
180 +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
181 +index 4ff36610af6ab..9fdaa847d4b66 100644
182 +--- a/arch/x86/include/asm/kvm_host.h
183 ++++ b/arch/x86/include/asm/kvm_host.h
184 +@@ -651,6 +651,7 @@ struct kvm_vcpu_arch {
185 + u64 ia32_misc_enable_msr;
186 + u64 smbase;
187 + u64 smi_count;
188 ++ bool at_instruction_boundary;
189 + bool tpr_access_reporting;
190 + bool xsaves_enabled;
191 + bool xfd_no_write_intercept;
192 +@@ -1289,6 +1290,8 @@ struct kvm_vcpu_stat {
193 + u64 nested_run;
194 + u64 directed_yield_attempted;
195 + u64 directed_yield_successful;
196 ++ u64 preemption_reported;
197 ++ u64 preemption_other;
198 + u64 guest_mode;
199 + };
200 +
201 +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
202 +index ad084326f24c2..f951147cc7fdc 100644
203 +--- a/arch/x86/include/asm/msr-index.h
204 ++++ b/arch/x86/include/asm/msr-index.h
205 +@@ -148,6 +148,10 @@
206 + * are restricted to targets in
207 + * kernel.
208 + */
209 ++#define ARCH_CAP_PBRSB_NO BIT(24) /*
210 ++ * Not susceptible to Post-Barrier
211 ++ * Return Stack Buffer Predictions.
212 ++ */
213 +
214 + #define MSR_IA32_FLUSH_CMD 0x0000010b
215 + #define L1D_FLUSH BIT(0) /*
216 +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
217 +index 38a3e86e665ef..d3a3cc6772ee1 100644
218 +--- a/arch/x86/include/asm/nospec-branch.h
219 ++++ b/arch/x86/include/asm/nospec-branch.h
220 +@@ -60,7 +60,9 @@
221 + 774: \
222 + add $(BITS_PER_LONG/8) * 2, sp; \
223 + dec reg; \
224 +- jnz 771b;
225 ++ jnz 771b; \
226 ++ /* barrier for jnz misprediction */ \
227 ++ lfence;
228 +
229 + #ifdef __ASSEMBLY__
230 +
231 +@@ -118,13 +120,28 @@
232 + #endif
233 + .endm
234 +
235 ++.macro ISSUE_UNBALANCED_RET_GUARD
236 ++ ANNOTATE_INTRA_FUNCTION_CALL
237 ++ call .Lunbalanced_ret_guard_\@
238 ++ int3
239 ++.Lunbalanced_ret_guard_\@:
240 ++ add $(BITS_PER_LONG/8), %_ASM_SP
241 ++ lfence
242 ++.endm
243 ++
244 + /*
245 + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
246 + * monstrosity above, manually.
247 + */
248 +-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
249 ++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
250 ++.ifb \ftr2
251 + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
252 ++.else
253 ++ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
254 ++.endif
255 + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
256 ++.Lunbalanced_\@:
257 ++ ISSUE_UNBALANCED_RET_GUARD
258 + .Lskip_rsb_\@:
259 + .endm
260 +
261 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
262 +index fd986a8ba2bd7..fa625b2a8a939 100644
263 +--- a/arch/x86/kernel/cpu/bugs.c
264 ++++ b/arch/x86/kernel/cpu/bugs.c
265 +@@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
266 + }
267 + }
268 +
269 ++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
270 ++{
271 ++ /*
272 ++ * Similar to context switches, there are two types of RSB attacks
273 ++ * after VM exit:
274 ++ *
275 ++ * 1) RSB underflow
276 ++ *
277 ++ * 2) Poisoned RSB entry
278 ++ *
279 ++ * When retpoline is enabled, both are mitigated by filling/clearing
280 ++ * the RSB.
281 ++ *
282 ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
283 ++ * prediction isolation protections, RSB still needs to be cleared
284 ++ * because of #2. Note that SMEP provides no protection here, unlike
285 ++ * user-space-poisoned RSB entries.
286 ++ *
287 ++ * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
288 ++ * bug is present then a LITE version of RSB protection is required,
289 ++ * just a single call needs to retire before a RET is executed.
290 ++ */
291 ++ switch (mode) {
292 ++ case SPECTRE_V2_NONE:
293 ++ return;
294 ++
295 ++ case SPECTRE_V2_EIBRS_LFENCE:
296 ++ case SPECTRE_V2_EIBRS:
297 ++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
298 ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
299 ++ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
300 ++ }
301 ++ return;
302 ++
303 ++ case SPECTRE_V2_EIBRS_RETPOLINE:
304 ++ case SPECTRE_V2_RETPOLINE:
305 ++ case SPECTRE_V2_LFENCE:
306 ++ case SPECTRE_V2_IBRS:
307 ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
308 ++ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
309 ++ return;
310 ++ }
311 ++
312 ++ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
313 ++ dump_stack();
314 ++}
315 ++
316 + static void __init spectre_v2_select_mitigation(void)
317 + {
318 + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
319 +@@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void)
320 + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
321 + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
322 +
323 +- /*
324 +- * Similar to context switches, there are two types of RSB attacks
325 +- * after vmexit:
326 +- *
327 +- * 1) RSB underflow
328 +- *
329 +- * 2) Poisoned RSB entry
330 +- *
331 +- * When retpoline is enabled, both are mitigated by filling/clearing
332 +- * the RSB.
333 +- *
334 +- * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
335 +- * prediction isolation protections, RSB still needs to be cleared
336 +- * because of #2. Note that SMEP provides no protection here, unlike
337 +- * user-space-poisoned RSB entries.
338 +- *
339 +- * eIBRS, on the other hand, has RSB-poisoning protections, so it
340 +- * doesn't need RSB clearing after vmexit.
341 +- */
342 +- if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
343 +- boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
344 +- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
345 ++ spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
346 +
347 + /*
348 + * Retpoline protects the kernel, but doesn't protect firmware. IBRS
349 +@@ -2285,6 +2311,19 @@ static char *ibpb_state(void)
350 + return "";
351 + }
352 +
353 ++static char *pbrsb_eibrs_state(void)
354 ++{
355 ++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
356 ++ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
357 ++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
358 ++ return ", PBRSB-eIBRS: SW sequence";
359 ++ else
360 ++ return ", PBRSB-eIBRS: Vulnerable";
361 ++ } else {
362 ++ return ", PBRSB-eIBRS: Not affected";
363 ++ }
364 ++}
365 ++
366 + static ssize_t spectre_v2_show_state(char *buf)
367 + {
368 + if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
369 +@@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf)
370 + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
371 + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
372 +
373 +- return sprintf(buf, "%s%s%s%s%s%s\n",
374 ++ return sprintf(buf, "%s%s%s%s%s%s%s\n",
375 + spectre_v2_strings[spectre_v2_enabled],
376 + ibpb_state(),
377 + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
378 + stibp_state(),
379 + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
380 ++ pbrsb_eibrs_state(),
381 + spectre_v2_module_string());
382 + }
383 +
384 +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
385 +index 1f43ddf2ffc36..d47e20e305cd2 100644
386 +--- a/arch/x86/kernel/cpu/common.c
387 ++++ b/arch/x86/kernel/cpu/common.c
388 +@@ -1161,6 +1161,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
389 + #define NO_SWAPGS BIT(6)
390 + #define NO_ITLB_MULTIHIT BIT(7)
391 + #define NO_SPECTRE_V2 BIT(8)
392 ++#define NO_EIBRS_PBRSB BIT(9)
393 +
394 + #define VULNWL(vendor, family, model, whitelist) \
395 + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
396 +@@ -1203,7 +1204,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
397 +
398 + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
399 + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
400 +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
401 ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
402 +
403 + /*
404 + * Technically, swapgs isn't serializing on AMD (despite it previously
405 +@@ -1213,7 +1214,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
406 + * good enough for our purposes.
407 + */
408 +
409 +- VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
410 ++ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
411 ++ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
412 ++ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
413 +
414 + /* AMD Family 0xf - 0x12 */
415 + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
416 +@@ -1391,6 +1394,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
417 + setup_force_cpu_bug(X86_BUG_RETBLEED);
418 + }
419 +
420 ++ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
421 ++ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
422 ++ !(ia32_cap & ARCH_CAP_PBRSB_NO))
423 ++ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
424 ++
425 + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
426 + return;
427 +
428 +diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
429 +index 6d3b3e5a5533b..ee4802d7b36cd 100644
430 +--- a/arch/x86/kvm/mmu/tdp_iter.c
431 ++++ b/arch/x86/kvm/mmu/tdp_iter.c
432 +@@ -145,6 +145,15 @@ static bool try_step_up(struct tdp_iter *iter)
433 + return true;
434 + }
435 +
436 ++/*
437 ++ * Step the iterator back up a level in the paging structure. Should only be
438 ++ * used when the iterator is below the root level.
439 ++ */
440 ++void tdp_iter_step_up(struct tdp_iter *iter)
441 ++{
442 ++ WARN_ON(!try_step_up(iter));
443 ++}
444 ++
445 + /*
446 + * Step to the next SPTE in a pre-order traversal of the paging structure.
447 + * To get to the next SPTE, the iterator either steps down towards the goal
448 +diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
449 +index f0af385c56e03..adfca0cf94d3a 100644
450 +--- a/arch/x86/kvm/mmu/tdp_iter.h
451 ++++ b/arch/x86/kvm/mmu/tdp_iter.h
452 +@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
453 + int min_level, gfn_t next_last_level_gfn);
454 + void tdp_iter_next(struct tdp_iter *iter);
455 + void tdp_iter_restart(struct tdp_iter *iter);
456 ++void tdp_iter_step_up(struct tdp_iter *iter);
457 +
458 + #endif /* __KVM_X86_MMU_TDP_ITER_H */
459 +diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
460 +index 922b06bf4b948..b61a11d462ccb 100644
461 +--- a/arch/x86/kvm/mmu/tdp_mmu.c
462 ++++ b/arch/x86/kvm/mmu/tdp_mmu.c
463 +@@ -1748,12 +1748,12 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
464 + gfn_t start = slot->base_gfn;
465 + gfn_t end = start + slot->npages;
466 + struct tdp_iter iter;
467 ++ int max_mapping_level;
468 + kvm_pfn_t pfn;
469 +
470 + rcu_read_lock();
471 +
472 + tdp_root_for_each_pte(iter, root, start, end) {
473 +-retry:
474 + if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
475 + continue;
476 +
477 +@@ -1761,15 +1761,41 @@ retry:
478 + !is_last_spte(iter.old_spte, iter.level))
479 + continue;
480 +
481 ++ /*
482 ++ * This is a leaf SPTE. Check if the PFN it maps can
483 ++ * be mapped at a higher level.
484 ++ */
485 + pfn = spte_to_pfn(iter.old_spte);
486 +- if (kvm_is_reserved_pfn(pfn) ||
487 +- iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
488 +- pfn, PG_LEVEL_NUM))
489 ++
490 ++ if (kvm_is_reserved_pfn(pfn))
491 + continue;
492 +
493 ++ max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
494 ++ iter.gfn, pfn, PG_LEVEL_NUM);
495 ++
496 ++ WARN_ON(max_mapping_level < iter.level);
497 ++
498 ++ /*
499 ++ * If this page is already mapped at the highest
500 ++ * viable level, there's nothing more to do.
501 ++ */
502 ++ if (max_mapping_level == iter.level)
503 ++ continue;
504 ++
505 ++ /*
506 ++ * The page can be remapped at a higher level, so step
507 ++ * up to zap the parent SPTE.
508 ++ */
509 ++ while (max_mapping_level > iter.level)
510 ++ tdp_iter_step_up(&iter);
511 ++
512 + /* Note, a successful atomic zap also does a remote TLB flush. */
513 +- if (tdp_mmu_zap_spte_atomic(kvm, &iter))
514 +- goto retry;
515 ++ tdp_mmu_zap_spte_atomic(kvm, &iter);
516 ++
517 ++ /*
518 ++ * If the atomic zap fails, the iter will recurse back into
519 ++ * the same subtree to retry.
520 ++ */
521 + }
522 +
523 + rcu_read_unlock();
524 +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
525 +index 76e9e6eb71d63..7aa1ce34a5204 100644
526 +--- a/arch/x86/kvm/svm/sev.c
527 ++++ b/arch/x86/kvm/svm/sev.c
528 +@@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
529 +
530 + /* If source buffer is not aligned then use an intermediate buffer */
531 + if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
532 +- src_tpage = alloc_page(GFP_KERNEL);
533 ++ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
534 + if (!src_tpage)
535 + return -ENOMEM;
536 +
537 +@@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
538 + if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
539 + int dst_offset;
540 +
541 +- dst_tpage = alloc_page(GFP_KERNEL);
542 ++ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
543 + if (!dst_tpage) {
544 + ret = -ENOMEM;
545 + goto e_free;
546 +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
547 +index 6bfb0b0e66bd3..c667214c630b1 100644
548 +--- a/arch/x86/kvm/svm/svm.c
549 ++++ b/arch/x86/kvm/svm/svm.c
550 +@@ -4166,6 +4166,8 @@ out:
551 +
552 + static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
553 + {
554 ++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
555 ++ vcpu->arch.at_instruction_boundary = true;
556 + }
557 +
558 + static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
559 +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
560 +index 4182c7ffc9091..6de96b9438044 100644
561 +--- a/arch/x86/kvm/vmx/vmenter.S
562 ++++ b/arch/x86/kvm/vmx/vmenter.S
563 +@@ -227,11 +227,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
564 + * entries and (in some cases) RSB underflow.
565 + *
566 + * eIBRS has its own protection against poisoned RSB, so it doesn't
567 +- * need the RSB filling sequence. But it does need to be enabled
568 +- * before the first unbalanced RET.
569 ++ * need the RSB filling sequence. But it does need to be enabled, and a
570 ++ * single call to retire, before the first unbalanced RET.
571 + */
572 +
573 +- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
574 ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
575 ++ X86_FEATURE_RSB_VMEXIT_LITE
576 ++
577 +
578 + pop %_ASM_ARG2 /* @flags */
579 + pop %_ASM_ARG1 /* @vmx */
580 +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
581 +index 4b6a0268c78e3..597c3c08da501 100644
582 +--- a/arch/x86/kvm/vmx/vmx.c
583 ++++ b/arch/x86/kvm/vmx/vmx.c
584 +@@ -6630,6 +6630,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
585 + return;
586 +
587 + handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
588 ++ vcpu->arch.at_instruction_boundary = true;
589 + }
590 +
591 + static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
592 +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
593 +index 53b6fdf30c99b..65b0ec28bd52b 100644
594 +--- a/arch/x86/kvm/x86.c
595 ++++ b/arch/x86/kvm/x86.c
596 +@@ -291,6 +291,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
597 + STATS_DESC_COUNTER(VCPU, nested_run),
598 + STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
599 + STATS_DESC_COUNTER(VCPU, directed_yield_successful),
600 ++ STATS_DESC_COUNTER(VCPU, preemption_reported),
601 ++ STATS_DESC_COUNTER(VCPU, preemption_other),
602 + STATS_DESC_ICOUNTER(VCPU, guest_mode)
603 + };
604 +
605 +@@ -4607,6 +4609,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
606 + struct kvm_memslots *slots;
607 + static const u8 preempted = KVM_VCPU_PREEMPTED;
608 +
609 ++ /*
610 ++ * The vCPU can be marked preempted if and only if the VM-Exit was on
611 ++ * an instruction boundary and will not trigger guest emulation of any
612 ++ * kind (see vcpu_run). Vendor specific code controls (conservatively)
613 ++ * when this is true, for example allowing the vCPU to be marked
614 ++ * preempted if and only if the VM-Exit was due to a host interrupt.
615 ++ */
616 ++ if (!vcpu->arch.at_instruction_boundary) {
617 ++ vcpu->stat.preemption_other++;
618 ++ return;
619 ++ }
620 ++
621 ++ vcpu->stat.preemption_reported++;
622 + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
623 + return;
624 +
625 +@@ -4636,19 +4651,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
626 + {
627 + int idx;
628 +
629 +- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
630 +- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
631 ++ if (vcpu->preempted) {
632 ++ if (!vcpu->arch.guest_state_protected)
633 ++ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
634 +
635 +- /*
636 +- * Take the srcu lock as memslots will be accessed to check the gfn
637 +- * cache generation against the memslots generation.
638 +- */
639 +- idx = srcu_read_lock(&vcpu->kvm->srcu);
640 +- if (kvm_xen_msr_enabled(vcpu->kvm))
641 +- kvm_xen_runstate_set_preempted(vcpu);
642 +- else
643 +- kvm_steal_time_set_preempted(vcpu);
644 +- srcu_read_unlock(&vcpu->kvm->srcu, idx);
645 ++ /*
646 ++ * Take the srcu lock as memslots will be accessed to check the gfn
647 ++ * cache generation against the memslots generation.
648 ++ */
649 ++ idx = srcu_read_lock(&vcpu->kvm->srcu);
650 ++ if (kvm_xen_msr_enabled(vcpu->kvm))
651 ++ kvm_xen_runstate_set_preempted(vcpu);
652 ++ else
653 ++ kvm_steal_time_set_preempted(vcpu);
654 ++ srcu_read_unlock(&vcpu->kvm->srcu, idx);
655 ++ }
656 +
657 + static_call(kvm_x86_vcpu_put)(vcpu);
658 + vcpu->arch.last_host_tsc = rdtsc();
659 +@@ -9767,6 +9784,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
660 + return;
661 +
662 + down_read(&vcpu->kvm->arch.apicv_update_lock);
663 ++ preempt_disable();
664 +
665 + activate = kvm_apicv_activated(vcpu->kvm);
666 + if (vcpu->arch.apicv_active == activate)
667 +@@ -9786,6 +9804,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
668 + kvm_make_request(KVM_REQ_EVENT, vcpu);
669 +
670 + out:
671 ++ preempt_enable();
672 + up_read(&vcpu->kvm->arch.apicv_update_lock);
673 + }
674 + EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
675 +@@ -10363,6 +10382,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
676 + vcpu->arch.l1tf_flush_l1d = true;
677 +
678 + for (;;) {
679 ++ /*
680 ++ * If another guest vCPU requests a PV TLB flush in the middle
681 ++ * of instruction emulation, the rest of the emulation could
682 ++ * use a stale page translation. Assume that any code after
683 ++ * this point can start executing an instruction.
684 ++ */
685 ++ vcpu->arch.at_instruction_boundary = false;
686 + if (kvm_vcpu_running(vcpu)) {
687 + r = vcpu_enter_guest(vcpu);
688 + } else {
689 +diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
690 +index adbcc9ed59dbc..fda1413f8af95 100644
691 +--- a/arch/x86/kvm/xen.h
692 ++++ b/arch/x86/kvm/xen.h
693 +@@ -103,8 +103,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
694 + * behalf of the vCPU. Only if the VMM does actually block
695 + * does it need to enter RUNSTATE_blocked.
696 + */
697 +- if (vcpu->preempted)
698 +- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
699 ++ if (WARN_ON_ONCE(!vcpu->preempted))
700 ++ return;
701 ++
702 ++ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
703 + }
704 +
705 + /* 32-bit compatibility definitions, also used natively in 32-bit build */
706 +diff --git a/block/blk-ioc.c b/block/blk-ioc.c
707 +index df9cfe4ca5328..63fc020424082 100644
708 +--- a/block/blk-ioc.c
709 ++++ b/block/blk-ioc.c
710 +@@ -247,6 +247,8 @@ static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
711 + INIT_HLIST_HEAD(&ioc->icq_list);
712 + INIT_WORK(&ioc->release_work, ioc_release_fn);
713 + #endif
714 ++ ioc->ioprio = IOPRIO_DEFAULT;
715 ++
716 + return ioc;
717 + }
718 +
719 +diff --git a/block/ioprio.c b/block/ioprio.c
720 +index 2fe068fcaad58..2a34cbca18aed 100644
721 +--- a/block/ioprio.c
722 ++++ b/block/ioprio.c
723 +@@ -157,9 +157,9 @@ out:
724 + int ioprio_best(unsigned short aprio, unsigned short bprio)
725 + {
726 + if (!ioprio_valid(aprio))
727 +- aprio = IOPRIO_DEFAULT;
728 ++ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
729 + if (!ioprio_valid(bprio))
730 +- bprio = IOPRIO_DEFAULT;
731 ++ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
732 +
733 + return min(aprio, bprio);
734 + }
735 +diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
736 +index 598fd19b65fa4..45973aa6e06d4 100644
737 +--- a/drivers/acpi/apei/bert.c
738 ++++ b/drivers/acpi/apei/bert.c
739 +@@ -29,16 +29,26 @@
740 +
741 + #undef pr_fmt
742 + #define pr_fmt(fmt) "BERT: " fmt
743 ++
744 ++#define ACPI_BERT_PRINT_MAX_RECORDS 5
745 + #define ACPI_BERT_PRINT_MAX_LEN 1024
746 +
747 + static int bert_disable;
748 +
749 ++/*
750 ++ * Print "all" the error records in the BERT table, but avoid huge spam to
751 ++ * the console if the BIOS included oversize records, or too many records.
752 ++ * Skipping some records here does not lose anything because the full
753 ++ * data is available to user tools in:
754 ++ * /sys/firmware/acpi/tables/data/BERT
755 ++ */
756 + static void __init bert_print_all(struct acpi_bert_region *region,
757 + unsigned int region_len)
758 + {
759 + struct acpi_hest_generic_status *estatus =
760 + (struct acpi_hest_generic_status *)region;
761 + int remain = region_len;
762 ++ int printed = 0, skipped = 0;
763 + u32 estatus_len;
764 +
765 + while (remain >= sizeof(struct acpi_bert_region)) {
766 +@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
767 + if (remain < estatus_len) {
768 + pr_err(FW_BUG "Truncated status block (length: %u).\n",
769 + estatus_len);
770 +- return;
771 ++ break;
772 + }
773 +
774 + /* No more error records. */
775 + if (!estatus->block_status)
776 +- return;
777 ++ break;
778 +
779 + if (cper_estatus_check(estatus)) {
780 + pr_err(FW_BUG "Invalid error record.\n");
781 +- return;
782 ++ break;
783 + }
784 +
785 +- pr_info_once("Error records from previous boot:\n");
786 +- if (region_len < ACPI_BERT_PRINT_MAX_LEN)
787 ++ if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
788 ++ printed < ACPI_BERT_PRINT_MAX_RECORDS) {
789 ++ pr_info_once("Error records from previous boot:\n");
790 + cper_estatus_print(KERN_INFO HW_ERR, estatus);
791 +- else
792 +- pr_info_once("Max print length exceeded, table data is available at:\n"
793 +- "/sys/firmware/acpi/tables/data/BERT");
794 ++ printed++;
795 ++ } else {
796 ++ skipped++;
797 ++ }
798 +
799 + /*
800 + * Because the boot error source is "one-time polled" type,
801 +@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region,
802 + estatus = (void *)estatus + estatus_len;
803 + remain -= estatus_len;
804 + }
805 ++
806 ++ if (skipped)
807 ++ pr_info(HW_ERR "Skipped %d error records\n", skipped);
808 + }
809 +
810 + static int __init setup_bert_disable(char *str)
811 +diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
812 +index becc198e4c224..6615f59ab7fd2 100644
813 +--- a/drivers/acpi/video_detect.c
814 ++++ b/drivers/acpi/video_detect.c
815 +@@ -430,7 +430,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
816 + .callback = video_detect_force_native,
817 + .ident = "Clevo NL5xRU",
818 + .matches = {
819 +- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
820 + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
821 + },
822 + },
823 +@@ -438,59 +437,75 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
824 + .callback = video_detect_force_native,
825 + .ident = "Clevo NL5xRU",
826 + .matches = {
827 +- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
828 +- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
829 ++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
830 ++ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
831 + },
832 + },
833 + {
834 + .callback = video_detect_force_native,
835 + .ident = "Clevo NL5xRU",
836 + .matches = {
837 +- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
838 +- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
839 ++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
840 ++ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
841 + },
842 + },
843 + {
844 + .callback = video_detect_force_native,
845 +- .ident = "Clevo NL5xRU",
846 ++ .ident = "Clevo NL5xNU",
847 + .matches = {
848 +- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
849 +- DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
850 ++ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
851 + },
852 + },
853 ++ /*
854 ++ * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10,
855 ++ * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo
856 ++ * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description
857 ++ * above.
858 ++ */
859 + {
860 + .callback = video_detect_force_native,
861 +- .ident = "Clevo NL5xRU",
862 ++ .ident = "TongFang PF5PU1G",
863 + .matches = {
864 +- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
865 +- DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
866 ++ DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"),
867 + },
868 + },
869 + {
870 + .callback = video_detect_force_native,
871 +- .ident = "Clevo NL5xNU",
872 ++ .ident = "TongFang PF4NU1F",
873 ++ .matches = {
874 ++ DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"),
875 ++ },
876 ++ },
877 ++ {
878 ++ .callback = video_detect_force_native,
879 ++ .ident = "TongFang PF4NU1F",
880 + .matches = {
881 + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
882 +- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
883 ++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"),
884 + },
885 + },
886 + {
887 + .callback = video_detect_force_native,
888 +- .ident = "Clevo NL5xNU",
889 ++ .ident = "TongFang PF5NU1G",
890 + .matches = {
891 +- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
892 +- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
893 ++ DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"),
894 + },
895 + },
896 + {
897 + .callback = video_detect_force_native,
898 +- .ident = "Clevo NL5xNU",
899 ++ .ident = "TongFang PF5NU1G",
900 + .matches = {
901 +- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
902 +- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
903 ++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
904 ++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"),
905 ++ },
906 ++ },
907 ++ {
908 ++ .callback = video_detect_force_native,
909 ++ .ident = "TongFang PF5LUXG",
910 ++ .matches = {
911 ++ DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"),
912 + },
913 + },
914 +-
915 + /*
916 + * Desktops which falsely report a backlight and which our heuristics
917 + * for this do not catch.
918 +diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
919 +index d9ceca7a7935c..a18f289d73466 100644
920 +--- a/drivers/bluetooth/btbcm.c
921 ++++ b/drivers/bluetooth/btbcm.c
922 +@@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
923 + { 0x6606, "BCM4345C5" }, /* 003.006.006 */
924 + { 0x230f, "BCM4356A2" }, /* 001.003.015 */
925 + { 0x220e, "BCM20702A1" }, /* 001.002.014 */
926 ++ { 0x420d, "BCM4349B1" }, /* 002.002.013 */
927 ++ { 0x420e, "BCM4349B1" }, /* 002.002.014 */
928 + { 0x4217, "BCM4329B1" }, /* 002.002.023 */
929 + { 0x6106, "BCM4359C0" }, /* 003.001.006 */
930 + { 0x4106, "BCM4335A0" }, /* 002.001.006 */
931 +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
932 +index e48c3ad069bb4..d789c077d95dc 100644
933 +--- a/drivers/bluetooth/btusb.c
934 ++++ b/drivers/bluetooth/btusb.c
935 +@@ -422,6 +422,18 @@ static const struct usb_device_id blacklist_table[] = {
936 + { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
937 + BTUSB_WIDEBAND_SPEECH },
938 +
939 ++ /* Realtek 8852CE Bluetooth devices */
940 ++ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK |
941 ++ BTUSB_WIDEBAND_SPEECH },
942 ++ { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK |
943 ++ BTUSB_WIDEBAND_SPEECH },
944 ++ { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK |
945 ++ BTUSB_WIDEBAND_SPEECH },
946 ++ { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK |
947 ++ BTUSB_WIDEBAND_SPEECH },
948 ++ { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK |
949 ++ BTUSB_WIDEBAND_SPEECH },
950 ++
951 + /* Realtek Bluetooth devices */
952 + { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
953 + .driver_info = BTUSB_REALTEK },
954 +@@ -469,6 +481,9 @@ static const struct usb_device_id blacklist_table[] = {
955 + { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK |
956 + BTUSB_WIDEBAND_SPEECH |
957 + BTUSB_VALID_LE_STATES },
958 ++ { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK |
959 ++ BTUSB_WIDEBAND_SPEECH |
960 ++ BTUSB_VALID_LE_STATES },
961 +
962 + /* Additional Realtek 8723AE Bluetooth devices */
963 + { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
964 +diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
965 +index 785f445dd60d5..49bed66b8c84e 100644
966 +--- a/drivers/bluetooth/hci_bcm.c
967 ++++ b/drivers/bluetooth/hci_bcm.c
968 +@@ -1544,8 +1544,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = {
969 + { .compatible = "brcm,bcm43430a0-bt" },
970 + { .compatible = "brcm,bcm43430a1-bt" },
971 + { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
972 ++ { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data },
973 + { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
974 + { .compatible = "brcm,bcm4335a0" },
975 ++ { .compatible = "infineon,cyw55572-bt" },
976 + { },
977 + };
978 + MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
979 +diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
980 +index eab34e24d9446..8df11016fd51b 100644
981 +--- a/drivers/bluetooth/hci_qca.c
982 ++++ b/drivers/bluetooth/hci_qca.c
983 +@@ -1588,7 +1588,7 @@ static bool qca_wakeup(struct hci_dev *hdev)
984 + wakeup = device_may_wakeup(hu->serdev->ctrl->dev.parent);
985 + bt_dev_dbg(hu->hdev, "wakeup status : %d", wakeup);
986 +
987 +- return !wakeup;
988 ++ return wakeup;
989 + }
990 +
991 + static int qca_regulator_init(struct hci_uart *hu)
992 +diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
993 +index 73b3961890397..afb0942ccc293 100644
994 +--- a/drivers/macintosh/adb.c
995 ++++ b/drivers/macintosh/adb.c
996 +@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req)
997 +
998 + switch(req->data[1]) {
999 + case ADB_QUERY_GETDEVINFO:
1000 +- if (req->nbytes < 3)
1001 ++ if (req->nbytes < 3 || req->data[2] >= 16)
1002 + break;
1003 + mutex_lock(&adb_handler_mutex);
1004 + req->reply[0] = adb_handler[req->data[2]].original_address;
1005 +diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
1006 +index 19db5693175fe..2a0ead57db71c 100644
1007 +--- a/fs/btrfs/block-group.h
1008 ++++ b/fs/btrfs/block-group.h
1009 +@@ -104,6 +104,7 @@ struct btrfs_block_group {
1010 + unsigned int relocating_repair:1;
1011 + unsigned int chunk_item_inserted:1;
1012 + unsigned int zone_is_active:1;
1013 ++ unsigned int zoned_data_reloc_ongoing:1;
1014 +
1015 + int disk_cache_state;
1016 +
1017 +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
1018 +index 6aa92f84f4654..f45ecd939a2cb 100644
1019 +--- a/fs/btrfs/extent-tree.c
1020 ++++ b/fs/btrfs/extent-tree.c
1021 +@@ -3836,7 +3836,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
1022 + block_group->start == fs_info->data_reloc_bg ||
1023 + fs_info->data_reloc_bg == 0);
1024 +
1025 +- if (block_group->ro) {
1026 ++ if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
1027 + ret = 1;
1028 + goto out;
1029 + }
1030 +@@ -3898,8 +3898,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
1031 + out:
1032 + if (ret && ffe_ctl->for_treelog)
1033 + fs_info->treelog_bg = 0;
1034 +- if (ret && ffe_ctl->for_data_reloc)
1035 ++ if (ret && ffe_ctl->for_data_reloc &&
1036 ++ fs_info->data_reloc_bg == block_group->start) {
1037 ++ /*
1038 ++ * Do not allow further allocations from this block group.
1039 ++ * Compared to increasing the ->ro, setting the
1040 ++ * ->zoned_data_reloc_ongoing flag still allows nocow
1041 ++ * writers to come in. See btrfs_inc_nocow_writers().
1042 ++ *
1043 ++ * We need to disable an allocation to avoid an allocation of
1044 ++ * regular (non-relocation data) extent. With mix of relocation
1045 ++ * extents and regular extents, we can dispatch WRITE commands
1046 ++ * (for relocation extents) and ZONE APPEND commands (for
1047 ++ * regular extents) at the same time to the same zone, which
1048 ++ * easily break the write pointer.
1049 ++ */
1050 ++ block_group->zoned_data_reloc_ongoing = 1;
1051 + fs_info->data_reloc_bg = 0;
1052 ++ }
1053 + spin_unlock(&fs_info->relocation_bg_lock);
1054 + spin_unlock(&fs_info->treelog_bg_lock);
1055 + spin_unlock(&block_group->lock);
1056 +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
1057 +index a23a42ba88cae..68ddd90685d9d 100644
1058 +--- a/fs/btrfs/extent_io.c
1059 ++++ b/fs/btrfs/extent_io.c
1060 +@@ -5214,13 +5214,14 @@ int extent_writepages(struct address_space *mapping,
1061 + */
1062 + btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
1063 + ret = extent_write_cache_pages(mapping, wbc, &epd);
1064 +- btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
1065 + ASSERT(ret <= 0);
1066 + if (ret < 0) {
1067 ++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
1068 + end_write_bio(&epd, ret);
1069 + return ret;
1070 + }
1071 + ret = flush_write_bio(&epd);
1072 ++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
1073 + return ret;
1074 + }
1075 +
1076 +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
1077 +index 9ae79342631a8..5d15e374d0326 100644
1078 +--- a/fs/btrfs/inode.c
1079 ++++ b/fs/btrfs/inode.c
1080 +@@ -3102,6 +3102,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1081 + ordered_extent->file_offset,
1082 + ordered_extent->file_offset +
1083 + logical_len);
1084 ++ btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
1085 ++ ordered_extent->disk_num_bytes);
1086 + } else {
1087 + BUG_ON(root == fs_info->tree_root);
1088 + ret = insert_ordered_extent_file_extent(trans, ordered_extent);
1089 +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
1090 +index 5091d679a602c..84b6d39509bd3 100644
1091 +--- a/fs/btrfs/zoned.c
1092 ++++ b/fs/btrfs/zoned.c
1093 +@@ -2005,6 +2005,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
1094 + struct btrfs_device *device;
1095 + u64 min_alloc_bytes;
1096 + u64 physical;
1097 ++ int i;
1098 +
1099 + if (!btrfs_is_zoned(fs_info))
1100 + return;
1101 +@@ -2039,13 +2040,25 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
1102 + spin_unlock(&block_group->lock);
1103 +
1104 + map = block_group->physical_map;
1105 +- device = map->stripes[0].dev;
1106 +- physical = map->stripes[0].physical;
1107 ++ for (i = 0; i < map->num_stripes; i++) {
1108 ++ int ret;
1109 +
1110 +- if (!device->zone_info->max_active_zones)
1111 +- goto out;
1112 ++ device = map->stripes[i].dev;
1113 ++ physical = map->stripes[i].physical;
1114 ++
1115 ++ if (device->zone_info->max_active_zones == 0)
1116 ++ continue;
1117 +
1118 +- btrfs_dev_clear_active_zone(device, physical);
1119 ++ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
1120 ++ physical >> SECTOR_SHIFT,
1121 ++ device->zone_info->zone_size >> SECTOR_SHIFT,
1122 ++ GFP_NOFS);
1123 ++
1124 ++ if (ret)
1125 ++ return;
1126 ++
1127 ++ btrfs_dev_clear_active_zone(device, physical);
1128 ++ }
1129 +
1130 + spin_lock(&fs_info->zone_active_bgs_lock);
1131 + ASSERT(!list_empty(&block_group->active_bg_list));
1132 +@@ -2116,3 +2129,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
1133 + }
1134 + mutex_unlock(&fs_devices->device_list_mutex);
1135 + }
1136 ++
1137 ++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
1138 ++ u64 length)
1139 ++{
1140 ++ struct btrfs_block_group *block_group;
1141 ++
1142 ++ if (!btrfs_is_zoned(fs_info))
1143 ++ return;
1144 ++
1145 ++ block_group = btrfs_lookup_block_group(fs_info, logical);
1146 ++ /* It should be called on a previous data relocation block group. */
1147 ++ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
1148 ++
1149 ++ spin_lock(&block_group->lock);
1150 ++ if (!block_group->zoned_data_reloc_ongoing)
1151 ++ goto out;
1152 ++
1153 ++ /* All relocation extents are written. */
1154 ++ if (block_group->start + block_group->alloc_offset == logical + length) {
1155 ++ /* Now, release this block group for further allocations. */
1156 ++ block_group->zoned_data_reloc_ongoing = 0;
1157 ++ }
1158 ++
1159 ++out:
1160 ++ spin_unlock(&block_group->lock);
1161 ++ btrfs_put_block_group(block_group);
1162 ++}
1163 +diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
1164 +index 2d898970aec5f..cf6320feef464 100644
1165 +--- a/fs/btrfs/zoned.h
1166 ++++ b/fs/btrfs/zoned.h
1167 +@@ -80,6 +80,8 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
1168 + struct extent_buffer *eb);
1169 + void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
1170 + void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
1171 ++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
1172 ++ u64 length);
1173 + #else /* CONFIG_BLK_DEV_ZONED */
1174 + static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
1175 + struct blk_zone *zone)
1176 +@@ -241,6 +243,9 @@ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
1177 + static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
1178 +
1179 + static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
1180 ++
1181 ++static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
1182 ++ u64 logical, u64 length) { }
1183 + #endif
1184 +
1185 + static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
1186 +diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
1187 +index 3f53bc27a19bf..3d088a88f8320 100644
1188 +--- a/include/linux/ioprio.h
1189 ++++ b/include/linux/ioprio.h
1190 +@@ -11,7 +11,7 @@
1191 + /*
1192 + * Default IO priority.
1193 + */
1194 +-#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
1195 ++#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
1196 +
1197 + /*
1198 + * Check that a priority value has a valid class.
1199 +diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
1200 +index 9d09f489b60e0..2e0f75bcb7fd1 100644
1201 +--- a/kernel/entry/kvm.c
1202 ++++ b/kernel/entry/kvm.c
1203 +@@ -9,12 +9,6 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
1204 + int ret;
1205 +
1206 + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
1207 +- clear_notify_signal();
1208 +- if (task_work_pending(current))
1209 +- task_work_run();
1210 +- }
1211 +-
1212 +- if (ti_work & _TIF_SIGPENDING) {
1213 + kvm_handle_signal_exit(vcpu);
1214 + return -EINTR;
1215 + }
1216 +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
1217 +index 5d09ded0c491f..04b7e3654ff77 100644
1218 +--- a/tools/arch/x86/include/asm/cpufeatures.h
1219 ++++ b/tools/arch/x86/include/asm/cpufeatures.h
1220 +@@ -301,6 +301,7 @@
1221 + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
1222 + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
1223 + #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
1224 ++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
1225 +
1226 + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
1227 + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
1228 +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
1229 +index ad084326f24c2..f951147cc7fdc 100644
1230 +--- a/tools/arch/x86/include/asm/msr-index.h
1231 ++++ b/tools/arch/x86/include/asm/msr-index.h
1232 +@@ -148,6 +148,10 @@
1233 + * are restricted to targets in
1234 + * kernel.
1235 + */
1236 ++#define ARCH_CAP_PBRSB_NO BIT(24) /*
1237 ++ * Not susceptible to Post-Barrier
1238 ++ * Return Stack Buffer Predictions.
1239 ++ */
1240 +
1241 + #define MSR_IA32_FLUSH_CMD 0x0000010b
1242 + #define L1D_FLUSH BIT(0) /*
1243 +diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
1244 +index 5a5bd74f55bd5..9c366b3a676db 100755
1245 +--- a/tools/kvm/kvm_stat/kvm_stat
1246 ++++ b/tools/kvm/kvm_stat/kvm_stat
1247 +@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
1248 + .format(values))
1249 + if len(pids) > 1:
1250 + sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
1251 +- ' to specify the desired pid'.format(" ".join(pids)))
1252 ++ ' to specify the desired pid'
1253 ++ .format(" ".join(map(str, pids))))
1254 + namespace.pid = pids[0]
1255 +
1256 + argparser = argparse.ArgumentParser(description=description_text,
1257 +diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
1258 +index e0b0164e9af85..be1d9728c4cea 100644
1259 +--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
1260 ++++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
1261 +@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
1262 +
1263 + void ucall(uint64_t cmd, int nargs, ...)
1264 + {
1265 +- struct ucall uc = {
1266 +- .cmd = cmd,
1267 +- };
1268 ++ struct ucall uc = {};
1269 + va_list va;
1270 + int i;
1271 +
1272 ++ WRITE_ONCE(uc.cmd, cmd);
1273 + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
1274 +
1275 + va_start(va, nargs);
1276 + for (i = 0; i < nargs; ++i)
1277 +- uc.args[i] = va_arg(va, uint64_t);
1278 ++ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
1279 + va_end(va);
1280 +
1281 +- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
1282 ++ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
1283 + }
1284 +
1285 + uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
1286 +diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
1287 +index 722df3a28791c..ddd68ba0c99fc 100644
1288 +--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
1289 ++++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
1290 +@@ -110,6 +110,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
1291 + struct kvm_vm *vm;
1292 + uint64_t guest_num_pages;
1293 + uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
1294 ++ uint64_t region_end_gfn;
1295 + int i;
1296 +
1297 + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
1298 +@@ -144,18 +145,29 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
1299 +
1300 + pta->vm = vm;
1301 +
1302 ++ /* Put the test region at the top guest physical memory. */
1303 ++ region_end_gfn = vm_get_max_gfn(vm) + 1;
1304 ++
1305 ++#ifdef __x86_64__
1306 ++ /*
1307 ++ * When running vCPUs in L2, restrict the test region to 48 bits to
1308 ++ * avoid needing 5-level page tables to identity map L2.
1309 ++ */
1310 ++ if (pta->nested)
1311 ++ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
1312 ++#endif
1313 + /*
1314 + * If there should be more memory in the guest test region than there
1315 + * can be pages in the guest, it will definitely cause problems.
1316 + */
1317 +- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
1318 ++ TEST_ASSERT(guest_num_pages < region_end_gfn,
1319 + "Requested more guest memory than address space allows.\n"
1320 + " guest pages: %" PRIx64 " max gfn: %" PRIx64
1321 + " vcpus: %d wss: %" PRIx64 "]\n",
1322 +- guest_num_pages, vm_get_max_gfn(vm), vcpus,
1323 ++ guest_num_pages, region_end_gfn - 1, vcpus,
1324 + vcpu_memory_bytes);
1325 +
1326 +- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
1327 ++ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
1328 + pta->gpa = align_down(pta->gpa, backing_src_pagesz);
1329 + #ifdef __s390x__
1330 + /* Align to 1M (segment size) */
1331 +diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
1332 +index e0b2bb1339b16..3330fb183c680 100644
1333 +--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
1334 ++++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
1335 +@@ -44,7 +44,7 @@ static inline void nop_loop(void)
1336 + {
1337 + int i;
1338 +
1339 +- for (i = 0; i < 1000000; i++)
1340 ++ for (i = 0; i < 100000000; i++)
1341 + asm volatile("nop");
1342 + }
1343 +
1344 +@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
1345 + tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
1346 + GUEST_ASSERT(tsc_freq > 0);
1347 +
1348 +- /* First, check MSR-based clocksource */
1349 ++ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
1350 + r1 = rdtsc();
1351 + t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
1352 ++ r1 = (r1 + rdtsc()) / 2;
1353 + nop_loop();
1354 + r2 = rdtsc();
1355 + t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
1356 ++ r2 = (r2 + rdtsc()) / 2;
1357 +
1358 + GUEST_ASSERT(r2 > r1 && t2 > t1);
1359 +
1360 +@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
1361 + tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
1362 + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
1363 +
1364 +- /* First, check MSR-based clocksource */
1365 ++ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
1366 + r1 = rdtsc();
1367 + t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
1368 ++ r1 = (r1 + rdtsc()) / 2;
1369 + nop_loop();
1370 + r2 = rdtsc();
1371 + t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
1372 ++ r2 = (r2 + rdtsc()) / 2;
1373 +
1374 + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
1375 +
1376 +diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
1377 +index 9b68658b6bb85..5b98f3ee58a58 100644
1378 +--- a/tools/vm/slabinfo.c
1379 ++++ b/tools/vm/slabinfo.c
1380 +@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
1381 + return l;
1382 + }
1383 +
1384 ++static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
1385 ++{
1386 ++ char x[128];
1387 ++ FILE *f;
1388 ++ size_t l;
1389 ++
1390 ++ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
1391 ++ f = fopen(x, "r");
1392 ++ if (!f) {
1393 ++ buffer[0] = 0;
1394 ++ l = 0;
1395 ++ } else {
1396 ++ l = fread(buffer, 1, sizeof(buffer), f);
1397 ++ buffer[l] = 0;
1398 ++ fclose(f);
1399 ++ }
1400 ++ return l;
1401 ++}
1402 +
1403 + /*
1404 + * Put a size string together
1405 +@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
1406 + {
1407 + printf("\n%s: Kernel object allocation\n", s->name);
1408 + printf("-----------------------------------------------------------------------\n");
1409 +- if (read_slab_obj(s, "alloc_calls"))
1410 ++ if (read_debug_slab_obj(s, "alloc_traces"))
1411 ++ printf("%s", buffer);
1412 ++ else if (read_slab_obj(s, "alloc_calls"))
1413 + printf("%s", buffer);
1414 + else
1415 + printf("No Data\n");
1416 +
1417 + printf("\n%s: Kernel object freeing\n", s->name);
1418 + printf("------------------------------------------------------------------------\n");
1419 +- if (read_slab_obj(s, "free_calls"))
1420 ++ if (read_debug_slab_obj(s, "free_traces"))
1421 ++ printf("%s", buffer);
1422 ++ else if (read_slab_obj(s, "free_calls"))
1423 + printf("%s", buffer);
1424 + else
1425 + printf("No Data\n");
1426 +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
1427 +index 24cb37d19c638..7f1d19689701b 100644
1428 +--- a/virt/kvm/kvm_main.c
1429 ++++ b/virt/kvm/kvm_main.c
1430 +@@ -3327,9 +3327,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
1431 +
1432 + vcpu->stat.generic.blocking = 1;
1433 +
1434 ++ preempt_disable();
1435 + kvm_arch_vcpu_blocking(vcpu);
1436 +-
1437 + prepare_to_rcuwait(wait);
1438 ++ preempt_enable();
1439 ++
1440 + for (;;) {
1441 + set_current_state(TASK_INTERRUPTIBLE);
1442 +
1443 +@@ -3339,9 +3341,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
1444 + waited = true;
1445 + schedule();
1446 + }
1447 +- finish_rcuwait(wait);
1448 +
1449 ++ preempt_disable();
1450 ++ finish_rcuwait(wait);
1451 + kvm_arch_vcpu_unblocking(vcpu);
1452 ++ preempt_enable();
1453 +
1454 + vcpu->stat.generic.blocking = 0;
1455 +