commit: 7aaaf24efa9606d739c15aa28d8118b093cc315e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Aug 11 12:32:45 2022 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Aug 11 12:32:45 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=7aaaf24e

Linux patch 5.18.17

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README              |    4 +
 1016_linux-5.18.17.patch | 1418 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1422 insertions(+)

diff --git a/0000_README b/0000_README
index efa0b25e..e0f23579 100644
--- a/0000_README
+++ b/0000_README
@@ -107,6 +107,10 @@ Patch: 1015_linux-5.18.16.patch
From: http://www.kernel.org
Desc: Linux 5.18.16

+Patch: 1016_linux-5.18.17.patch
+From: http://www.kernel.org
+Desc: Linux 5.18.17
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1016_linux-5.18.17.patch b/1016_linux-5.18.17.patch
new file mode 100644
index 00000000..94fc8829
--- /dev/null
+++ b/1016_linux-5.18.17.patch
@@ -0,0 +1,1418 @@
+diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
+index 9e9556826450b..2ce2a38cdd556 100644
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -422,6 +422,14 @@ The possible values in this file are:
+ 'RSB filling' Protection of RSB on context switch enabled
+ ============= ===========================================
+
++ - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
++
++ =========================== =======================================================
++ 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
++ 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
++ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
++ =========================== =======================================================
++
+ Full mitigation might require a microcode update from the CPU
+ vendor. When the necessary microcode is not available, the kernel will
+ report vulnerability.
+diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+index 5aac094fd2172..58ecafc1b7f90 100644
+--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
++++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+@@ -23,6 +23,7 @@ properties:
+ - brcm,bcm4345c5
+ - brcm,bcm43540-bt
+ - brcm,bcm4335a0
++ - brcm,bcm4349-bt
+
+ shutdown-gpios:
+ maxItems: 1
+diff --git a/Makefile b/Makefile
+index 18bcbcd037f0a..ef8c18e5c161c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 18
+-SUBLEVEL = 16
++SUBLEVEL = 17
+ EXTRAVERSION =
+ NAME = Superb Owl
+
+diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
+index 9c3d86e397bf3..1fae18ba11ed1 100644
+--- a/arch/arm64/crypto/poly1305-glue.c
++++ b/arch/arm64/crypto/poly1305-glue.c
+@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ {
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+- poly1305_init_arch(dctx, src);
++ poly1305_init_arm64(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
+index 96dc0f7da258d..a971d462f531c 100644
+--- a/arch/arm64/include/asm/kernel-pgtable.h
++++ b/arch/arm64/include/asm/kernel-pgtable.h
+@@ -103,8 +103,8 @@
+ /*
+ * Initial memory map attributes.
+ */
+-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
++#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
++#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
+
+ #if ARM64_KERNEL_USES_PMD_MAPS
+ #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
+index 6a98f1a38c29a..8a93a0a7489b2 100644
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
+ subs x1, x1, #64
+ b.ne 1b
+
+- mov x7, SWAPPER_MM_MMUFLAGS
++ mov_q x7, SWAPPER_MM_MMUFLAGS
+
+ /*
+ * Create the identity mapping.
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 4d1d87f76a74f..ce1f5a876cfea 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -2469,7 +2469,7 @@ config RETPOLINE
+ config RETHUNK
+ bool "Enable return-thunks"
+ depends on RETPOLINE && CC_HAS_RETURN_THUNK
+- default y
++ default y if X86_64
+ help
+ Compile the kernel with the return-thunks compiler option to guard
+ against kernel-to-user data leaks by avoiding return speculation.
+@@ -2478,21 +2478,21 @@ config RETHUNK
+
+ config CPU_UNRET_ENTRY
+ bool "Enable UNRET on kernel entry"
+- depends on CPU_SUP_AMD && RETHUNK
++ depends on CPU_SUP_AMD && RETHUNK && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=unret mitigation.
+
+ config CPU_IBPB_ENTRY
+ bool "Enable IBPB on kernel entry"
+- depends on CPU_SUP_AMD
++ depends on CPU_SUP_AMD && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=ibpb mitigation.
+
+ config CPU_IBRS_ENTRY
+ bool "Enable IBRS on kernel entry"
+- depends on CPU_SUP_INTEL
++ depends on CPU_SUP_INTEL && X86_64
+ default y
+ help
+ Compile the kernel with support for the spectre_v2=ibrs mitigation.
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 49889f171e860..e82da174d28c3 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -302,6 +302,7 @@
+ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+@@ -453,5 +454,6 @@
+ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+ #define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
++#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 4ff36610af6ab..9fdaa847d4b66 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -651,6 +651,7 @@ struct kvm_vcpu_arch {
+ u64 ia32_misc_enable_msr;
+ u64 smbase;
+ u64 smi_count;
++ bool at_instruction_boundary;
+ bool tpr_access_reporting;
+ bool xsaves_enabled;
+ bool xfd_no_write_intercept;
+@@ -1289,6 +1290,8 @@ struct kvm_vcpu_stat {
+ u64 nested_run;
+ u64 directed_yield_attempted;
+ u64 directed_yield_successful;
++ u64 preemption_reported;
++ u64 preemption_other;
+ u64 guest_mode;
+ };
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index ad084326f24c2..f951147cc7fdc 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -148,6 +148,10 @@
+ * are restricted to targets in
+ * kernel.
+ */
++#define ARCH_CAP_PBRSB_NO BIT(24) /*
++ * Not susceptible to Post-Barrier
++ * Return Stack Buffer Predictions.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 38a3e86e665ef..d3a3cc6772ee1 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -60,7 +60,9 @@
+ 774: \
+ add $(BITS_PER_LONG/8) * 2, sp; \
+ dec reg; \
+- jnz 771b;
++ jnz 771b; \
++ /* barrier for jnz misprediction */ \
++ lfence;
+
+ #ifdef __ASSEMBLY__
+
+@@ -118,13 +120,28 @@
+ #endif
+ .endm
+
++.macro ISSUE_UNBALANCED_RET_GUARD
++ ANNOTATE_INTRA_FUNCTION_CALL
++ call .Lunbalanced_ret_guard_\@
++ int3
++.Lunbalanced_ret_guard_\@:
++ add $(BITS_PER_LONG/8), %_ASM_SP
++ lfence
++.endm
++
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
+.ifb \ftr2
+ ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
+.else
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
+.endif
+ __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+.Lunbalanced_\@:
+ ISSUE_UNBALANCED_RET_GUARD
+ .Lskip_rsb_\@:
+ .endm
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index fd986a8ba2bd7..fa625b2a8a939 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1328,6 +1328,53 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
+ }
+ }
+
++static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
++{
++ /*
++ * Similar to context switches, there are two types of RSB attacks
++ * after VM exit:
++ *
++ * 1) RSB underflow
++ *
++ * 2) Poisoned RSB entry
++ *
++ * When retpoline is enabled, both are mitigated by filling/clearing
++ * the RSB.
++ *
++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
++ * prediction isolation protections, RSB still needs to be cleared
++ * because of #2. Note that SMEP provides no protection here, unlike
++ * user-space-poisoned RSB entries.
++ *
++ * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB
++ * bug is present then a LITE version of RSB protection is required,
++ * just a single call needs to retire before a RET is executed.
++ */
++ switch (mode) {
++ case SPECTRE_V2_NONE:
++ return;
++
++ case SPECTRE_V2_EIBRS_LFENCE:
++ case SPECTRE_V2_EIBRS:
++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
++ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
++ }
++ return;
++
++ case SPECTRE_V2_EIBRS_RETPOLINE:
++ case SPECTRE_V2_RETPOLINE:
++ case SPECTRE_V2_LFENCE:
++ case SPECTRE_V2_IBRS:
++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++ pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
++ return;
++ }
++
++ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
++ dump_stack();
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1478,28 +1525,7 @@ static void __init spectre_v2_select_mitigation(void)
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+
+- /*
+- * Similar to context switches, there are two types of RSB attacks
+- * after vmexit:
+- *
+- * 1) RSB underflow
+- *
+- * 2) Poisoned RSB entry
+- *
+- * When retpoline is enabled, both are mitigated by filling/clearing
+- * the RSB.
+- *
+- * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+- * prediction isolation protections, RSB still needs to be cleared
+- * because of #2. Note that SMEP provides no protection here, unlike
+- * user-space-poisoned RSB entries.
+- *
+- * eIBRS, on the other hand, has RSB-poisoning protections, so it
+- * doesn't need RSB clearing after vmexit.
+- */
+- if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
+- boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
+- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
++ spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+
+ /*
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+@@ -2285,6 +2311,19 @@ static char *ibpb_state(void)
+ return "";
+ }
+
++static char *pbrsb_eibrs_state(void)
++{
++ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
++ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
++ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
++ return ", PBRSB-eIBRS: SW sequence";
++ else
++ return ", PBRSB-eIBRS: Vulnerable";
++ } else {
++ return ", PBRSB-eIBRS: Not affected";
++ }
++}
++
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+@@ -2297,12 +2336,13 @@ static ssize_t spectre_v2_show_state(char *buf)
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+- return sprintf(buf, "%s%s%s%s%s%s\n",
++ return sprintf(buf, "%s%s%s%s%s%s%s\n",
+ spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ stibp_state(),
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
++ pbrsb_eibrs_state(),
+ spectre_v2_module_string());
+ }
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 1f43ddf2ffc36..d47e20e305cd2 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1161,6 +1161,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ #define NO_SWAPGS BIT(6)
+ #define NO_ITLB_MULTIHIT BIT(7)
+ #define NO_SPECTRE_V2 BIT(8)
++#define NO_EIBRS_PBRSB BIT(9)
+
+ #define VULNWL(vendor, family, model, whitelist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1203,7 +1204,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /*
+ * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -1213,7 +1214,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+ * good enough for our purposes.
+ */
+
+- VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
++ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
++ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /* AMD Family 0xf - 0x12 */
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+@@ -1391,6 +1394,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+ }
+
++ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
++ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
++ !(ia32_cap & ARCH_CAP_PBRSB_NO))
++ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
++
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ return;
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index 6d3b3e5a5533b..ee4802d7b36cd 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -145,6 +145,15 @@ static bool try_step_up(struct tdp_iter *iter)
+ return true;
+ }
+
++/*
++ * Step the iterator back up a level in the paging structure. Should only be
++ * used when the iterator is below the root level.
++ */
++void tdp_iter_step_up(struct tdp_iter *iter)
++{
++ WARN_ON(!try_step_up(iter));
++}
++
+ /*
+ * Step to the next SPTE in a pre-order traversal of the paging structure.
+ * To get to the next SPTE, the iterator either steps down towards the goal
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index f0af385c56e03..adfca0cf94d3a 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
+ int min_level, gfn_t next_last_level_gfn);
+ void tdp_iter_next(struct tdp_iter *iter);
+ void tdp_iter_restart(struct tdp_iter *iter);
++void tdp_iter_step_up(struct tdp_iter *iter);
+
+ #endif /* __KVM_X86_MMU_TDP_ITER_H */
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 922b06bf4b948..b61a11d462ccb 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1748,12 +1748,12 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ gfn_t start = slot->base_gfn;
+ gfn_t end = start + slot->npages;
+ struct tdp_iter iter;
++ int max_mapping_level;
+ kvm_pfn_t pfn;
+
+ rcu_read_lock();
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+-retry:
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ continue;
+
+@@ -1761,15 +1761,41 @@ retry:
+ !is_last_spte(iter.old_spte, iter.level))
+ continue;
+
++ /*
++ * This is a leaf SPTE. Check if the PFN it maps can
++ * be mapped at a higher level.
++ */
+ pfn = spte_to_pfn(iter.old_spte);
+- if (kvm_is_reserved_pfn(pfn) ||
+- iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
+- pfn, PG_LEVEL_NUM))
++
++ if (kvm_is_reserved_pfn(pfn))
+ continue;
+
++ max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
++ iter.gfn, pfn, PG_LEVEL_NUM);
++
++ WARN_ON(max_mapping_level < iter.level);
++
++ /*
++ * If this page is already mapped at the highest
++ * viable level, there's nothing more to do.
++ */
++ if (max_mapping_level == iter.level)
++ continue;
++
++ /*
++ * The page can be remapped at a higher level, so step
++ * up to zap the parent SPTE.
++ */
++ while (max_mapping_level > iter.level)
++ tdp_iter_step_up(&iter);
++
+ /* Note, a successful atomic zap also does a remote TLB flush. */
+- if (tdp_mmu_zap_spte_atomic(kvm, &iter))
+- goto retry;
++ tdp_mmu_zap_spte_atomic(kvm, &iter);
++
++ /*
++ * If the atomic zap fails, the iter will recurse back into
++ * the same subtree to retry.
++ */
+ }
+
+ rcu_read_unlock();
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index 76e9e6eb71d63..7aa1ce34a5204 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+
+ /* If source buffer is not aligned then use an intermediate buffer */
+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
+- src_tpage = alloc_page(GFP_KERNEL);
++ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!src_tpage)
+ return -ENOMEM;
+
+@@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+- dst_tpage = alloc_page(GFP_KERNEL);
++ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 6bfb0b0e66bd3..c667214c630b1 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4166,6 +4166,8 @@ out:
+
+ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+ {
++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
+index 4182c7ffc9091..6de96b9438044 100644
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -227,11 +227,13 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+- * need the RSB filling sequence. But it does need to be enabled
+- * before the first unbalanced RET.
++ * need the RSB filling sequence. But it does need to be enabled, and a
++ * single call to retire, before the first unbalanced RET.
+ */
+
+- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
++ X86_FEATURE_RSB_VMEXIT_LITE
++
+
+ pop %_ASM_ARG2 /* @flags */
+ pop %_ASM_ARG1 /* @vmx */
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 4b6a0268c78e3..597c3c08da501 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6630,6 +6630,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+ return;
+
+ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 53b6fdf30c99b..65b0ec28bd52b 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -291,6 +291,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ STATS_DESC_COUNTER(VCPU, nested_run),
+ STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
+ STATS_DESC_COUNTER(VCPU, directed_yield_successful),
++ STATS_DESC_COUNTER(VCPU, preemption_reported),
++ STATS_DESC_COUNTER(VCPU, preemption_other),
+ STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ };
+
+@@ -4607,6 +4609,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ struct kvm_memslots *slots;
+ static const u8 preempted = KVM_VCPU_PREEMPTED;
+
++ /*
++ * The vCPU can be marked preempted if and only if the VM-Exit was on
++ * an instruction boundary and will not trigger guest emulation of any
++ * kind (see vcpu_run). Vendor specific code controls (conservatively)
++ * when this is true, for example allowing the vCPU to be marked
++ * preempted if and only if the VM-Exit was due to a host interrupt.
++ */
++ if (!vcpu->arch.at_instruction_boundary) {
++ vcpu->stat.preemption_other++;
++ return;
++ }
++
++ vcpu->stat.preemption_reported++;
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+@@ -4636,19 +4651,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+ int idx;
+
+- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
+- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
++ if (vcpu->preempted) {
++ if (!vcpu->arch.guest_state_protected)
++ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+
+- /*
+- * Take the srcu lock as memslots will be accessed to check the gfn
+- * cache generation against the memslots generation.
+- */
+- idx = srcu_read_lock(&vcpu->kvm->srcu);
+- if (kvm_xen_msr_enabled(vcpu->kvm))
+- kvm_xen_runstate_set_preempted(vcpu);
+- else
+- kvm_steal_time_set_preempted(vcpu);
+- srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ /*
++ * Take the srcu lock as memslots will be accessed to check the gfn
++ * cache generation against the memslots generation.
++ */
++ idx = srcu_read_lock(&vcpu->kvm->srcu);
++ if (kvm_xen_msr_enabled(vcpu->kvm))
++ kvm_xen_runstate_set_preempted(vcpu);
++ else
++ kvm_steal_time_set_preempted(vcpu);
++ srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ }
+
+ static_call(kvm_x86_vcpu_put)(vcpu);
+ vcpu->arch.last_host_tsc = rdtsc();
+@@ -9767,6 +9784,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ return;
+
+ down_read(&vcpu->kvm->arch.apicv_update_lock);
++ preempt_disable();
+
+ activate = kvm_apicv_activated(vcpu->kvm);
+ if (vcpu->arch.apicv_active == activate)
+@@ -9786,6 +9804,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ out:
++ preempt_enable();
+ up_read(&vcpu->kvm->arch.apicv_update_lock);
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
+@@ -10363,6 +10382,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
+ vcpu->arch.l1tf_flush_l1d = true;
+
+ for (;;) {
++ /*
++ * If another guest vCPU requests a PV TLB flush in the middle
++ * of instruction emulation, the rest of the emulation could
++ * use a stale page translation. Assume that any code after
++ * this point can start executing an instruction.
++ */
++ vcpu->arch.at_instruction_boundary = false;
+ if (kvm_vcpu_running(vcpu)) {
+ r = vcpu_enter_guest(vcpu);
+ } else {
+diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
+index adbcc9ed59dbc..fda1413f8af95 100644
+--- a/arch/x86/kvm/xen.h
++++ b/arch/x86/kvm/xen.h
+@@ -103,8 +103,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+ * behalf of the vCPU. Only if the VMM does actually block
+ * does it need to enter RUNSTATE_blocked.
+ */
+- if (vcpu->preempted)
+- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
++ if (WARN_ON_ONCE(!vcpu->preempted))
++ return;
++
++ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+ }
+
+ /* 32-bit compatibility definitions, also used natively in 32-bit build */
+diff --git a/block/blk-ioc.c b/block/blk-ioc.c
+index df9cfe4ca5328..63fc020424082 100644
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -247,6 +247,8 @@ static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
+ INIT_HLIST_HEAD(&ioc->icq_list);
+ INIT_WORK(&ioc->release_work, ioc_release_fn);
+ #endif
++ ioc->ioprio = IOPRIO_DEFAULT;
++
+ return ioc;
+ }
+
+diff --git a/block/ioprio.c b/block/ioprio.c
+index 2fe068fcaad58..2a34cbca18aed 100644
+--- a/block/ioprio.c
++++ b/block/ioprio.c
+@@ -157,9 +157,9 @@ out:
+ int ioprio_best(unsigned short aprio, unsigned short bprio)
+ {
+ if (!ioprio_valid(aprio))
+- aprio = IOPRIO_DEFAULT;
++ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
+ if (!ioprio_valid(bprio))
+- bprio = IOPRIO_DEFAULT;
++ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM);
+
+ return min(aprio, bprio);
+ }
+diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
+index 598fd19b65fa4..45973aa6e06d4 100644
+--- a/drivers/acpi/apei/bert.c
++++ b/drivers/acpi/apei/bert.c
+@@ -29,16 +29,26 @@
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "BERT: " fmt
++
++#define ACPI_BERT_PRINT_MAX_RECORDS 5
+ #define ACPI_BERT_PRINT_MAX_LEN 1024
+
+ static int bert_disable;
+
++/*
++ * Print "all" the error records in the BERT table, but avoid huge spam to
++ * the console if the BIOS included oversize records, or too many records.
++ * Skipping some records here does not lose anything because the full
++ * data is available to user tools in:
++ * /sys/firmware/acpi/tables/data/BERT
++ */
+ static void __init bert_print_all(struct acpi_bert_region *region,
+ unsigned int region_len)
+ {
+ struct acpi_hest_generic_status *estatus =
+ (struct acpi_hest_generic_status *)region;
+ int remain = region_len;
++ int printed = 0, skipped = 0;
+ u32 estatus_len;
+
+ while (remain >= sizeof(struct acpi_bert_region)) {
+@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
+ if (remain < estatus_len) {
+ pr_err(FW_BUG "Truncated status block (length: %u).\n",
+ estatus_len);
+- return;
++ break;
+ }
+
+ /* No more error records. */
+ if (!estatus->block_status)
+- return;
++ break;
+
+ if (cper_estatus_check(estatus)) {
+ pr_err(FW_BUG "Invalid error record.\n");
+- return;
++ break;
+ }
+
+- pr_info_once("Error records from previous boot:\n");
+- if (region_len < ACPI_BERT_PRINT_MAX_LEN)
++ if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
++ printed < ACPI_BERT_PRINT_MAX_RECORDS) {
++ pr_info_once("Error records from previous boot:\n");
+ cper_estatus_print(KERN_INFO HW_ERR, estatus);
+- else
+- pr_info_once("Max print length exceeded, table data is available at:\n"
+- "/sys/firmware/acpi/tables/data/BERT");
++ printed++;
++ } else {
++ skipped++;
++ }
+
+ /*
+ * Because the boot error source is "one-time polled" type,
+@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region,
+ estatus = (void *)estatus + estatus_len;
+ remain -= estatus_len;
+ }
++
++ if (skipped)
++ pr_info(HW_ERR "Skipped %d error records\n", skipped);
+ }
+
+ static int __init setup_bert_disable(char *str)
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index becc198e4c224..6615f59ab7fd2 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -430,7 +430,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+ },
+ },
+@@ -438,59 +437,75 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ .ident = "Clevo NL5xRU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xRU",
++ .ident = "Clevo NL5xNU",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+- DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
++ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ },
++ /*
++ * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10,
++ * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo
++ * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description
++ * above.
++ */
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xRU",
++ .ident = "TongFang PF5PU1G",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+- DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
++ DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xNU",
++ .ident = "TongFang PF4NU1F",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF4NU1F",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xNU",
++ .ident = "TongFang PF5NU1G",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+- .ident = "Clevo NL5xNU",
++ .ident = "TongFang PF5NU1G",
+ .matches = {
+- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+- DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
++ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
++ DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"),
++ },
++ },
++ {
++ .callback = video_detect_force_native,
++ .ident = "TongFang PF5LUXG",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"),
+ },
+ },
+-
+ /*
+ * Desktops which falsely report a backlight and which our heuristics
+ * for this do not catch.
+diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
+index d9ceca7a7935c..a18f289d73466 100644
+--- a/drivers/bluetooth/btbcm.c
++++ b/drivers/bluetooth/btbcm.c
+@@ -453,6 +453,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
+ { 0x6606, "BCM4345C5" }, /* 003.006.006 */
+ { 0x230f, "BCM4356A2" }, /* 001.003.015 */
+ { 0x220e, "BCM20702A1" }, /* 001.002.014 */
++ { 0x420d, "BCM4349B1" }, /* 002.002.013 */
++ { 0x420e, "BCM4349B1" }, /* 002.002.014 */
+ { 0x4217, "BCM4329B1" }, /* 002.002.023 */
+ { 0x6106, "BCM4359C0" }, /* 003.001.006 */
+ { 0x4106, "BCM4335A0" }, /* 002.001.006 */
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index e48c3ad069bb4..d789c077d95dc 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -422,6 +422,18 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
+
++ /* Realtek 8852CE Bluetooth devices */
++ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++ { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK |
++ BTUSB_WIDEBAND_SPEECH },
++
+ /* Realtek Bluetooth devices */
+ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01),
+ .driver_info = BTUSB_REALTEK },
+@@ -469,6 +481,9 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK |
+ BTUSB_WIDEBAND_SPEECH |
+ BTUSB_VALID_LE_STATES },
++ { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK |
++ BTUSB_WIDEBAND_SPEECH |
++ BTUSB_VALID_LE_STATES },
+
+ /* Additional Realtek 8723AE Bluetooth devices */
+ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
+diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
+index 785f445dd60d5..49bed66b8c84e 100644
+--- a/drivers/bluetooth/hci_bcm.c
++++ b/drivers/bluetooth/hci_bcm.c
+@@ -1544,8 +1544,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = {
+ { .compatible = "brcm,bcm43430a0-bt" },
+ { .compatible = "brcm,bcm43430a1-bt" },
+ { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
++ { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data },
+ { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
+ { .compatible = "brcm,bcm4335a0" },
++ { .compatible = "infineon,cyw55572-bt" },
+ { },
+ };
+ MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index eab34e24d9446..8df11016fd51b 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -1588,7 +1588,7 @@ static bool qca_wakeup(struct hci_dev *hdev)
+ wakeup = device_may_wakeup(hu->serdev->ctrl->dev.parent);
+ bt_dev_dbg(hu->hdev, "wakeup status : %d", wakeup);
+
+- return !wakeup;
++ return wakeup;
+ }
+
+ static int qca_regulator_init(struct hci_uart *hu)
+diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
+index 73b3961890397..afb0942ccc293 100644
+--- a/drivers/macintosh/adb.c
++++ b/drivers/macintosh/adb.c
+@@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req)
+
+ switch(req->data[1]) {
+ case ADB_QUERY_GETDEVINFO:
+- if (req->nbytes < 3)
++ if (req->nbytes < 3 || req->data[2] >= 16)
+ break;
+ mutex_lock(&adb_handler_mutex);
+ req->reply[0] = adb_handler[req->data[2]].original_address;
+diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
+index 19db5693175fe..2a0ead57db71c 100644
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -104,6 +104,7 @@ struct btrfs_block_group {
+ unsigned int relocating_repair:1;
+ unsigned int chunk_item_inserted:1;
+ unsigned int zone_is_active:1;
++ unsigned int zoned_data_reloc_ongoing:1;
+
+ int disk_cache_state;
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 6aa92f84f4654..f45ecd939a2cb 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3836,7 +3836,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ block_group->start == fs_info->data_reloc_bg ||
+ fs_info->data_reloc_bg == 0);
+
+- if (block_group->ro) {
++ if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
+ ret = 1;
+ goto out;
+ }
+@@ -3898,8 +3898,24 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+ out:
+ if (ret && ffe_ctl->for_treelog)
+ fs_info->treelog_bg = 0;
+- if (ret && ffe_ctl->for_data_reloc)
++ if (ret && ffe_ctl->for_data_reloc &&
++ fs_info->data_reloc_bg == block_group->start) {
++ /*
++ * Do not allow further allocations from this block group.
++ * Compared to increasing the ->ro, setting the
++ * ->zoned_data_reloc_ongoing flag still allows nocow
++ * writers to come in. See btrfs_inc_nocow_writers().
++ *
++ * We need to disable an allocation to avoid an allocation of
++ * regular (non-relocation data) extent. With mix of relocation
++ * extents and regular extents, we can dispatch WRITE commands
++ * (for relocation extents) and ZONE APPEND commands (for
++ * regular extents) at the same time to the same zone, which
++ * easily break the write pointer.
++ */
++ block_group->zoned_data_reloc_ongoing = 1;
+ fs_info->data_reloc_bg = 0;
++ }
+ spin_unlock(&fs_info->relocation_bg_lock);
+ spin_unlock(&fs_info->treelog_bg_lock);
+ spin_unlock(&block_group->lock);
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index a23a42ba88cae..68ddd90685d9d 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5214,13 +5214,14 @@ int extent_writepages(struct address_space *mapping,
+ */
+ btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
+ ret = extent_write_cache_pages(mapping, wbc, &epd);
+- btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ ASSERT(ret <= 0);
+ if (ret < 0) {
++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ end_write_bio(&epd, ret);
+ return ret;
+ }
+ ret = flush_write_bio(&epd);
++ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
+ return ret;
+ }
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 9ae79342631a8..5d15e374d0326 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3102,6 +3102,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
+ ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ logical_len);
++ btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
++ ordered_extent->disk_num_bytes);
+ } else {
+ BUG_ON(root == fs_info->tree_root);
+ ret = insert_ordered_extent_file_extent(trans, ordered_extent);
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 5091d679a602c..84b6d39509bd3 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2005,6 +2005,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+ struct btrfs_device *device;
+ u64 min_alloc_bytes;
+ u64 physical;
++ int i;
+
+ if (!btrfs_is_zoned(fs_info))
+ return;
+@@ -2039,13 +2040,25 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+ spin_unlock(&block_group->lock);
+
+ map = block_group->physical_map;
+- device = map->stripes[0].dev;
+- physical = map->stripes[0].physical;
++ for (i = 0; i < map->num_stripes; i++) {
++ int ret;
+
+- if (!device->zone_info->max_active_zones)
+- goto out;
++ device = map->stripes[i].dev;
++ physical = map->stripes[i].physical;
++
++ if (device->zone_info->max_active_zones == 0)
++ continue;
+
+- btrfs_dev_clear_active_zone(device, physical);
++ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
++ physical >> SECTOR_SHIFT,
++ device->zone_info->zone_size >> SECTOR_SHIFT,
++ GFP_NOFS);
++
++ if (ret)
++ return;
++
++ btrfs_dev_clear_active_zone(device, physical);
++ }
+
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ ASSERT(!list_empty(&block_group->active_bg_list));
+@@ -2116,3 +2129,30 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
++
++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
++ u64 length)
++{
++ struct btrfs_block_group *block_group;
++
++ if (!btrfs_is_zoned(fs_info))
++ return;
++
++ block_group = btrfs_lookup_block_group(fs_info, logical);
++ /* It should be called on a previous data relocation block group. */
++ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
++
++ spin_lock(&block_group->lock);
++ if (!block_group->zoned_data_reloc_ongoing)
++ goto out;
++
++ /* All relocation extents are written. */
++ if (block_group->start + block_group->alloc_offset == logical + length) {
++ /* Now, release this block group for further allocations. */
++ block_group->zoned_data_reloc_ongoing = 0;
++ }
++
++out:
++ spin_unlock(&block_group->lock);
++ btrfs_put_block_group(block_group);
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 2d898970aec5f..cf6320feef464 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -80,6 +80,8 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb);
+ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
+ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
++void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
++ u64 length);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+ struct blk_zone *zone)
+@@ -241,6 +243,9 @@ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
+
+ static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
++
++static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
++ u64 logical, u64 length) { }
+ #endif
+
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
+index 3f53bc27a19bf..3d088a88f8320 100644
+--- a/include/linux/ioprio.h
++++ b/include/linux/ioprio.h
+@@ -11,7 +11,7 @@
+ /*
+ * Default IO priority.
+ */
+-#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM)
++#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0)
+
+ /*
+ * Check that a priority value has a valid class.
+diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
+index 9d09f489b60e0..2e0f75bcb7fd1 100644
+--- a/kernel/entry/kvm.c
++++ b/kernel/entry/kvm.c
+@@ -9,12 +9,6 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
+ int ret;
+
+ if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
+- clear_notify_signal();
+- if (task_work_pending(current))
+- task_work_run();
+- }
+-
+- if (ti_work & _TIF_SIGPENDING) {
+ kvm_handle_signal_exit(vcpu);
+ return -EINTR;
+ }
+diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
+index 5d09ded0c491f..04b7e3654ff77 100644
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -301,6 +301,7 @@
+ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
++#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
+index ad084326f24c2..f951147cc7fdc 100644
+--- a/tools/arch/x86/include/asm/msr-index.h
++++ b/tools/arch/x86/include/asm/msr-index.h
+@@ -148,6 +148,10 @@
+ * are restricted to targets in
+ * kernel.
+ */
++#define ARCH_CAP_PBRSB_NO BIT(24) /*
++ * Not susceptible to Post-Barrier
++ * Return Stack Buffer Predictions.
++ */
+
+ #define MSR_IA32_FLUSH_CMD 0x0000010b
+ #define L1D_FLUSH BIT(0) /*
+diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
+index 5a5bd74f55bd5..9c366b3a676db 100755
+--- a/tools/kvm/kvm_stat/kvm_stat
++++ b/tools/kvm/kvm_stat/kvm_stat
+@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
+ .format(values))
+ if len(pids) > 1:
+ sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
+- ' to specify the desired pid'.format(" ".join(pids)))
++ ' to specify the desired pid'
++ .format(" ".join(map(str, pids))))
+ namespace.pid = pids[0]
+
+ argparser = argparse.ArgumentParser(description=description_text,
+diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+index e0b0164e9af85..be1d9728c4cea 100644
+--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
++++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
+
+ void ucall(uint64_t cmd, int nargs, ...)
+ {
+- struct ucall uc = {
+- .cmd = cmd,
+- };
++ struct ucall uc = {};
+ va_list va;
+ int i;
+
++ WRITE_ONCE(uc.cmd, cmd);
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+- uc.args[i] = va_arg(va, uint64_t);
++ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
++ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
+ }
+
+ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
+index 722df3a28791c..ddd68ba0c99fc 100644
+--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
++++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
+@@ -110,6 +110,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
++ uint64_t region_end_gfn;
+ int i;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+@@ -144,18 +145,29 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+
+ pta->vm = vm;
+
++ /* Put the test region at the top guest physical memory. */
++ region_end_gfn = vm_get_max_gfn(vm) + 1;
++
++#ifdef __x86_64__
++ /*
++ * When running vCPUs in L2, restrict the test region to 48 bits to
++ * avoid needing 5-level page tables to identity map L2.
++ */
++ if (pta->nested)
++ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
++#endif
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
++ TEST_ASSERT(guest_num_pages < region_end_gfn,
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " vcpus: %d wss: %" PRIx64 "]\n",
+- guest_num_pages, vm_get_max_gfn(vm), vcpus,
++ guest_num_pages, region_end_gfn - 1, vcpus,
+ vcpu_memory_bytes);
+
+- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
++ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = align_down(pta->gpa, backing_src_pagesz);
+ #ifdef __s390x__
+ /* Align to 1M (segment size) */
+diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+index e0b2bb1339b16..3330fb183c680 100644
+--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
++++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+@@ -44,7 +44,7 @@ static inline void nop_loop(void)
+ {
+ int i;
+
+- for (i = 0; i < 1000000; i++)
++ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+ }
+
+@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+- /* First, check MSR-based clocksource */
++ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+ tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+- /* First, check MSR-based clocksource */
++ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
+index 9b68658b6bb85..5b98f3ee58a58 100644
+--- a/tools/vm/slabinfo.c
++++ b/tools/vm/slabinfo.c
+@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+ return l;
+ }
+
++static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
++{
++ char x[128];
++ FILE *f;
++ size_t l;
++
++ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
++ f = fopen(x, "r");
++ if (!f) {
++ buffer[0] = 0;
++ l = 0;
++ } else {
++ l = fread(buffer, 1, sizeof(buffer), f);
++ buffer[l] = 0;
++ fclose(f);
++ }
++ return l;
++}
+
+ /*
+ * Put a size string together
+@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
+ {
+ printf("\n%s: Kernel object allocation\n", s->name);
+ printf("-----------------------------------------------------------------------\n");
+- if (read_slab_obj(s, "alloc_calls"))
++ if (read_debug_slab_obj(s, "alloc_traces"))
++ printf("%s", buffer);
++ else if (read_slab_obj(s, "alloc_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+
+ printf("\n%s: Kernel object freeing\n", s->name);
+ printf("------------------------------------------------------------------------\n");
+- if (read_slab_obj(s, "free_calls"))
++ if (read_debug_slab_obj(s, "free_traces"))
++ printf("%s", buffer);
++ else if (read_slab_obj(s, "free_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 24cb37d19c638..7f1d19689701b 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -3327,9 +3327,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
+
+ vcpu->stat.generic.blocking = 1;
+
++ preempt_disable();
+ kvm_arch_vcpu_blocking(vcpu);
+-
+ prepare_to_rcuwait(wait);
++ preempt_enable();
++
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+@@ -3339,9 +3341,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
+ waited = true;
+ schedule();
+ }
+- finish_rcuwait(wait);
+
++ preempt_disable();
++ finish_rcuwait(wait);
+ kvm_arch_vcpu_unblocking(vcpu);
++ preempt_enable();
+
+ vcpu->stat.generic.blocking = 0;
+