Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:4.14 commit in: /
Date: Fri, 28 Feb 2020 16:34:56
Message-Id: 1582907675.8223e3c16d2d2439d51839abc37579c3c7b058c7.mpagano@gentoo
1 commit: 8223e3c16d2d2439d51839abc37579c3c7b058c7
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Fri Feb 28 16:34:35 2020 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Fri Feb 28 16:34:35 2020 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=8223e3c1
7
8 Linux patch 4.14.172
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 +
13 1171_linux-4.14.172.patch | 15749 ++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 15753 insertions(+)
15
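For reference, genpatches files such as this one are -p1 formatted and are applied to the kernel source tree in the numeric order listed in 0000_README; the gentoo-sources ebuilds do this automatically. A minimal manual sketch, assuming an already-patched 4.14.171 tree under /usr/src/linux (both the path and the tree state are illustrative, not part of this commit):

    # From the top of the kernel source tree: apply the new
    # incremental patch, then check the reported version.
    cd /usr/src/linux
    patch -p1 < 1171_linux-4.14.172.patch
    make kernelversion    # should now print 4.14.172

The diff below records the 0000_README update followed by the full contents of the new patch file.
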
16 diff --git a/0000_README b/0000_README
17 index 9b3f4c4..699eb35 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -727,6 +727,10 @@ Patch: 1170_linux-4.14.171.patch
21 From: https://www.kernel.org
22 Desc: Linux 4.14.171
23
24 +Patch: 1171_linux-4.14.172.patch
25 +From: https://www.kernel.org
26 +Desc: Linux 4.14.172
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1171_linux-4.14.172.patch b/1171_linux-4.14.172.patch
33 new file mode 100644
34 index 0000000..6532eed
35 --- /dev/null
36 +++ b/1171_linux-4.14.172.patch
37 @@ -0,0 +1,15749 @@
38 +diff --git a/MAINTAINERS b/MAINTAINERS
39 +index 029f96c43250..e2dd302345c2 100644
40 +--- a/MAINTAINERS
41 ++++ b/MAINTAINERS
42 +@@ -6877,7 +6877,7 @@ M: Joonas Lahtinen <joonas.lahtinen@×××××××××××.com>
43 + M: Rodrigo Vivi <rodrigo.vivi@×××××.com>
44 + L: intel-gfx@×××××××××××××××××.org
45 + W: https://01.org/linuxgraphics/
46 +-B: https://01.org/linuxgraphics/documentation/how-report-bugs
47 ++B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
48 + C: irc://chat.freenode.net/intel-gfx
49 + Q: http://patchwork.freedesktop.org/project/intel-gfx/
50 + T: git git://anongit.freedesktop.org/drm-intel
51 +diff --git a/Makefile b/Makefile
52 +index f2657f4838db..6d3cecad7f1e 100644
53 +--- a/Makefile
54 ++++ b/Makefile
55 +@@ -1,7 +1,7 @@
56 + # SPDX-License-Identifier: GPL-2.0
57 + VERSION = 4
58 + PATCHLEVEL = 14
59 +-SUBLEVEL = 171
60 ++SUBLEVEL = 172
61 + EXTRAVERSION =
62 + NAME = Petit Gorille
63 +
64 +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
65 +index cf69aab648fb..7a8fbe9a077b 100644
66 +--- a/arch/arm/Kconfig
67 ++++ b/arch/arm/Kconfig
68 +@@ -1533,12 +1533,10 @@ config THUMB2_KERNEL
69 + bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY
70 + depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K
71 + default y if CPU_THUMBONLY
72 +- select ARM_ASM_UNIFIED
73 + select ARM_UNWIND
74 + help
75 + By enabling this option, the kernel will be compiled in
76 +- Thumb-2 mode. A compiler/assembler that understand the unified
77 +- ARM-Thumb syntax is needed.
78 ++ Thumb-2 mode.
79 +
80 + If unsure, say N.
81 +
82 +@@ -1573,9 +1571,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
83 +
84 + Unless you are sure your tools don't have this problem, say Y.
85 +
86 +-config ARM_ASM_UNIFIED
87 +- bool
88 +-
89 + config ARM_PATCH_IDIV
90 + bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
91 + depends on CPU_32v7 && !XIP_KERNEL
92 +@@ -2010,7 +2005,7 @@ config XIP_PHYS_ADDR
93 + config KEXEC
94 + bool "Kexec system call (EXPERIMENTAL)"
95 + depends on (!SMP || PM_SLEEP_SMP)
96 +- depends on !CPU_V7M
97 ++ depends on MMU
98 + select KEXEC_CORE
99 + help
100 + kexec is a system call that implements the ability to shutdown your
101 +diff --git a/arch/arm/Makefile b/arch/arm/Makefile
102 +index 17e80f483281..234ee43b4438 100644
103 +--- a/arch/arm/Makefile
104 ++++ b/arch/arm/Makefile
105 +@@ -115,9 +115,11 @@ ifeq ($(CONFIG_ARM_UNWIND),y)
106 + CFLAGS_ABI +=-funwind-tables
107 + endif
108 +
109 ++# Accept old syntax despite ".syntax unified"
110 ++AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
111 ++
112 + ifeq ($(CONFIG_THUMB2_KERNEL),y)
113 + AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it)
114 +-AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
115 + CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
116 + AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb
117 + # Work around buggy relocation from gas if requested:
118 +@@ -125,7 +127,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y)
119 + CFLAGS_MODULE +=-fno-optimize-sibling-calls
120 + endif
121 + else
122 +-CFLAGS_ISA :=$(call cc-option,-marm,)
123 ++CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN)
124 + AFLAGS_ISA :=$(CFLAGS_ISA)
125 + endif
126 +
127 +diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi
128 +index 849eb3443cde..719e63092c2e 100644
129 +--- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi
130 ++++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi
131 +@@ -587,7 +587,7 @@
132 + pinctrl-0 = <&pinctrl_usdhc2>;
133 + bus-width = <4>;
134 + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>;
135 +- wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>;
136 ++ disable-wp;
137 + vmmc-supply = <&reg_3p3v_sd>;
138 + vqmmc-supply = <&reg_3p3v>;
139 + status = "okay";
140 +@@ -598,7 +598,7 @@
141 + pinctrl-0 = <&pinctrl_usdhc3>;
142 + bus-width = <4>;
143 + cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
144 +- wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
145 ++ disable-wp;
146 + vmmc-supply = <&reg_3p3v_sd>;
147 + vqmmc-supply = <&reg_3p3v>;
148 + status = "okay";
149 +@@ -1001,7 +1001,6 @@
150 + MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059
151 + MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059
152 + MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059
153 +- MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040
154 + MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040
155 + >;
156 + };
157 +@@ -1014,7 +1013,6 @@
158 + MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059
159 + MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059
160 + MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059
161 +- MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040
162 + MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040
163 +
164 + >;
165 +diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi
166 +index 8ee0b2ca5d39..2face089d65b 100644
167 +--- a/arch/arm/boot/dts/r8a7779.dtsi
168 ++++ b/arch/arm/boot/dts/r8a7779.dtsi
169 +@@ -67,6 +67,14 @@
170 + <0xf0000100 0x100>;
171 + };
172 +
173 ++ timer@f0000200 {
174 ++ compatible = "arm,cortex-a9-global-timer";
175 ++ reg = <0xf0000200 0x100>;
176 ++ interrupts = <GIC_PPI 11
177 ++ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
178 ++ clocks = <&cpg_clocks R8A7779_CLK_ZS>;
179 ++ };
180 ++
181 + timer@f0000600 {
182 + compatible = "arm,cortex-a9-twd-timer";
183 + reg = <0xf0000600 0x20>;
184 +diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
185 +index a91ae499614c..2c3b952be63e 100644
186 +--- a/arch/arm/include/asm/unified.h
187 ++++ b/arch/arm/include/asm/unified.h
188 +@@ -20,8 +20,10 @@
189 + #ifndef __ASM_UNIFIED_H
190 + #define __ASM_UNIFIED_H
191 +
192 +-#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED)
193 ++#if defined(__ASSEMBLY__)
194 + .syntax unified
195 ++#else
196 ++__asm__(".syntax unified");
197 + #endif
198 +
199 + #ifdef CONFIG_CPU_V7M
200 +@@ -64,77 +66,4 @@
201 +
202 + #endif /* CONFIG_THUMB2_KERNEL */
203 +
204 +-#ifndef CONFIG_ARM_ASM_UNIFIED
205 +-
206 +-/*
207 +- * If the unified assembly syntax isn't used (in ARM mode), these
208 +- * macros expand to an empty string
209 +- */
210 +-#ifdef __ASSEMBLY__
211 +- .macro it, cond
212 +- .endm
213 +- .macro itt, cond
214 +- .endm
215 +- .macro ite, cond
216 +- .endm
217 +- .macro ittt, cond
218 +- .endm
219 +- .macro itte, cond
220 +- .endm
221 +- .macro itet, cond
222 +- .endm
223 +- .macro itee, cond
224 +- .endm
225 +- .macro itttt, cond
226 +- .endm
227 +- .macro ittte, cond
228 +- .endm
229 +- .macro ittet, cond
230 +- .endm
231 +- .macro ittee, cond
232 +- .endm
233 +- .macro itett, cond
234 +- .endm
235 +- .macro itete, cond
236 +- .endm
237 +- .macro iteet, cond
238 +- .endm
239 +- .macro iteee, cond
240 +- .endm
241 +-#else /* !__ASSEMBLY__ */
242 +-__asm__(
243 +-" .macro it, cond\n"
244 +-" .endm\n"
245 +-" .macro itt, cond\n"
246 +-" .endm\n"
247 +-" .macro ite, cond\n"
248 +-" .endm\n"
249 +-" .macro ittt, cond\n"
250 +-" .endm\n"
251 +-" .macro itte, cond\n"
252 +-" .endm\n"
253 +-" .macro itet, cond\n"
254 +-" .endm\n"
255 +-" .macro itee, cond\n"
256 +-" .endm\n"
257 +-" .macro itttt, cond\n"
258 +-" .endm\n"
259 +-" .macro ittte, cond\n"
260 +-" .endm\n"
261 +-" .macro ittet, cond\n"
262 +-" .endm\n"
263 +-" .macro ittee, cond\n"
264 +-" .endm\n"
265 +-" .macro itett, cond\n"
266 +-" .endm\n"
267 +-" .macro itete, cond\n"
268 +-" .endm\n"
269 +-" .macro iteet, cond\n"
270 +-" .endm\n"
271 +-" .macro iteee, cond\n"
272 +-" .endm\n");
273 +-#endif /* __ASSEMBLY__ */
274 +-
275 +-#endif /* CONFIG_ARM_ASM_UNIFIED */
276 +-
277 + #endif /* !__ASM_UNIFIED_H */
278 +diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
279 +index 6f372ec055dd..da2949586c7a 100644
280 +--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
281 ++++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
282 +@@ -788,6 +788,8 @@
283 + interrupts = <0 138 0>;
284 + phys = <&hsusb_phy2>;
285 + phy-names = "usb2-phy";
286 ++ snps,dis_u2_susphy_quirk;
287 ++ snps,dis_enblslpm_quirk;
288 + };
289 + };
290 +
291 +@@ -817,6 +819,8 @@
292 + interrupts = <0 131 0>;
293 + phys = <&hsusb_phy1>, <&ssusb_phy_0>;
294 + phy-names = "usb2-phy", "usb3-phy";
295 ++ snps,dis_u2_susphy_quirk;
296 ++ snps,dis_enblslpm_quirk;
297 + };
298 + };
299 + };
300 +diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
301 +index a91933b1e2e6..4cd4a793dc32 100644
302 +--- a/arch/arm64/include/asm/alternative.h
303 ++++ b/arch/arm64/include/asm/alternative.h
304 +@@ -30,13 +30,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
305 + void __init apply_alternatives_all(void);
306 + void apply_alternatives(void *start, size_t length);
307 +
308 +-#define ALTINSTR_ENTRY(feature,cb) \
309 ++#define ALTINSTR_ENTRY(feature) \
310 + " .word 661b - .\n" /* label */ \
311 +- " .if " __stringify(cb) " == 0\n" \
312 + " .word 663f - .\n" /* new instruction */ \
313 +- " .else\n" \
314 ++ " .hword " __stringify(feature) "\n" /* feature bit */ \
315 ++ " .byte 662b-661b\n" /* source len */ \
316 ++ " .byte 664f-663f\n" /* replacement len */
317 ++
318 ++#define ALTINSTR_ENTRY_CB(feature, cb) \
319 ++ " .word 661b - .\n" /* label */ \
320 + " .word " __stringify(cb) "- .\n" /* callback */ \
321 +- " .endif\n" \
322 + " .hword " __stringify(feature) "\n" /* feature bit */ \
323 + " .byte 662b-661b\n" /* source len */ \
324 + " .byte 664f-663f\n" /* replacement len */
325 +@@ -57,15 +60,14 @@ void apply_alternatives(void *start, size_t length);
326 + *
327 + * Alternatives with callbacks do not generate replacement instructions.
328 + */
329 +-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
330 ++#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
331 + ".if "__stringify(cfg_enabled)" == 1\n" \
332 + "661:\n\t" \
333 + oldinstr "\n" \
334 + "662:\n" \
335 + ".pushsection .altinstructions,\"a\"\n" \
336 +- ALTINSTR_ENTRY(feature,cb) \
337 ++ ALTINSTR_ENTRY(feature) \
338 + ".popsection\n" \
339 +- " .if " __stringify(cb) " == 0\n" \
340 + ".pushsection .altinstr_replacement, \"a\"\n" \
341 + "663:\n\t" \
342 + newinstr "\n" \
343 +@@ -73,17 +75,25 @@ void apply_alternatives(void *start, size_t length);
344 + ".popsection\n\t" \
345 + ".org . - (664b-663b) + (662b-661b)\n\t" \
346 + ".org . - (662b-661b) + (664b-663b)\n" \
347 +- ".else\n\t" \
348 ++ ".endif\n"
349 ++
350 ++#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
351 ++ ".if "__stringify(cfg_enabled)" == 1\n" \
352 ++ "661:\n\t" \
353 ++ oldinstr "\n" \
354 ++ "662:\n" \
355 ++ ".pushsection .altinstructions,\"a\"\n" \
356 ++ ALTINSTR_ENTRY_CB(feature, cb) \
357 ++ ".popsection\n" \
358 + "663:\n\t" \
359 + "664:\n\t" \
360 +- ".endif\n" \
361 + ".endif\n"
362 +
363 + #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
364 +- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
365 ++ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
366 +
367 + #define ALTERNATIVE_CB(oldinstr, cb) \
368 +- __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
369 ++ __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
370 + #else
371 +
372 + #include <asm/assembler.h>
373 +diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
374 +index c477fd34a912..6b3bb67596ae 100644
375 +--- a/arch/arm64/kernel/cpufeature.c
376 ++++ b/arch/arm64/kernel/cpufeature.c
377 +@@ -41,9 +41,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap);
378 + #define COMPAT_ELF_HWCAP_DEFAULT \
379 + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
380 + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
381 +- COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
382 +- COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
383 +- COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
384 ++ COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\
385 + COMPAT_HWCAP_LPAE)
386 + unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
387 + unsigned int compat_elf_hwcap2 __read_mostly;
388 +@@ -1134,17 +1132,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
389 + {},
390 + };
391 +
392 +-#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \
393 +- { \
394 +- .desc = #cap, \
395 +- .type = ARM64_CPUCAP_SYSTEM_FEATURE, \
396 ++
397 ++#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \
398 + .matches = has_cpuid_feature, \
399 + .sys_reg = reg, \
400 + .field_pos = field, \
401 + .sign = s, \
402 + .min_field_value = min_value, \
403 ++
404 ++#define __HWCAP_CAP(name, cap_type, cap) \
405 ++ .desc = name, \
406 ++ .type = ARM64_CPUCAP_SYSTEM_FEATURE, \
407 + .hwcap_type = cap_type, \
408 + .hwcap = cap, \
409 ++
410 ++#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \
411 ++ { \
412 ++ __HWCAP_CAP(#cap, cap_type, cap) \
413 ++ HWCAP_CPUID_MATCH(reg, field, s, min_value) \
414 ++ }
415 ++
416 ++#define HWCAP_CAP_MATCH(match, cap_type, cap) \
417 ++ { \
418 ++ __HWCAP_CAP(#cap, cap_type, cap) \
419 ++ .matches = match, \
420 + }
421 +
422 + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
423 +@@ -1177,8 +1188,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
424 + {},
425 + };
426 +
427 ++#ifdef CONFIG_COMPAT
428 ++static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
429 ++{
430 ++ /*
431 ++ * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available,
432 ++ * in line with that of arm32 as in vfp_init(). We make sure that the
433 ++ * check is future proof, by making sure value is non-zero.
434 ++ */
435 ++ u32 mvfr1;
436 ++
437 ++ WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
438 ++ if (scope == SCOPE_SYSTEM)
439 ++ mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1);
440 ++ else
441 ++ mvfr1 = read_sysreg_s(SYS_MVFR1_EL1);
442 ++
443 ++ return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) &&
444 ++ cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) &&
445 ++ cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT);
446 ++}
447 ++#endif
448 ++
449 + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
450 + #ifdef CONFIG_COMPAT
451 ++ HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
452 ++ HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
453 ++ /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
454 ++ HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
455 ++ HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
456 + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
457 + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
458 + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
459 +diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
460 +index f4fdf6420ac5..4cd962f6c430 100644
461 +--- a/arch/arm64/kernel/fpsimd.c
462 ++++ b/arch/arm64/kernel/fpsimd.c
463 +@@ -206,8 +206,19 @@ void fpsimd_preserve_current_state(void)
464 + */
465 + void fpsimd_restore_current_state(void)
466 + {
467 +- if (!system_supports_fpsimd())
468 ++ /*
469 ++ * For the tasks that were created before we detected the absence of
470 ++ * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
471 ++ * e.g, init. This could be then inherited by the children processes.
472 ++ * If we later detect that the system doesn't support FP/SIMD,
473 ++ * we must clear the flag for all the tasks to indicate that the
474 ++ * FPSTATE is clean (as we can't have one) to avoid looping for ever in
475 ++ * do_notify_resume().
476 ++ */
477 ++ if (!system_supports_fpsimd()) {
478 ++ clear_thread_flag(TIF_FOREIGN_FPSTATE);
479 + return;
480 ++ }
481 +
482 + local_bh_disable();
483 +
484 +@@ -229,7 +240,7 @@ void fpsimd_restore_current_state(void)
485 + */
486 + void fpsimd_update_current_state(struct fpsimd_state *state)
487 + {
488 +- if (!system_supports_fpsimd())
489 ++ if (WARN_ON(!system_supports_fpsimd()))
490 + return;
491 +
492 + local_bh_disable();
493 +diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
494 +index 243fd247d04e..ee5ce03c9315 100644
495 +--- a/arch/arm64/kernel/process.c
496 ++++ b/arch/arm64/kernel/process.c
497 +@@ -354,6 +354,13 @@ static void ssbs_thread_switch(struct task_struct *next)
498 + if (unlikely(next->flags & PF_KTHREAD))
499 + return;
500 +
501 ++ /*
502 ++ * If all CPUs implement the SSBS extension, then we just need to
503 ++ * context-switch the PSTATE field.
504 ++ */
505 ++ if (cpu_have_feature(cpu_feature(SSBS)))
506 ++ return;
507 ++
508 + /* If the mitigation is enabled, then we leave SSBS clear. */
509 + if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
510 + test_tsk_thread_flag(next, TIF_SSBD))
511 +diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
512 +index 242527f29c41..e230b4dff960 100644
513 +--- a/arch/arm64/kernel/ptrace.c
514 ++++ b/arch/arm64/kernel/ptrace.c
515 +@@ -624,6 +624,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
516 + return 0;
517 + }
518 +
519 ++static int fpr_active(struct task_struct *target, const struct user_regset *regset)
520 ++{
521 ++ if (!system_supports_fpsimd())
522 ++ return -ENODEV;
523 ++ return regset->n;
524 ++}
525 ++
526 + /*
527 + * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
528 + */
529 +@@ -634,6 +641,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
530 + struct user_fpsimd_state *uregs;
531 + uregs = &target->thread.fpsimd_state.user_fpsimd;
532 +
533 ++ if (!system_supports_fpsimd())
534 ++ return -EINVAL;
535 ++
536 + if (target == current)
537 + fpsimd_preserve_current_state();
538 +
539 +@@ -648,6 +658,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
540 + struct user_fpsimd_state newstate =
541 + target->thread.fpsimd_state.user_fpsimd;
542 +
543 ++ if (!system_supports_fpsimd())
544 ++ return -EINVAL;
545 ++
546 + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
547 + if (ret)
548 + return ret;
549 +@@ -740,6 +753,7 @@ static const struct user_regset aarch64_regsets[] = {
550 + */
551 + .size = sizeof(u32),
552 + .align = sizeof(u32),
553 ++ .active = fpr_active,
554 + .get = fpr_get,
555 + .set = fpr_set
556 + },
557 +@@ -914,6 +928,9 @@ static int compat_vfp_get(struct task_struct *target,
558 + compat_ulong_t fpscr;
559 + int ret, vregs_end_pos;
560 +
561 ++ if (!system_supports_fpsimd())
562 ++ return -EINVAL;
563 ++
564 + uregs = &target->thread.fpsimd_state.user_fpsimd;
565 +
566 + if (target == current)
567 +@@ -947,6 +964,9 @@ static int compat_vfp_set(struct task_struct *target,
568 + compat_ulong_t fpscr;
569 + int ret, vregs_end_pos;
570 +
571 ++ if (!system_supports_fpsimd())
572 ++ return -EINVAL;
573 ++
574 + uregs = &target->thread.fpsimd_state.user_fpsimd;
575 +
576 + vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
577 +@@ -1004,6 +1024,7 @@ static const struct user_regset aarch32_regsets[] = {
578 + .n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
579 + .size = sizeof(compat_ulong_t),
580 + .align = sizeof(compat_ulong_t),
581 ++ .active = fpr_active,
582 + .get = compat_vfp_get,
583 + .set = compat_vfp_set
584 + },
585 +diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c
586 +index 0bde47e4fa69..dcba53803fa5 100644
587 +--- a/arch/microblaze/kernel/cpu/cache.c
588 ++++ b/arch/microblaze/kernel/cpu/cache.c
589 +@@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void)
590 + #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \
591 + do { \
592 + int align = ~(cache_line_length - 1); \
593 +- end = min(start + cache_size, end); \
594 ++ if (start < UINT_MAX - cache_size) \
595 ++ end = min(start + cache_size, end); \
596 + start &= align; \
597 + } while (0)
598 +
599 +diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c
600 +index 25a97cc0ee33..0db4cc3196eb 100644
601 +--- a/arch/mips/loongson64/loongson-3/platform.c
602 ++++ b/arch/mips/loongson64/loongson-3/platform.c
603 +@@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void)
604 + continue;
605 +
606 + pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
607 ++ if (!pdev)
608 ++ return -ENOMEM;
609 ++
610 + pdev->name = loongson_sysconf.sensors[i].name;
611 + pdev->id = loongson_sysconf.sensors[i].id;
612 + pdev->dev.platform_data = &loongson_sysconf.sensors[i];
613 +diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
614 +index 470284f9e4f6..5a48c93aaa1b 100644
615 +--- a/arch/powerpc/kernel/eeh_driver.c
616 ++++ b/arch/powerpc/kernel/eeh_driver.c
617 +@@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata)
618 +
619 + pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
620 + edev->pdev = NULL;
621 +-
622 +- /*
623 +- * We have to set the VF PE number to invalid one, which is
624 +- * required to plug the VF successfully.
625 +- */
626 +- pdn->pe_number = IODA_INVALID_PE;
627 + #endif
628 + if (rmv_data)
629 + list_add(&edev->rmv_list, &rmv_data->edev_list);
630 +diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
631 +index 0e395afbf0f4..0e45a446a8c7 100644
632 +--- a/arch/powerpc/kernel/pci_dn.c
633 ++++ b/arch/powerpc/kernel/pci_dn.c
634 +@@ -261,9 +261,22 @@ void remove_dev_pci_data(struct pci_dev *pdev)
635 + continue;
636 +
637 + #ifdef CONFIG_EEH
638 +- /* Release EEH device for the VF */
639 ++ /*
640 ++ * Release EEH state for this VF. The PCI core
641 ++ * has already torn down the pci_dev for this VF, but
642 ++ * we're responsible to removing the eeh_dev since it
643 ++ * has the same lifetime as the pci_dn that spawned it.
644 ++ */
645 + edev = pdn_to_eeh_dev(pdn);
646 + if (edev) {
647 ++ /*
648 ++ * We allocate pci_dn's for the totalvfs count,
649 ++ * but only only the vfs that were activated
650 ++ * have a configured PE.
651 ++ */
652 ++ if (edev->pe)
653 ++ eeh_rmv_from_parent_pe(edev);
654 ++
655 + pdn->edev = NULL;
656 + kfree(edev);
657 + }
658 +diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
659 +index d3d5796f7df6..36ef504eeab3 100644
660 +--- a/arch/powerpc/platforms/powernv/pci-ioda.c
661 ++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
662 +@@ -1523,6 +1523,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
663 +
664 + /* Reserve PE for each VF */
665 + for (vf_index = 0; vf_index < num_vfs; vf_index++) {
666 ++ int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
667 ++ int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
668 ++ struct pci_dn *vf_pdn;
669 ++
670 + if (pdn->m64_single_mode)
671 + pe_num = pdn->pe_num_map[vf_index];
672 + else
673 +@@ -1535,13 +1539,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
674 + pe->pbus = NULL;
675 + pe->parent_dev = pdev;
676 + pe->mve_number = -1;
677 +- pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
678 +- pci_iov_virtfn_devfn(pdev, vf_index);
679 ++ pe->rid = (vf_bus << 8) | vf_devfn;
680 +
681 + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
682 + hose->global_number, pdev->bus->number,
683 +- PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
684 +- PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
685 ++ PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
686 +
687 + if (pnv_ioda_configure_pe(phb, pe)) {
688 + /* XXX What do we do here ? */
689 +@@ -1555,6 +1557,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
690 + list_add_tail(&pe->list, &phb->ioda.pe_list);
691 + mutex_unlock(&phb->ioda.pe_list_mutex);
692 +
693 ++ /* associate this pe to it's pdn */
694 ++ list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
695 ++ if (vf_pdn->busno == vf_bus &&
696 ++ vf_pdn->devfn == vf_devfn) {
697 ++ vf_pdn->pe_number = pe_num;
698 ++ break;
699 ++ }
700 ++ }
701 ++
702 + pnv_pci_ioda2_setup_dma_pe(phb, pe);
703 + }
704 + }
705 +diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
706 +index 961c131a5b7e..844ca1886063 100644
707 +--- a/arch/powerpc/platforms/powernv/pci.c
708 ++++ b/arch/powerpc/platforms/powernv/pci.c
709 +@@ -978,16 +978,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
710 + struct pnv_phb *phb = hose->private_data;
711 + #ifdef CONFIG_PCI_IOV
712 + struct pnv_ioda_pe *pe;
713 +- struct pci_dn *pdn;
714 +
715 + /* Fix the VF pdn PE number */
716 + if (pdev->is_virtfn) {
717 +- pdn = pci_get_pdn(pdev);
718 +- WARN_ON(pdn->pe_number != IODA_INVALID_PE);
719 + list_for_each_entry(pe, &phb->ioda.pe_list, list) {
720 + if (pe->rid == ((pdev->bus->number << 8) |
721 + (pdev->devfn & 0xff))) {
722 +- pdn->pe_number = pe->pe_number;
723 + pe->pdev = pdev;
724 + break;
725 + }
726 +diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
727 +index 779c589b7089..5f2e272895ff 100644
728 +--- a/arch/s390/include/asm/page.h
729 ++++ b/arch/s390/include/asm/page.h
730 +@@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end);
731 +
732 + static inline void storage_key_init_range(unsigned long start, unsigned long end)
733 + {
734 +- if (PAGE_DEFAULT_KEY)
735 ++ if (PAGE_DEFAULT_KEY != 0)
736 + __storage_key_init_range(start, end);
737 + }
738 +
739 +diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
740 +index 2dc9eb4e1acc..b6a4ce9dafaf 100644
741 +--- a/arch/s390/include/asm/timex.h
742 ++++ b/arch/s390/include/asm/timex.h
743 +@@ -155,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk)
744 +
745 + static inline unsigned long long get_tod_clock(void)
746 + {
747 +- unsigned char clk[STORE_CLOCK_EXT_SIZE];
748 ++ char clk[STORE_CLOCK_EXT_SIZE];
749 +
750 + get_tod_clock_ext(clk);
751 + return *((unsigned long long *)&clk[1]);
752 +diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
753 +index 27110f3294ed..0cfd5a83a1da 100644
754 +--- a/arch/s390/kernel/mcount.S
755 ++++ b/arch/s390/kernel/mcount.S
756 +@@ -25,6 +25,12 @@ ENTRY(ftrace_stub)
757 + #define STACK_PTREGS (STACK_FRAME_OVERHEAD)
758 + #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
759 + #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
760 ++#ifdef __PACK_STACK
761 ++/* allocate just enough for r14, r15 and backchain */
762 ++#define TRACED_FUNC_FRAME_SIZE 24
763 ++#else
764 ++#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
765 ++#endif
766 +
767 + ENTRY(_mcount)
768 + BR_EX %r14
769 +@@ -38,9 +44,16 @@ ENTRY(ftrace_caller)
770 + #ifndef CC_USING_HOTPATCH
771 + aghi %r0,MCOUNT_RETURN_FIXUP
772 + #endif
773 +- aghi %r15,-STACK_FRAME_SIZE
774 ++ # allocate stack frame for ftrace_caller to contain traced function
775 ++ aghi %r15,-TRACED_FUNC_FRAME_SIZE
776 + stg %r1,__SF_BACKCHAIN(%r15)
777 ++ stg %r0,(__SF_GPRS+8*8)(%r15)
778 ++ stg %r15,(__SF_GPRS+9*8)(%r15)
779 ++ # allocate pt_regs and stack frame for ftrace_trace_function
780 ++ aghi %r15,-STACK_FRAME_SIZE
781 + stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
782 ++ aghi %r1,-TRACED_FUNC_FRAME_SIZE
783 ++ stg %r1,__SF_BACKCHAIN(%r15)
784 + stg %r0,(STACK_PTREGS_PSW+8)(%r15)
785 + stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
786 + #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
787 +diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
788 +index 28f3796d23c8..61d25e2c82ef 100644
789 +--- a/arch/s390/kvm/interrupt.c
790 ++++ b/arch/s390/kvm/interrupt.c
791 +@@ -1913,7 +1913,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
792 + return -EINVAL;
793 +
794 + if (!test_kvm_facility(kvm, 72))
795 +- return -ENOTSUPP;
796 ++ return -EOPNOTSUPP;
797 +
798 + mutex_lock(&fi->ais_lock);
799 + ais.simm = fi->simm;
800 +@@ -2214,7 +2214,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
801 + int ret = 0;
802 +
803 + if (!test_kvm_facility(kvm, 72))
804 +- return -ENOTSUPP;
805 ++ return -EOPNOTSUPP;
806 +
807 + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
808 + return -EFAULT;
809 +@@ -2294,7 +2294,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
810 + struct kvm_s390_ais_all ais;
811 +
812 + if (!test_kvm_facility(kvm, 72))
813 +- return -ENOTSUPP;
814 ++ return -EOPNOTSUPP;
815 +
816 + if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
817 + return -EFAULT;
818 +diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
819 +index d516e5d48818..b887cc402b71 100644
820 +--- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h
821 ++++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
822 +@@ -78,8 +78,15 @@ enum {
823 + GPIO_FN_WDTOVF,
824 +
825 + /* CAN */
826 +- GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1,
827 +- GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2,
828 ++ GPIO_FN_CTX2, GPIO_FN_CRX2,
829 ++ GPIO_FN_CTX1, GPIO_FN_CRX1,
830 ++ GPIO_FN_CTX0, GPIO_FN_CRX0,
831 ++ GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1,
832 ++ GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2,
833 ++ GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20,
834 ++ GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22,
835 ++ GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22,
836 ++ GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20,
837 +
838 + /* DMAC */
839 + GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0,
840 +diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
841 +index 5a2344574f39..4323dc4ae4c7 100644
842 +--- a/arch/sparc/kernel/vmlinux.lds.S
843 ++++ b/arch/sparc/kernel/vmlinux.lds.S
844 +@@ -167,12 +167,14 @@ SECTIONS
845 + }
846 + PERCPU_SECTION(SMP_CACHE_BYTES)
847 +
848 +-#ifdef CONFIG_JUMP_LABEL
849 + . = ALIGN(PAGE_SIZE);
850 + .exit.text : {
851 + EXIT_TEXT
852 + }
853 +-#endif
854 ++
855 ++ .exit.data : {
856 ++ EXIT_DATA
857 ++ }
858 +
859 + . = ALIGN(PAGE_SIZE);
860 + __init_end = .;
861 +diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
862 +index 42d4c89f990e..ddff0ca6f509 100644
863 +--- a/arch/x86/entry/vdso/vdso32-setup.c
864 ++++ b/arch/x86/entry/vdso/vdso32-setup.c
865 +@@ -11,6 +11,7 @@
866 + #include <linux/smp.h>
867 + #include <linux/kernel.h>
868 + #include <linux/mm_types.h>
869 ++#include <linux/elf.h>
870 +
871 + #include <asm/processor.h>
872 + #include <asm/vdso.h>
873 +diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
874 +index defb536aebce..c3ec535fd36b 100644
875 +--- a/arch/x86/events/amd/core.c
876 ++++ b/arch/x86/events/amd/core.c
877 +@@ -245,6 +245,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
878 + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
879 + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
880 + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
881 ++ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
882 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
883 + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
884 + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
885 +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
886 +index 3310f9f6c3e1..550b7814ef92 100644
887 +--- a/arch/x86/events/intel/ds.c
888 ++++ b/arch/x86/events/intel/ds.c
889 +@@ -1368,6 +1368,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
890 + old = ((s64)(prev_raw_count << shift) >> shift);
891 + local64_add(new - old + count * period, &event->count);
892 +
893 ++ local64_set(&hwc->period_left, -new);
894 ++
895 + perf_event_update_userpage(event);
896 +
897 + return 0;
898 +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
899 +index d0e17813a9b0..2cdf654ed132 100644
900 +--- a/arch/x86/include/asm/kvm_host.h
901 ++++ b/arch/x86/include/asm/kvm_host.h
902 +@@ -1006,7 +1006,7 @@ struct kvm_x86_ops {
903 + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
904 + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
905 + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
906 +- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
907 ++ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
908 + int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
909 + int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
910 + int (*get_tdp_level)(struct kvm_vcpu *vcpu);
911 +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
912 +index a8f47697276b..bbe94b682119 100644
913 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
914 ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
915 +@@ -1116,9 +1116,12 @@ static const struct sysfs_ops threshold_ops = {
916 + .store = store,
917 + };
918 +
919 ++static void threshold_block_release(struct kobject *kobj);
920 ++
921 + static struct kobj_type threshold_ktype = {
922 + .sysfs_ops = &threshold_ops,
923 + .default_attrs = default_attrs,
924 ++ .release = threshold_block_release,
925 + };
926 +
927 + static const char *get_name(unsigned int bank, struct threshold_block *b)
928 +@@ -1151,8 +1154,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
929 + return buf_mcatype;
930 + }
931 +
932 +-static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
933 +- unsigned int block, u32 address)
934 ++static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
935 ++ unsigned int bank, unsigned int block,
936 ++ u32 address)
937 + {
938 + struct threshold_block *b = NULL;
939 + u32 low, high;
940 +@@ -1196,16 +1200,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
941 +
942 + INIT_LIST_HEAD(&b->miscj);
943 +
944 +- if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
945 +- list_add(&b->miscj,
946 +- &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
947 +- } else {
948 +- per_cpu(threshold_banks, cpu)[bank]->blocks = b;
949 +- }
950 ++ if (tb->blocks)
951 ++ list_add(&b->miscj, &tb->blocks->miscj);
952 ++ else
953 ++ tb->blocks = b;
954 +
955 +- err = kobject_init_and_add(&b->kobj, &threshold_ktype,
956 +- per_cpu(threshold_banks, cpu)[bank]->kobj,
957 +- get_name(bank, b));
958 ++ err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
959 + if (err)
960 + goto out_free;
961 + recurse:
962 +@@ -1213,7 +1213,7 @@ recurse:
963 + if (!address)
964 + return 0;
965 +
966 +- err = allocate_threshold_blocks(cpu, bank, block, address);
967 ++ err = allocate_threshold_blocks(cpu, tb, bank, block, address);
968 + if (err)
969 + goto out_free;
970 +
971 +@@ -1298,8 +1298,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
972 + goto out_free;
973 + }
974 +
975 +- per_cpu(threshold_banks, cpu)[bank] = b;
976 +-
977 + if (is_shared_bank(bank)) {
978 + refcount_set(&b->cpus, 1);
979 +
980 +@@ -1310,9 +1308,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
981 + }
982 + }
983 +
984 +- err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
985 +- if (!err)
986 +- goto out;
987 ++ err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
988 ++ if (err)
989 ++ goto out_free;
990 ++
991 ++ per_cpu(threshold_banks, cpu)[bank] = b;
992 ++
993 ++ return 0;
994 +
995 + out_free:
996 + kfree(b);
997 +@@ -1321,8 +1323,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
998 + return err;
999 + }
1000 +
1001 +-static void deallocate_threshold_block(unsigned int cpu,
1002 +- unsigned int bank)
1003 ++static void threshold_block_release(struct kobject *kobj)
1004 ++{
1005 ++ kfree(to_block(kobj));
1006 ++}
1007 ++
1008 ++static void deallocate_threshold_block(unsigned int cpu, unsigned int bank)
1009 + {
1010 + struct threshold_block *pos = NULL;
1011 + struct threshold_block *tmp = NULL;
1012 +@@ -1332,13 +1338,11 @@ static void deallocate_threshold_block(unsigned int cpu,
1013 + return;
1014 +
1015 + list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
1016 +- kobject_put(&pos->kobj);
1017 + list_del(&pos->miscj);
1018 +- kfree(pos);
1019 ++ kobject_put(&pos->kobj);
1020 + }
1021 +
1022 +- kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
1023 +- per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
1024 ++ kobject_put(&head->blocks->kobj);
1025 + }
1026 +
1027 + static void __threshold_remove_blocks(struct threshold_bank *b)
1028 +diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
1029 +index 85195d447a92..f3215346e47f 100644
1030 +--- a/arch/x86/kernel/sysfb_simplefb.c
1031 ++++ b/arch/x86/kernel/sysfb_simplefb.c
1032 +@@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si,
1033 + if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
1034 + size <<= 16;
1035 + length = mode->height * mode->stride;
1036 +- length = PAGE_ALIGN(length);
1037 + if (length > size) {
1038 + printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
1039 + return -EINVAL;
1040 + }
1041 ++ length = PAGE_ALIGN(length);
1042 +
1043 + /* setup IORESOURCE_MEM as framebuffer memory */
1044 + memset(&res, 0, sizeof(res));
1045 +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
1046 +index 38959b173a42..1152afad524f 100644
1047 +--- a/arch/x86/kvm/cpuid.c
1048 ++++ b/arch/x86/kvm/cpuid.c
1049 +@@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
1050 + {
1051 + switch (func) {
1052 + case 0:
1053 +- entry->eax = 1; /* only one leaf currently */
1054 ++ entry->eax = 7;
1055 + ++*nent;
1056 + break;
1057 + case 1:
1058 + entry->ecx = F(MOVBE);
1059 + ++*nent;
1060 + break;
1061 ++ case 7:
1062 ++ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1063 ++ if (index == 0)
1064 ++ entry->ecx = F(RDPID);
1065 ++ ++*nent;
1066 + default:
1067 + break;
1068 + }
1069 +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
1070 +index 041b9b05fae1..70f3636aff11 100644
1071 +--- a/arch/x86/kvm/emulate.c
1072 ++++ b/arch/x86/kvm/emulate.c
1073 +@@ -3539,6 +3539,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
1074 + return X86EMUL_CONTINUE;
1075 + }
1076 +
1077 ++static int em_rdpid(struct x86_emulate_ctxt *ctxt)
1078 ++{
1079 ++ u64 tsc_aux = 0;
1080 ++
1081 ++ if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
1082 ++ return emulate_gp(ctxt, 0);
1083 ++ ctxt->dst.val = tsc_aux;
1084 ++ return X86EMUL_CONTINUE;
1085 ++}
1086 ++
1087 + static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
1088 + {
1089 + u64 tsc = 0;
1090 +@@ -4431,10 +4441,20 @@ static const struct opcode group8[] = {
1091 + F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
1092 + };
1093 +
1094 ++/*
1095 ++ * The "memory" destination is actually always a register, since we come
1096 ++ * from the register case of group9.
1097 ++ */
1098 ++static const struct gprefix pfx_0f_c7_7 = {
1099 ++ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
1100 ++};
1101 ++
1102 ++
1103 + static const struct group_dual group9 = { {
1104 + N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
1105 + }, {
1106 +- N, N, N, N, N, N, N, N,
1107 ++ N, N, N, N, N, N, N,
1108 ++ GP(0, &pfx_0f_c7_7),
1109 + } };
1110 +
1111 + static const struct opcode group11[] = {
1112 +diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
1113 +index 3cc3b2d130a0..4d000aea05e0 100644
1114 +--- a/arch/x86/kvm/irq_comm.c
1115 ++++ b/arch/x86/kvm/irq_comm.c
1116 +@@ -427,7 +427,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
1117 +
1118 + kvm_set_msi_irq(vcpu->kvm, entry, &irq);
1119 +
1120 +- if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
1121 ++ if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0,
1122 + irq.dest_id, irq.dest_mode))
1123 + __set_bit(irq.vector, ioapic_handled_vectors);
1124 + }
1125 +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
1126 +index 8715711f2755..537c36b55b5d 100644
1127 +--- a/arch/x86/kvm/lapic.c
1128 ++++ b/arch/x86/kvm/lapic.c
1129 +@@ -566,9 +566,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
1130 + static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
1131 + {
1132 + u8 val;
1133 +- if (pv_eoi_get_user(vcpu, &val) < 0)
1134 ++ if (pv_eoi_get_user(vcpu, &val) < 0) {
1135 + apic_debug("Can't read EOI MSR value: 0x%llx\n",
1136 + (unsigned long long)vcpu->arch.pv_eoi.msr_val);
1137 ++ return false;
1138 ++ }
1139 + return val & 0x1;
1140 + }
1141 +
1142 +@@ -993,11 +995,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1143 + apic_clear_vector(vector, apic->regs + APIC_TMR);
1144 + }
1145 +
1146 +- if (vcpu->arch.apicv_active)
1147 +- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
1148 +- else {
1149 ++ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
1150 + kvm_lapic_set_irr(vector, apic);
1151 +-
1152 + kvm_make_request(KVM_REQ_EVENT, vcpu);
1153 + kvm_vcpu_kick(vcpu);
1154 + }
1155 +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
1156 +index 52edb8cf1c40..8e65a9b40c18 100644
1157 +--- a/arch/x86/kvm/svm.c
1158 ++++ b/arch/x86/kvm/svm.c
1159 +@@ -4631,8 +4631,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
1160 + return;
1161 + }
1162 +
1163 +-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
1164 ++static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
1165 + {
1166 ++ if (!vcpu->arch.apicv_active)
1167 ++ return -1;
1168 ++
1169 + kvm_lapic_set_irr(vec, vcpu->arch.apic);
1170 + smp_mb__after_atomic();
1171 +
1172 +@@ -4641,6 +4644,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
1173 + kvm_cpu_get_apicid(vcpu->cpu));
1174 + else
1175 + kvm_vcpu_wake_up(vcpu);
1176 ++
1177 ++ return 0;
1178 + }
1179 +
1180 + static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
1181 +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
1182 +index 809d1b031fd9..acf72da288f9 100644
1183 +--- a/arch/x86/kvm/vmx.c
1184 ++++ b/arch/x86/kvm/vmx.c
1185 +@@ -4597,6 +4597,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1186 +
1187 + static int get_ept_level(struct kvm_vcpu *vcpu)
1188 + {
1189 ++ /* Nested EPT currently only supports 4-level walks. */
1190 ++ if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
1191 ++ return 4;
1192 + if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
1193 + return 5;
1194 + return 4;
1195 +@@ -4988,6 +4991,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
1196 + (ss.selector & SEGMENT_RPL_MASK));
1197 + }
1198 +
1199 ++static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu,
1200 ++ unsigned int port, int size);
1201 ++static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
1202 ++ struct vmcs12 *vmcs12)
1203 ++{
1204 ++ unsigned long exit_qualification;
1205 ++ unsigned short port;
1206 ++ int size;
1207 ++
1208 ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
1209 ++ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
1210 ++
1211 ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
1212 ++
1213 ++ port = exit_qualification >> 16;
1214 ++ size = (exit_qualification & 7) + 1;
1215 ++
1216 ++ return nested_vmx_check_io_bitmaps(vcpu, port, size);
1217 ++}
1218 ++
1219 + /*
1220 + * Check if guest state is valid. Returns true if valid, false if
1221 + * not.
1222 +@@ -5518,24 +5541,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
1223 + * 2. If target vcpu isn't running(root mode), kick it to pick up the
1224 + * interrupt from PIR in next vmentry.
1225 + */
1226 +-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
1227 ++static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
1228 + {
1229 + struct vcpu_vmx *vmx = to_vmx(vcpu);
1230 + int r;
1231 +
1232 + r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
1233 + if (!r)
1234 +- return;
1235 ++ return 0;
1236 ++
1237 ++ if (!vcpu->arch.apicv_active)
1238 ++ return -1;
1239 +
1240 + if (pi_test_and_set_pir(vector, &vmx->pi_desc))
1241 +- return;
1242 ++ return 0;
1243 +
1244 + /* If a previous notification has sent the IPI, nothing to do. */
1245 + if (pi_test_and_set_on(&vmx->pi_desc))
1246 +- return;
1247 ++ return 0;
1248 +
1249 + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
1250 + kvm_vcpu_kick(vcpu);
1251 ++
1252 ++ return 0;
1253 + }
1254 +
1255 + /*
1256 +@@ -8518,23 +8546,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
1257 + static const int kvm_vmx_max_exit_handlers =
1258 + ARRAY_SIZE(kvm_vmx_exit_handlers);
1259 +
1260 +-static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
1261 +- struct vmcs12 *vmcs12)
1262 ++/*
1263 ++ * Return true if an IO instruction with the specified port and size should cause
1264 ++ * a VM-exit into L1.
1265 ++ */
1266 ++bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
1267 ++ int size)
1268 + {
1269 +- unsigned long exit_qualification;
1270 ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1271 + gpa_t bitmap, last_bitmap;
1272 +- unsigned int port;
1273 +- int size;
1274 + u8 b;
1275 +
1276 +- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
1277 +- return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
1278 +-
1279 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
1280 +-
1281 +- port = exit_qualification >> 16;
1282 +- size = (exit_qualification & 7) + 1;
1283 +-
1284 + last_bitmap = (gpa_t)-1;
1285 + b = -1;
1286 +
1287 +@@ -12318,11 +12340,71 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1288 + to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
1289 + }
1290 +
1291 ++static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
1292 ++ struct x86_instruction_info *info)
1293 ++{
1294 ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1295 ++ unsigned short port;
1296 ++ bool intercept;
1297 ++ int size;
1298 ++
1299 ++ if (info->intercept == x86_intercept_in ||
1300 ++ info->intercept == x86_intercept_ins) {
1301 ++ port = info->src_val;
1302 ++ size = info->dst_bytes;
1303 ++ } else {
1304 ++ port = info->dst_val;
1305 ++ size = info->src_bytes;
1306 ++ }
1307 ++
1308 ++ /*
1309 ++ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
1310 ++ * VM-exits depend on the 'unconditional IO exiting' VM-execution
1311 ++ * control.
1312 ++ *
1313 ++ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
1314 ++ */
1315 ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
1316 ++ intercept = nested_cpu_has(vmcs12,
1317 ++ CPU_BASED_UNCOND_IO_EXITING);
1318 ++ else
1319 ++ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
1320 ++
1321 ++ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
1322 ++}
1323 ++
1324 + static int vmx_check_intercept(struct kvm_vcpu *vcpu,
1325 + struct x86_instruction_info *info,
1326 + enum x86_intercept_stage stage)
1327 + {
1328 +- return X86EMUL_CONTINUE;
1329 ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1330 ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
1331 ++
1332 ++ switch (info->intercept) {
1333 ++ /*
1334 ++ * RDPID causes #UD if disabled through secondary execution controls.
1335 ++ * Because it is marked as EmulateOnUD, we need to intercept it here.
1336 ++ */
1337 ++ case x86_intercept_rdtscp:
1338 ++ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
1339 ++ ctxt->exception.vector = UD_VECTOR;
1340 ++ ctxt->exception.error_code_valid = false;
1341 ++ return X86EMUL_PROPAGATE_FAULT;
1342 ++ }
1343 ++ break;
1344 ++
1345 ++ case x86_intercept_in:
1346 ++ case x86_intercept_ins:
1347 ++ case x86_intercept_out:
1348 ++ case x86_intercept_outs:
1349 ++ return vmx_check_intercept_io(vcpu, info);
1350 ++
1351 ++ /* TODO: check more intercepts... */
1352 ++ default:
1353 ++ break;
1354 ++ }
1355 ++
1356 ++ return X86EMUL_UNHANDLEABLE;
1357 + }
1358 +
1359 + #ifdef CONFIG_X86_64
1360 +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
1361 +deleted file mode 100644
1362 +index 3791ce8d269e..000000000000
1363 +--- a/arch/x86/kvm/vmx/vmx.c
1364 ++++ /dev/null
1365 +@@ -1,8033 +0,0 @@
1366 +-// SPDX-License-Identifier: GPL-2.0-only
1367 +-/*
1368 +- * Kernel-based Virtual Machine driver for Linux
1369 +- *
1370 +- * This module enables machines with Intel VT-x extensions to run virtual
1371 +- * machines without emulation or binary translation.
1372 +- *
1373 +- * Copyright (C) 2006 Qumranet, Inc.
1374 +- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
1375 +- *
1376 +- * Authors:
1377 +- * Avi Kivity <avi@××××××××.com>
1378 +- * Yaniv Kamay <yaniv@××××××××.com>
1379 +- */
1380 +-
1381 +-#include <linux/frame.h>
1382 +-#include <linux/highmem.h>
1383 +-#include <linux/hrtimer.h>
1384 +-#include <linux/kernel.h>
1385 +-#include <linux/kvm_host.h>
1386 +-#include <linux/module.h>
1387 +-#include <linux/moduleparam.h>
1388 +-#include <linux/mod_devicetable.h>
1389 +-#include <linux/mm.h>
1390 +-#include <linux/sched.h>
1391 +-#include <linux/sched/smt.h>
1392 +-#include <linux/slab.h>
1393 +-#include <linux/tboot.h>
1394 +-#include <linux/trace_events.h>
1395 +-
1396 +-#include <asm/apic.h>
1397 +-#include <asm/asm.h>
1398 +-#include <asm/cpu.h>
1399 +-#include <asm/debugreg.h>
1400 +-#include <asm/desc.h>
1401 +-#include <asm/fpu/internal.h>
1402 +-#include <asm/io.h>
1403 +-#include <asm/irq_remapping.h>
1404 +-#include <asm/kexec.h>
1405 +-#include <asm/perf_event.h>
1406 +-#include <asm/mce.h>
1407 +-#include <asm/mmu_context.h>
1408 +-#include <asm/mshyperv.h>
1409 +-#include <asm/spec-ctrl.h>
1410 +-#include <asm/virtext.h>
1411 +-#include <asm/vmx.h>
1412 +-
1413 +-#include "capabilities.h"
1414 +-#include "cpuid.h"
1415 +-#include "evmcs.h"
1416 +-#include "irq.h"
1417 +-#include "kvm_cache_regs.h"
1418 +-#include "lapic.h"
1419 +-#include "mmu.h"
1420 +-#include "nested.h"
1421 +-#include "ops.h"
1422 +-#include "pmu.h"
1423 +-#include "trace.h"
1424 +-#include "vmcs.h"
1425 +-#include "vmcs12.h"
1426 +-#include "vmx.h"
1427 +-#include "x86.h"
1428 +-
1429 +-MODULE_AUTHOR("Qumranet");
1430 +-MODULE_LICENSE("GPL");
1431 +-
1432 +-static const struct x86_cpu_id vmx_cpu_id[] = {
1433 +- X86_FEATURE_MATCH(X86_FEATURE_VMX),
1434 +- {}
1435 +-};
1436 +-MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
1437 +-
1438 +-bool __read_mostly enable_vpid = 1;
1439 +-module_param_named(vpid, enable_vpid, bool, 0444);
1440 +-
1441 +-static bool __read_mostly enable_vnmi = 1;
1442 +-module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
1443 +-
1444 +-bool __read_mostly flexpriority_enabled = 1;
1445 +-module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
1446 +-
1447 +-bool __read_mostly enable_ept = 1;
1448 +-module_param_named(ept, enable_ept, bool, S_IRUGO);
1449 +-
1450 +-bool __read_mostly enable_unrestricted_guest = 1;
1451 +-module_param_named(unrestricted_guest,
1452 +- enable_unrestricted_guest, bool, S_IRUGO);
1453 +-
1454 +-bool __read_mostly enable_ept_ad_bits = 1;
1455 +-module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
1456 +-
1457 +-static bool __read_mostly emulate_invalid_guest_state = true;
1458 +-module_param(emulate_invalid_guest_state, bool, S_IRUGO);
1459 +-
1460 +-static bool __read_mostly fasteoi = 1;
1461 +-module_param(fasteoi, bool, S_IRUGO);
1462 +-
1463 +-static bool __read_mostly enable_apicv = 1;
1464 +-module_param(enable_apicv, bool, S_IRUGO);
1465 +-
1466 +-/*
1467 +- * If nested=1, nested virtualization is supported, i.e., guests may use
1468 +- * VMX and be a hypervisor for its own guests. If nested=0, guests may not
1469 +- * use VMX instructions.
1470 +- */
1471 +-static bool __read_mostly nested = 1;
1472 +-module_param(nested, bool, S_IRUGO);
1473 +-
1474 +-bool __read_mostly enable_pml = 1;
1475 +-module_param_named(pml, enable_pml, bool, S_IRUGO);
1476 +-
1477 +-static bool __read_mostly dump_invalid_vmcs = 0;
1478 +-module_param(dump_invalid_vmcs, bool, 0644);
1479 +-
1480 +-#define MSR_BITMAP_MODE_X2APIC 1
1481 +-#define MSR_BITMAP_MODE_X2APIC_APICV 2
1482 +-
1483 +-#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
1484 +-
1485 +-/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
1486 +-static int __read_mostly cpu_preemption_timer_multi;
1487 +-static bool __read_mostly enable_preemption_timer = 1;
1488 +-#ifdef CONFIG_X86_64
1489 +-module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
1490 +-#endif
1491 +-
1492 +-#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
1493 +-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
1494 +-#define KVM_VM_CR0_ALWAYS_ON \
1495 +- (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
1496 +- X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
1497 +-#define KVM_CR4_GUEST_OWNED_BITS \
1498 +- (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
1499 +- | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
1500 +-
1501 +-#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
1502 +-#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
1503 +-#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
1504 +-
1505 +-#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
1506 +-
1507 +-#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
1508 +- RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
1509 +- RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
1510 +- RTIT_STATUS_BYTECNT))
1511 +-
1512 +-#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
1513 +- (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
1514 +-
1515 +-/*
1516 +- * These 2 parameters are used to config the controls for Pause-Loop Exiting:
1517 +- * ple_gap: upper bound on the amount of time between two successive
1518 +- * executions of PAUSE in a loop. Also indicate if ple enabled.
1519 +- * According to test, this time is usually smaller than 128 cycles.
1520 +- * ple_window: upper bound on the amount of time a guest is allowed to execute
1521 +- * in a PAUSE loop. Tests indicate that most spinlocks are held for
1522 +- * less than 2^12 cycles
1523 +- * Time is measured based on a counter that runs at the same rate as the TSC,
1524 +- * refer SDM volume 3b section 21.6.13 & 22.1.3.
1525 +- */
1526 +-static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
1527 +-module_param(ple_gap, uint, 0444);
1528 +-
1529 +-static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
1530 +-module_param(ple_window, uint, 0444);
1531 +-
1532 +-/* Default doubles per-vcpu window every exit. */
1533 +-static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
1534 +-module_param(ple_window_grow, uint, 0444);
1535 +-
1536 +-/* Default resets per-vcpu window every exit to ple_window. */
1537 +-static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
1538 +-module_param(ple_window_shrink, uint, 0444);
1539 +-
1540 +-/* Default is to compute the maximum so we can never overflow. */
1541 +-static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
1542 +-module_param(ple_window_max, uint, 0444);
1543 +-
1544 +-/* Default is SYSTEM mode, 1 for host-guest mode */
1545 +-int __read_mostly pt_mode = PT_MODE_SYSTEM;
1546 +-module_param(pt_mode, int, S_IRUGO);
1547 +-
1548 +-static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
1549 +-static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
1550 +-static DEFINE_MUTEX(vmx_l1d_flush_mutex);
1551 +-
1552 +-/* Storage for pre module init parameter parsing */
1553 +-static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
1554 +-
1555 +-static const struct {
1556 +- const char *option;
1557 +- bool for_parse;
1558 +-} vmentry_l1d_param[] = {
1559 +- [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
1560 +- [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
1561 +- [VMENTER_L1D_FLUSH_COND] = {"cond", true},
1562 +- [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
1563 +- [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
1564 +- [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
1565 +-};
1566 +-
1567 +-#define L1D_CACHE_ORDER 4
1568 +-static void *vmx_l1d_flush_pages;
1569 +-
1570 +-static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
1571 +-{
1572 +- struct page *page;
1573 +- unsigned int i;
1574 +-
1575 +- if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
1576 +- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1577 +- return 0;
1578 +- }
1579 +-
1580 +- if (!enable_ept) {
1581 +- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
1582 +- return 0;
1583 +- }
1584 +-
1585 +- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
1586 +- u64 msr;
1587 +-
1588 +- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
1589 +- if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
1590 +- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1591 +- return 0;
1592 +- }
1593 +- }
1594 +-
1595 +- /* If set to auto, use the default l1tf mitigation method */
1596 +- if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
1597 +- switch (l1tf_mitigation) {
1598 +- case L1TF_MITIGATION_OFF:
1599 +- l1tf = VMENTER_L1D_FLUSH_NEVER;
1600 +- break;
1601 +- case L1TF_MITIGATION_FLUSH_NOWARN:
1602 +- case L1TF_MITIGATION_FLUSH:
1603 +- case L1TF_MITIGATION_FLUSH_NOSMT:
1604 +- l1tf = VMENTER_L1D_FLUSH_COND;
1605 +- break;
1606 +- case L1TF_MITIGATION_FULL:
1607 +- case L1TF_MITIGATION_FULL_FORCE:
1608 +- l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1609 +- break;
1610 +- }
1611 +- } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
1612 +- l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1613 +- }
1614 +-
1615 +- if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
1616 +- !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
1617 +- /*
1618 +- * This allocation for vmx_l1d_flush_pages is not tied to a VM
1619 +- * lifetime and so should not be charged to a memcg.
1620 +- */
1621 +- page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
1622 +- if (!page)
1623 +- return -ENOMEM;
1624 +- vmx_l1d_flush_pages = page_address(page);
1625 +-
1626 +- /*
1627 +- * Initialize each page with a different pattern in
1628 +- * order to protect against KSM in the nested
1629 +- * virtualization case.
1630 +- */
1631 +- for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
1632 +- memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
1633 +- PAGE_SIZE);
1634 +- }
1635 +- }
1636 +-
1637 +- l1tf_vmx_mitigation = l1tf;
1638 +-
1639 +- if (l1tf != VMENTER_L1D_FLUSH_NEVER)
1640 +- static_branch_enable(&vmx_l1d_should_flush);
1641 +- else
1642 +- static_branch_disable(&vmx_l1d_should_flush);
1643 +-
1644 +- if (l1tf == VMENTER_L1D_FLUSH_COND)
1645 +- static_branch_enable(&vmx_l1d_flush_cond);
1646 +- else
1647 +- static_branch_disable(&vmx_l1d_flush_cond);
1648 +- return 0;
1649 +-}
1650 +-
1651 +-static int vmentry_l1d_flush_parse(const char *s)
1652 +-{
1653 +- unsigned int i;
1654 +-
1655 +- if (s) {
1656 +- for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
1657 +- if (vmentry_l1d_param[i].for_parse &&
1658 +- sysfs_streq(s, vmentry_l1d_param[i].option))
1659 +- return i;
1660 +- }
1661 +- }
1662 +- return -EINVAL;
1663 +-}
1664 +-
1665 +-static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
1666 +-{
1667 +- int l1tf, ret;
1668 +-
1669 +- l1tf = vmentry_l1d_flush_parse(s);
1670 +- if (l1tf < 0)
1671 +- return l1tf;
1672 +-
1673 +- if (!boot_cpu_has(X86_BUG_L1TF))
1674 +- return 0;
1675 +-
1676 +- /*
1677 +- * Has vmx_init() run already? If not then this is the pre init
1678 +- * parameter parsing. In that case just store the value and let
1679 +- * vmx_init() do the proper setup after enable_ept has been
1680 +- * established.
1681 +- */
1682 +- if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
1683 +- vmentry_l1d_flush_param = l1tf;
1684 +- return 0;
1685 +- }
1686 +-
1687 +- mutex_lock(&vmx_l1d_flush_mutex);
1688 +- ret = vmx_setup_l1d_flush(l1tf);
1689 +- mutex_unlock(&vmx_l1d_flush_mutex);
1690 +- return ret;
1691 +-}
1692 +-
1693 +-static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
1694 +-{
1695 +- if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
1696 +- return sprintf(s, "???\n");
1697 +-
1698 +- return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
1699 +-}
1700 +-
1701 +-static const struct kernel_param_ops vmentry_l1d_flush_ops = {
1702 +- .set = vmentry_l1d_flush_set,
1703 +- .get = vmentry_l1d_flush_get,
1704 +-};
1705 +-module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
1706 +-
1707 +-static bool guest_state_valid(struct kvm_vcpu *vcpu);
1708 +-static u32 vmx_segment_access_rights(struct kvm_segment *var);
1709 +-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
1710 +- u32 msr, int type);
1711 +-
1712 +-void vmx_vmexit(void);
1713 +-
1714 +-#define vmx_insn_failed(fmt...) \
1715 +-do { \
1716 +- WARN_ONCE(1, fmt); \
1717 +- pr_warn_ratelimited(fmt); \
1718 +-} while (0)
1719 +-
1720 +-asmlinkage void vmread_error(unsigned long field, bool fault)
1721 +-{
1722 +- if (fault)
1723 +- kvm_spurious_fault();
1724 +- else
1725 +- vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
1726 +-}
1727 +-
1728 +-noinline void vmwrite_error(unsigned long field, unsigned long value)
1729 +-{
1730 +- vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
1731 +- field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
1732 +-}
1733 +-
1734 +-noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
1735 +-{
1736 +- vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
1737 +-}
1738 +-
1739 +-noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
1740 +-{
1741 +- vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
1742 +-}
1743 +-
1744 +-noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
1745 +-{
1746 +- vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
1747 +- ext, vpid, gva);
1748 +-}
1749 +-
1750 +-noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
1751 +-{
1752 +- vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
1753 +- ext, eptp, gpa);
1754 +-}
1755 +-
1756 +-static DEFINE_PER_CPU(struct vmcs *, vmxarea);
1757 +-DEFINE_PER_CPU(struct vmcs *, current_vmcs);
1758 +-/*
1759 +- * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
1760 +- * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
1761 +- */
1762 +-static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
1763 +-
1764 +-/*
1765 +- * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we
1766 +- * can find which vCPU should be woken up.
1767 +- */
1768 +-static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
1769 +-static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
1770 +-
1771 +-static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1772 +-static DEFINE_SPINLOCK(vmx_vpid_lock);
1773 +-
1774 +-struct vmcs_config vmcs_config;
1775 +-struct vmx_capability vmx_capability;
1776 +-
1777 +-#define VMX_SEGMENT_FIELD(seg) \
1778 +- [VCPU_SREG_##seg] = { \
1779 +- .selector = GUEST_##seg##_SELECTOR, \
1780 +- .base = GUEST_##seg##_BASE, \
1781 +- .limit = GUEST_##seg##_LIMIT, \
1782 +- .ar_bytes = GUEST_##seg##_AR_BYTES, \
1783 +- }
1784 +-
1785 +-static const struct kvm_vmx_segment_field {
1786 +- unsigned selector;
1787 +- unsigned base;
1788 +- unsigned limit;
1789 +- unsigned ar_bytes;
1790 +-} kvm_vmx_segment_fields[] = {
1791 +- VMX_SEGMENT_FIELD(CS),
1792 +- VMX_SEGMENT_FIELD(DS),
1793 +- VMX_SEGMENT_FIELD(ES),
1794 +- VMX_SEGMENT_FIELD(FS),
1795 +- VMX_SEGMENT_FIELD(GS),
1796 +- VMX_SEGMENT_FIELD(SS),
1797 +- VMX_SEGMENT_FIELD(TR),
1798 +- VMX_SEGMENT_FIELD(LDTR),
1799 +-};
1800 +-
1801 +-u64 host_efer;
1802 +-static unsigned long host_idt_base;
1803 +-
1804 +-/*
1805 +- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
1806 +- * will emulate SYSCALL in legacy mode if the vendor string in guest
1807 +- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
1808 +- * support this emulation, IA32_STAR must always be included in
1809 +- * vmx_msr_index[], even in i386 builds.
1810 +- */
1811 +-const u32 vmx_msr_index[] = {
1812 +-#ifdef CONFIG_X86_64
1813 +- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
1814 +-#endif
1815 +- MSR_EFER, MSR_TSC_AUX, MSR_STAR,
1816 +- MSR_IA32_TSX_CTRL,
1817 +-};
1818 +-
1819 +-#if IS_ENABLED(CONFIG_HYPERV)
1820 +-static bool __read_mostly enlightened_vmcs = true;
1821 +-module_param(enlightened_vmcs, bool, 0444);
1822 +-
1823 +-/* check_ept_pointer() should be under protection of ept_pointer_lock. */
1824 +-static void check_ept_pointer_match(struct kvm *kvm)
1825 +-{
1826 +- struct kvm_vcpu *vcpu;
1827 +- u64 tmp_eptp = INVALID_PAGE;
1828 +- int i;
1829 +-
1830 +- kvm_for_each_vcpu(i, vcpu, kvm) {
1831 +- if (!VALID_PAGE(tmp_eptp)) {
1832 +- tmp_eptp = to_vmx(vcpu)->ept_pointer;
1833 +- } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1834 +- to_kvm_vmx(kvm)->ept_pointers_match
1835 +- = EPT_POINTERS_MISMATCH;
1836 +- return;
1837 +- }
1838 +- }
1839 +-
1840 +- to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1841 +-}
1842 +-
1843 +-static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
1844 +- void *data)
1845 +-{
1846 +- struct kvm_tlb_range *range = data;
1847 +-
1848 +- return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
1849 +- range->pages);
1850 +-}
1851 +-
1852 +-static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
1853 +- struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
1854 +-{
1855 +- u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
1856 +-
1857 +- /*
1858 +- * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address
1859 +- * of the base of EPT PML4 table, strip off EPT configuration
1860 +- * information.
1861 +- */
1862 +- if (range)
1863 +- return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
1864 +- kvm_fill_hv_flush_list_func, (void *)range);
1865 +- else
1866 +- return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
1867 +-}
1868 +-
1869 +-static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
1870 +- struct kvm_tlb_range *range)
1871 +-{
1872 +- struct kvm_vcpu *vcpu;
1873 +- int ret = 0, i;
1874 +-
1875 +- spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1876 +-
1877 +- if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1878 +- check_ept_pointer_match(kvm);
1879 +-
1880 +- if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
1881 +- kvm_for_each_vcpu(i, vcpu, kvm) {
1882 +- /* If ept_pointer is invalid pointer, bypass flush request. */
1883 +- if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
1884 +- ret |= __hv_remote_flush_tlb_with_range(
1885 +- kvm, vcpu, range);
1886 +- }
1887 +- } else {
1888 +- ret = __hv_remote_flush_tlb_with_range(kvm,
1889 +- kvm_get_vcpu(kvm, 0), range);
1890 +- }
1891 +-
1892 +- spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1893 +- return ret;
1894 +-}
1895 +-static int hv_remote_flush_tlb(struct kvm *kvm)
1896 +-{
1897 +- return hv_remote_flush_tlb_with_range(kvm, NULL);
1898 +-}
1899 +-
1900 +-static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
1901 +-{
1902 +- struct hv_enlightened_vmcs *evmcs;
1903 +- struct hv_partition_assist_pg **p_hv_pa_pg =
1904 +- &vcpu->kvm->arch.hyperv.hv_pa_pg;
1905 +- /*
1906 +- * Synthetic VM-Exit is not enabled in the current code, so all
1907 +- * evmcs in a single VM share the same assist page.
1908 +- */
1909 +- if (!*p_hv_pa_pg)
1910 +- *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1911 +-
1912 +- if (!*p_hv_pa_pg)
1913 +- return -ENOMEM;
1914 +-
1915 +- evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
1916 +-
1917 +- evmcs->partition_assist_page =
1918 +- __pa(*p_hv_pa_pg);
1919 +- evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
1920 +- evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
1921 +-
1922 +- return 0;
1923 +-}
1924 +-
1925 +-#endif /* IS_ENABLED(CONFIG_HYPERV) */
1926 +-
1927 +-/*
1928 +- * Comment's format: document - errata name - stepping - processor name.
1929 +- * Refer from
1930 +- * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1931 +- */
1932 +-static u32 vmx_preemption_cpu_tfms[] = {
1933 +-/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
1934 +-0x000206E6,
1935 +-/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1936 +-/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1937 +-/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
1938 +-0x00020652,
1939 +-/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
1940 +-0x00020655,
1941 +-/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1942 +-/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1943 +-/*
1944 +- * 320767.pdf - AAP86 - B1 -
1945 +- * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1946 +- */
1947 +-0x000106E5,
1948 +-/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
1949 +-0x000106A0,
1950 +-/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
1951 +-0x000106A1,
1952 +-/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
1953 +-0x000106A4,
1954 +- /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1955 +- /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1956 +- /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
1957 +-0x000106A5,
1958 +- /* Xeon E3-1220 V2 */
1959 +-0x000306A8,
1960 +-};
1961 +-
1962 +-static inline bool cpu_has_broken_vmx_preemption_timer(void)
1963 +-{
1964 +- u32 eax = cpuid_eax(0x00000001), i;
1965 +-
1966 +- /* Clear the reserved bits */
1967 +- eax &= ~(0x3U << 14 | 0xfU << 28);
1968 +- for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
1969 +- if (eax == vmx_preemption_cpu_tfms[i])
1970 +- return true;
1971 +-
1972 +- return false;
1973 +-}
1974 +-
1975 +-static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
1976 +-{
1977 +- return flexpriority_enabled && lapic_in_kernel(vcpu);
1978 +-}
1979 +-
1980 +-static inline bool report_flexpriority(void)
1981 +-{
1982 +- return flexpriority_enabled;
1983 +-}
1984 +-
1985 +-static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
1986 +-{
1987 +- int i;
1988 +-
1989 +- for (i = 0; i < vmx->nmsrs; ++i)
1990 +- if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
1991 +- return i;
1992 +- return -1;
1993 +-}
1994 +-
1995 +-struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
1996 +-{
1997 +- int i;
1998 +-
1999 +- i = __find_msr_index(vmx, msr);
2000 +- if (i >= 0)
2001 +- return &vmx->guest_msrs[i];
2002 +- return NULL;
2003 +-}
2004 +-
2005 +-static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
2006 +-{
2007 +- int ret = 0;
2008 +-
2009 +- u64 old_msr_data = msr->data;
2010 +- msr->data = data;
2011 +- if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2012 +- preempt_disable();
2013 +- ret = kvm_set_shared_msr(msr->index, msr->data,
2014 +- msr->mask);
2015 +- preempt_enable();
2016 +- if (ret)
2017 +- msr->data = old_msr_data;
2018 +- }
2019 +- return ret;
2020 +-}
2021 +-
2022 +-void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
2023 +-{
2024 +- vmcs_clear(loaded_vmcs->vmcs);
2025 +- if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
2026 +- vmcs_clear(loaded_vmcs->shadow_vmcs);
2027 +- loaded_vmcs->cpu = -1;
2028 +- loaded_vmcs->launched = 0;
2029 +-}
2030 +-
2031 +-#ifdef CONFIG_KEXEC_CORE
2032 +-/*
2033 +- * This bitmap is used to indicate whether the vmclear
2034 +- * operation is enabled on all cpus. All disabled by
2035 +- * default.
2036 +- */
2037 +-static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
2038 +-
2039 +-static inline void crash_enable_local_vmclear(int cpu)
2040 +-{
2041 +- cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
2042 +-}
2043 +-
2044 +-static inline void crash_disable_local_vmclear(int cpu)
2045 +-{
2046 +- cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
2047 +-}
2048 +-
2049 +-static inline int crash_local_vmclear_enabled(int cpu)
2050 +-{
2051 +- return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
2052 +-}
2053 +-
2054 +-static void crash_vmclear_local_loaded_vmcss(void)
2055 +-{
2056 +- int cpu = raw_smp_processor_id();
2057 +- struct loaded_vmcs *v;
2058 +-
2059 +- if (!crash_local_vmclear_enabled(cpu))
2060 +- return;
2061 +-
2062 +- list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
2063 +- loaded_vmcss_on_cpu_link)
2064 +- vmcs_clear(v->vmcs);
2065 +-}
2066 +-#else
2067 +-static inline void crash_enable_local_vmclear(int cpu) { }
2068 +-static inline void crash_disable_local_vmclear(int cpu) { }
2069 +-#endif /* CONFIG_KEXEC_CORE */
2070 +-
2071 +-static void __loaded_vmcs_clear(void *arg)
2072 +-{
2073 +- struct loaded_vmcs *loaded_vmcs = arg;
2074 +- int cpu = raw_smp_processor_id();
2075 +-
2076 +- if (loaded_vmcs->cpu != cpu)
2077 +- return; /* vcpu migration can race with cpu offline */
2078 +- if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
2079 +- per_cpu(current_vmcs, cpu) = NULL;
2080 +- crash_disable_local_vmclear(cpu);
2081 +- list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
2082 +-
2083 +- /*
2084 +- * We should ensure that updating loaded_vmcs->loaded_vmcss_on_cpu_link
2085 +- * happens before setting loaded_vmcs->cpu to -1, which is done in
2086 +- * loaded_vmcs_init. Otherwise, another CPU could see cpu == -1 first
2087 +- * and then add the vmcs to the per-cpu list before it is deleted.
2088 +- */
2089 +- smp_wmb();
2090 +-
2091 +- loaded_vmcs_init(loaded_vmcs);
2092 +- crash_enable_local_vmclear(cpu);
2093 +-}
2094 +-
2095 +-void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
2096 +-{
2097 +- int cpu = loaded_vmcs->cpu;
2098 +-
2099 +- if (cpu != -1)
2100 +- smp_call_function_single(cpu,
2101 +- __loaded_vmcs_clear, loaded_vmcs, 1);
2102 +-}
2103 +-
2104 +-static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
2105 +- unsigned field)
2106 +-{
2107 +- bool ret;
2108 +- u32 mask = 1 << (seg * SEG_FIELD_NR + field);
2109 +-
2110 +- if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
2111 +- kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
2112 +- vmx->segment_cache.bitmask = 0;
2113 +- }
2114 +- ret = vmx->segment_cache.bitmask & mask;
2115 +- vmx->segment_cache.bitmask |= mask;
2116 +- return ret;
2117 +-}
2118 +-
2119 +-static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
2120 +-{
2121 +- u16 *p = &vmx->segment_cache.seg[seg].selector;
2122 +-
2123 +- if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
2124 +- *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
2125 +- return *p;
2126 +-}
2127 +-
2128 +-static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
2129 +-{
2130 +- ulong *p = &vmx->segment_cache.seg[seg].base;
2131 +-
2132 +- if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
2133 +- *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
2134 +- return *p;
2135 +-}
2136 +-
2137 +-static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
2138 +-{
2139 +- u32 *p = &vmx->segment_cache.seg[seg].limit;
2140 +-
2141 +- if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
2142 +- *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
2143 +- return *p;
2144 +-}
2145 +-
2146 +-static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
2147 +-{
2148 +- u32 *p = &vmx->segment_cache.seg[seg].ar;
2149 +-
2150 +- if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
2151 +- *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
2152 +- return *p;
2153 +-}
2154 +-
2155 +-void update_exception_bitmap(struct kvm_vcpu *vcpu)
2156 +-{
2157 +- u32 eb;
2158 +-
2159 +- eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
2160 +- (1u << DB_VECTOR) | (1u << AC_VECTOR);
2161 +- /*
2162 +- * Guest access to VMware backdoor ports could legitimately
2163 +- * trigger #GP because of TSS I/O permission bitmap.
2164 +- * We intercept those #GP and allow access to them anyway
2165 +- * as VMware does.
2166 +- */
2167 +- if (enable_vmware_backdoor)
2168 +- eb |= (1u << GP_VECTOR);
2169 +- if ((vcpu->guest_debug &
2170 +- (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
2171 +- (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
2172 +- eb |= 1u << BP_VECTOR;
2173 +- if (to_vmx(vcpu)->rmode.vm86_active)
2174 +- eb = ~0;
2175 +- if (enable_ept)
2176 +- eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
2177 +-
2178 +- /* When we are running a nested L2 guest and L1 specified for it a
2179 +- * certain exception bitmap, we must trap the same exceptions and pass
2180 +- * them to L1. When running L2, we will only handle the exceptions
2181 +- * specified above if L1 did not want them.
2182 +- */
2183 +- if (is_guest_mode(vcpu))
2184 +- eb |= get_vmcs12(vcpu)->exception_bitmap;
2185 +-
2186 +- vmcs_write32(EXCEPTION_BITMAP, eb);
2187 +-}
2188 +-
2189 +-/*
2190 +- * Check if MSR is intercepted for currently loaded MSR bitmap.
2191 +- */
2192 +-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
2193 +-{
2194 +- unsigned long *msr_bitmap;
2195 +- int f = sizeof(unsigned long);
2196 +-
2197 +- if (!cpu_has_vmx_msr_bitmap())
2198 +- return true;
2199 +-
2200 +- msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
2201 +-
2202 +- if (msr <= 0x1fff) {
2203 +- return !!test_bit(msr, msr_bitmap + 0x800 / f);
2204 +- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2205 +- msr &= 0x1fff;
2206 +- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2207 +- }
2208 +-
2209 +- return true;
2210 +-}
2211 +-
2212 +-static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2213 +- unsigned long entry, unsigned long exit)
2214 +-{
2215 +- vm_entry_controls_clearbit(vmx, entry);
2216 +- vm_exit_controls_clearbit(vmx, exit);
2217 +-}
2218 +-
2219 +-int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
2220 +-{
2221 +- unsigned int i;
2222 +-
2223 +- for (i = 0; i < m->nr; ++i) {
2224 +- if (m->val[i].index == msr)
2225 +- return i;
2226 +- }
2227 +- return -ENOENT;
2228 +-}
2229 +-
2230 +-static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
2231 +-{
2232 +- int i;
2233 +- struct msr_autoload *m = &vmx->msr_autoload;
2234 +-
2235 +- switch (msr) {
2236 +- case MSR_EFER:
2237 +- if (cpu_has_load_ia32_efer()) {
2238 +- clear_atomic_switch_msr_special(vmx,
2239 +- VM_ENTRY_LOAD_IA32_EFER,
2240 +- VM_EXIT_LOAD_IA32_EFER);
2241 +- return;
2242 +- }
2243 +- break;
2244 +- case MSR_CORE_PERF_GLOBAL_CTRL:
2245 +- if (cpu_has_load_perf_global_ctrl()) {
2246 +- clear_atomic_switch_msr_special(vmx,
2247 +- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2248 +- VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
2249 +- return;
2250 +- }
2251 +- break;
2252 +- }
2253 +- i = vmx_find_msr_index(&m->guest, msr);
2254 +- if (i < 0)
2255 +- goto skip_guest;
2256 +- --m->guest.nr;
2257 +- m->guest.val[i] = m->guest.val[m->guest.nr];
2258 +- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
2259 +-
2260 +-skip_guest:
2261 +- i = vmx_find_msr_index(&m->host, msr);
2262 +- if (i < 0)
2263 +- return;
2264 +-
2265 +- --m->host.nr;
2266 +- m->host.val[i] = m->host.val[m->host.nr];
2267 +- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
2268 +-}
2269 +-
2270 +-static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2271 +- unsigned long entry, unsigned long exit,
2272 +- unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
2273 +- u64 guest_val, u64 host_val)
2274 +-{
2275 +- vmcs_write64(guest_val_vmcs, guest_val);
2276 +- if (host_val_vmcs != HOST_IA32_EFER)
2277 +- vmcs_write64(host_val_vmcs, host_val);
2278 +- vm_entry_controls_setbit(vmx, entry);
2279 +- vm_exit_controls_setbit(vmx, exit);
2280 +-}
2281 +-
2282 +-static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
2283 +- u64 guest_val, u64 host_val, bool entry_only)
2284 +-{
2285 +- int i, j = 0;
2286 +- struct msr_autoload *m = &vmx->msr_autoload;
2287 +-
2288 +- switch (msr) {
2289 +- case MSR_EFER:
2290 +- if (cpu_has_load_ia32_efer()) {
2291 +- add_atomic_switch_msr_special(vmx,
2292 +- VM_ENTRY_LOAD_IA32_EFER,
2293 +- VM_EXIT_LOAD_IA32_EFER,
2294 +- GUEST_IA32_EFER,
2295 +- HOST_IA32_EFER,
2296 +- guest_val, host_val);
2297 +- return;
2298 +- }
2299 +- break;
2300 +- case MSR_CORE_PERF_GLOBAL_CTRL:
2301 +- if (cpu_has_load_perf_global_ctrl()) {
2302 +- add_atomic_switch_msr_special(vmx,
2303 +- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2304 +- VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
2305 +- GUEST_IA32_PERF_GLOBAL_CTRL,
2306 +- HOST_IA32_PERF_GLOBAL_CTRL,
2307 +- guest_val, host_val);
2308 +- return;
2309 +- }
2310 +- break;
2311 +- case MSR_IA32_PEBS_ENABLE:
2312 +- /* PEBS needs a quiescent period after being disabled (to write
2313 +- * a record). Disabling PEBS through VMX MSR swapping doesn't
2314 +- * provide that period, so a CPU could write host's record into
2315 +- * guest's memory.
2316 +- */
2317 +- wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
2318 +- }
2319 +-
2320 +- i = vmx_find_msr_index(&m->guest, msr);
2321 +- if (!entry_only)
2322 +- j = vmx_find_msr_index(&m->host, msr);
2323 +-
2324 +- if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
2325 +- (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
2326 +- printk_once(KERN_WARNING "Not enough msr switch entries. "
2327 +- "Can't add msr %x\n", msr);
2328 +- return;
2329 +- }
2330 +- if (i < 0) {
2331 +- i = m->guest.nr++;
2332 +- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
2333 +- }
2334 +- m->guest.val[i].index = msr;
2335 +- m->guest.val[i].value = guest_val;
2336 +-
2337 +- if (entry_only)
2338 +- return;
2339 +-
2340 +- if (j < 0) {
2341 +- j = m->host.nr++;
2342 +- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
2343 +- }
2344 +- m->host.val[j].index = msr;
2345 +- m->host.val[j].value = host_val;
2346 +-}
2347 +-
2348 +-static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
2349 +-{
2350 +- u64 guest_efer = vmx->vcpu.arch.efer;
2351 +- u64 ignore_bits = 0;
2352 +-
2353 +- /* Shadow paging assumes NX to be available. */
2354 +- if (!enable_ept)
2355 +- guest_efer |= EFER_NX;
2356 +-
2357 +- /*
2358 +- * LMA and LME handled by hardware; SCE meaningless outside long mode.
2359 +- */
2360 +- ignore_bits |= EFER_SCE;
2361 +-#ifdef CONFIG_X86_64
2362 +- ignore_bits |= EFER_LMA | EFER_LME;
2363 +- /* SCE is meaningful only in long mode on Intel */
2364 +- if (guest_efer & EFER_LMA)
2365 +- ignore_bits &= ~(u64)EFER_SCE;
2366 +-#endif
2367 +-
2368 +- /*
2369 +- * On EPT, we can't emulate NX, so we must switch EFER atomically.
2370 +- * On CPUs that support "load IA32_EFER", always switch EFER
2371 +- * atomically, since it's faster than switching it manually.
2372 +- */
2373 +- if (cpu_has_load_ia32_efer() ||
2374 +- (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
2375 +- if (!(guest_efer & EFER_LMA))
2376 +- guest_efer &= ~EFER_LME;
2377 +- if (guest_efer != host_efer)
2378 +- add_atomic_switch_msr(vmx, MSR_EFER,
2379 +- guest_efer, host_efer, false);
2380 +- else
2381 +- clear_atomic_switch_msr(vmx, MSR_EFER);
2382 +- return false;
2383 +- } else {
2384 +- clear_atomic_switch_msr(vmx, MSR_EFER);
2385 +-
2386 +- guest_efer &= ~ignore_bits;
2387 +- guest_efer |= host_efer & ignore_bits;
2388 +-
2389 +- vmx->guest_msrs[efer_offset].data = guest_efer;
2390 +- vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
2391 +-
2392 +- return true;
2393 +- }
2394 +-}
2395 +-
2396 +-#ifdef CONFIG_X86_32
2397 +-/*
2398 +- * On 32-bit kernels, VM exits still load the FS and GS bases from the
2399 +- * VMCS rather than the segment table. KVM uses this helper to figure
2400 +- * out the current bases to poke them into the VMCS before entry.
2401 +- */
2402 +-static unsigned long segment_base(u16 selector)
2403 +-{
2404 +- struct desc_struct *table;
2405 +- unsigned long v;
2406 +-
2407 +- if (!(selector & ~SEGMENT_RPL_MASK))
2408 +- return 0;
2409 +-
2410 +- table = get_current_gdt_ro();
2411 +-
2412 +- if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
2413 +- u16 ldt_selector = kvm_read_ldt();
2414 +-
2415 +- if (!(ldt_selector & ~SEGMENT_RPL_MASK))
2416 +- return 0;
2417 +-
2418 +- table = (struct desc_struct *)segment_base(ldt_selector);
2419 +- }
2420 +- v = get_desc_base(&table[selector >> 3]);
2421 +- return v;
2422 +-}
2423 +-#endif
2424 +-
2425 +-static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
2426 +-{
2427 +- u32 i;
2428 +-
2429 +- wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
2430 +- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
2431 +- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
2432 +- wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
2433 +- for (i = 0; i < addr_range; i++) {
2434 +- wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
2435 +- wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
2436 +- }
2437 +-}
2438 +-
2439 +-static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
2440 +-{
2441 +- u32 i;
2442 +-
2443 +- rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
2444 +- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
2445 +- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
2446 +- rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
2447 +- for (i = 0; i < addr_range; i++) {
2448 +- rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
2449 +- rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
2450 +- }
2451 +-}
2452 +-
2453 +-static void pt_guest_enter(struct vcpu_vmx *vmx)
2454 +-{
2455 +- if (pt_mode == PT_MODE_SYSTEM)
2456 +- return;
2457 +-
2458 +- /*
2459 +- * GUEST_IA32_RTIT_CTL is already set in the VMCS.
2460 +- * Save host state before VM entry.
2461 +- */
2462 +- rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
2463 +- if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
2464 +- wrmsrl(MSR_IA32_RTIT_CTL, 0);
2465 +- pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
2466 +- pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
2467 +- }
2468 +-}
2469 +-
2470 +-static void pt_guest_exit(struct vcpu_vmx *vmx)
2471 +-{
2472 +- if (pt_mode == PT_MODE_SYSTEM)
2473 +- return;
2474 +-
2475 +- if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
2476 +- pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
2477 +- pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
2478 +- }
2479 +-
2480 +- /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */
2481 +- wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
2482 +-}
2483 +-
2484 +-void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
2485 +- unsigned long fs_base, unsigned long gs_base)
2486 +-{
2487 +- if (unlikely(fs_sel != host->fs_sel)) {
2488 +- if (!(fs_sel & 7))
2489 +- vmcs_write16(HOST_FS_SELECTOR, fs_sel);
2490 +- else
2491 +- vmcs_write16(HOST_FS_SELECTOR, 0);
2492 +- host->fs_sel = fs_sel;
2493 +- }
2494 +- if (unlikely(gs_sel != host->gs_sel)) {
2495 +- if (!(gs_sel & 7))
2496 +- vmcs_write16(HOST_GS_SELECTOR, gs_sel);
2497 +- else
2498 +- vmcs_write16(HOST_GS_SELECTOR, 0);
2499 +- host->gs_sel = gs_sel;
2500 +- }
2501 +- if (unlikely(fs_base != host->fs_base)) {
2502 +- vmcs_writel(HOST_FS_BASE, fs_base);
2503 +- host->fs_base = fs_base;
2504 +- }
2505 +- if (unlikely(gs_base != host->gs_base)) {
2506 +- vmcs_writel(HOST_GS_BASE, gs_base);
2507 +- host->gs_base = gs_base;
2508 +- }
2509 +-}
2510 +-
2511 +-void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
2512 +-{
2513 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2514 +- struct vmcs_host_state *host_state;
2515 +-#ifdef CONFIG_X86_64
2516 +- int cpu = raw_smp_processor_id();
2517 +-#endif
2518 +- unsigned long fs_base, gs_base;
2519 +- u16 fs_sel, gs_sel;
2520 +- int i;
2521 +-
2522 +- vmx->req_immediate_exit = false;
2523 +-
2524 +- /*
2525 +- * Note that guest MSRs to be saved/restored can also be changed
2526 +- * when guest state is loaded. This happens when guest transitions
2527 +- * to/from long-mode by setting MSR_EFER.LMA.
2528 +- */
2529 +- if (!vmx->guest_msrs_ready) {
2530 +- vmx->guest_msrs_ready = true;
2531 +- for (i = 0; i < vmx->save_nmsrs; ++i)
2532 +- kvm_set_shared_msr(vmx->guest_msrs[i].index,
2533 +- vmx->guest_msrs[i].data,
2534 +- vmx->guest_msrs[i].mask);
2535 +-
2536 +- }
2537 +- if (vmx->guest_state_loaded)
2538 +- return;
2539 +-
2540 +- host_state = &vmx->loaded_vmcs->host_state;
2541 +-
2542 +- /*
2543 +- * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
2544 +- * allow segment selectors with cpl > 0 or ti == 1.
2545 +- */
2546 +- host_state->ldt_sel = kvm_read_ldt();
2547 +-
2548 +-#ifdef CONFIG_X86_64
2549 +- savesegment(ds, host_state->ds_sel);
2550 +- savesegment(es, host_state->es_sel);
2551 +-
2552 +- gs_base = cpu_kernelmode_gs_base(cpu);
2553 +- if (likely(is_64bit_mm(current->mm))) {
2554 +- save_fsgs_for_kvm();
2555 +- fs_sel = current->thread.fsindex;
2556 +- gs_sel = current->thread.gsindex;
2557 +- fs_base = current->thread.fsbase;
2558 +- vmx->msr_host_kernel_gs_base = current->thread.gsbase;
2559 +- } else {
2560 +- savesegment(fs, fs_sel);
2561 +- savesegment(gs, gs_sel);
2562 +- fs_base = read_msr(MSR_FS_BASE);
2563 +- vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
2564 +- }
2565 +-
2566 +- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2567 +-#else
2568 +- savesegment(fs, fs_sel);
2569 +- savesegment(gs, gs_sel);
2570 +- fs_base = segment_base(fs_sel);
2571 +- gs_base = segment_base(gs_sel);
2572 +-#endif
2573 +-
2574 +- vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
2575 +- vmx->guest_state_loaded = true;
2576 +-}
2577 +-
2578 +-static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
2579 +-{
2580 +- struct vmcs_host_state *host_state;
2581 +-
2582 +- if (!vmx->guest_state_loaded)
2583 +- return;
2584 +-
2585 +- host_state = &vmx->loaded_vmcs->host_state;
2586 +-
2587 +- ++vmx->vcpu.stat.host_state_reload;
2588 +-
2589 +-#ifdef CONFIG_X86_64
2590 +- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2591 +-#endif
2592 +- if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2593 +- kvm_load_ldt(host_state->ldt_sel);
2594 +-#ifdef CONFIG_X86_64
2595 +- load_gs_index(host_state->gs_sel);
2596 +-#else
2597 +- loadsegment(gs, host_state->gs_sel);
2598 +-#endif
2599 +- }
2600 +- if (host_state->fs_sel & 7)
2601 +- loadsegment(fs, host_state->fs_sel);
2602 +-#ifdef CONFIG_X86_64
2603 +- if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2604 +- loadsegment(ds, host_state->ds_sel);
2605 +- loadsegment(es, host_state->es_sel);
2606 +- }
2607 +-#endif
2608 +- invalidate_tss_limit();
2609 +-#ifdef CONFIG_X86_64
2610 +- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
2611 +-#endif
2612 +- load_fixmap_gdt(raw_smp_processor_id());
2613 +- vmx->guest_state_loaded = false;
2614 +- vmx->guest_msrs_ready = false;
2615 +-}
2616 +-
2617 +-#ifdef CONFIG_X86_64
2618 +-static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
2619 +-{
2620 +- preempt_disable();
2621 +- if (vmx->guest_state_loaded)
2622 +- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2623 +- preempt_enable();
2624 +- return vmx->msr_guest_kernel_gs_base;
2625 +-}
2626 +-
2627 +-static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
2628 +-{
2629 +- preempt_disable();
2630 +- if (vmx->guest_state_loaded)
2631 +- wrmsrl(MSR_KERNEL_GS_BASE, data);
2632 +- preempt_enable();
2633 +- vmx->msr_guest_kernel_gs_base = data;
2634 +-}
2635 +-#endif
2636 +-
2637 +-static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2638 +-{
2639 +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2640 +- struct pi_desc old, new;
2641 +- unsigned int dest;
2642 +-
2643 +- /*
2644 +- * In case of hot-plug or hot-unplug, we may have to undo
2645 +- * vmx_vcpu_pi_put even if there is no assigned device. And we
2646 +- * always keep PI.NDST up to date for simplicity: it makes the
2647 +- * code easier, and CPU migration is not a fast path.
2648 +- */
2649 +- if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
2650 +- return;
2651 +-
2652 +- /*
2653 +- * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2654 +- * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
2655 +- * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
2656 +- * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
2657 +- * correctly.
2658 +- */
2659 +- if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
2660 +- pi_clear_sn(pi_desc);
2661 +- goto after_clear_sn;
2662 +- }
2663 +-
2664 +- /* The full case. */
2665 +- do {
2666 +- old.control = new.control = pi_desc->control;
2667 +-
2668 +- dest = cpu_physical_id(cpu);
2669 +-
2670 +- if (x2apic_enabled())
2671 +- new.ndst = dest;
2672 +- else
2673 +- new.ndst = (dest << 8) & 0xFF00;
2674 +-
2675 +- new.sn = 0;
2676 +- } while (cmpxchg64(&pi_desc->control, old.control,
2677 +- new.control) != old.control);
2678 +-
2679 +-after_clear_sn:
2680 +-
2681 +- /*
2682 +- * Clear SN before reading the bitmap. The VT-d firmware
2683 +- * writes the bitmap and reads SN atomically (5.2.3 in the
2684 +- * spec), so it doesn't really have a memory barrier that
2685 +- * pairs with this, but we cannot do that and we need one.
2686 +- */
2687 +- smp_mb__after_atomic();
2688 +-
2689 +- if (!pi_is_pir_empty(pi_desc))
2690 +- pi_set_on(pi_desc);
2691 +-}
2692 +-
2693 +-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
2694 +-{
2695 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2696 +- bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
2697 +-
2698 +- if (!already_loaded) {
2699 +- loaded_vmcs_clear(vmx->loaded_vmcs);
2700 +- local_irq_disable();
2701 +- crash_disable_local_vmclear(cpu);
2702 +-
2703 +- /*
2704 +- * Read loaded_vmcs->cpu should be before fetching
2705 +- * loaded_vmcs->loaded_vmcss_on_cpu_link.
2706 +- * See the comments in __loaded_vmcs_clear().
2707 +- */
2708 +- smp_rmb();
2709 +-
2710 +- list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
2711 +- &per_cpu(loaded_vmcss_on_cpu, cpu));
2712 +- crash_enable_local_vmclear(cpu);
2713 +- local_irq_enable();
2714 +- }
2715 +-
2716 +- if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2717 +- per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2718 +- vmcs_load(vmx->loaded_vmcs->vmcs);
2719 +- indirect_branch_prediction_barrier();
2720 +- }
2721 +-
2722 +- if (!already_loaded) {
2723 +- void *gdt = get_current_gdt_ro();
2724 +- unsigned long sysenter_esp;
2725 +-
2726 +- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2727 +-
2728 +- /*
2729 +- * Linux uses per-cpu TSS and GDT, so set these when switching
2730 +- * processors. See 22.2.4.
2731 +- */
2732 +- vmcs_writel(HOST_TR_BASE,
2733 +- (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2734 +- vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2735 +-
2736 +- rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
2737 +- vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
2738 +-
2739 +- vmx->loaded_vmcs->cpu = cpu;
2740 +- }
2741 +-
2742 +- /* Setup TSC multiplier */
2743 +- if (kvm_has_tsc_control &&
2744 +- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
2745 +- decache_tsc_multiplier(vmx);
2746 +-}
2747 +-
2748 +-/*
2749 +- * Switches to specified vcpu, until a matching vcpu_put(), but assumes
2750 +- * vcpu mutex is already taken.
2751 +- */
2752 +-void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2753 +-{
2754 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2755 +-
2756 +- vmx_vcpu_load_vmcs(vcpu, cpu);
2757 +-
2758 +- vmx_vcpu_pi_load(vcpu, cpu);
2759 +-
2760 +- vmx->host_pkru = read_pkru();
2761 +- vmx->host_debugctlmsr = get_debugctlmsr();
2762 +-}
2763 +-
2764 +-static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
2765 +-{
2766 +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2767 +-
2768 +- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
2769 +- !irq_remapping_cap(IRQ_POSTING_CAP) ||
2770 +- !kvm_vcpu_apicv_active(vcpu))
2771 +- return;
2772 +-
2773 +- /* Set SN when the vCPU is preempted */
2774 +- if (vcpu->preempted)
2775 +- pi_set_sn(pi_desc);
2776 +-}
2777 +-
2778 +-static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2779 +-{
2780 +- vmx_vcpu_pi_put(vcpu);
2781 +-
2782 +- vmx_prepare_switch_to_host(to_vmx(vcpu));
2783 +-}
2784 +-
2785 +-static bool emulation_required(struct kvm_vcpu *vcpu)
2786 +-{
2787 +- return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2788 +-}
2789 +-
2790 +-static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2791 +-
2792 +-unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2793 +-{
2794 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2795 +- unsigned long rflags, save_rflags;
2796 +-
2797 +- if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
2798 +- kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2799 +- rflags = vmcs_readl(GUEST_RFLAGS);
2800 +- if (vmx->rmode.vm86_active) {
2801 +- rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2802 +- save_rflags = vmx->rmode.save_rflags;
2803 +- rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2804 +- }
2805 +- vmx->rflags = rflags;
2806 +- }
2807 +- return vmx->rflags;
2808 +-}
2809 +-
2810 +-void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2811 +-{
2812 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2813 +- unsigned long old_rflags;
2814 +-
2815 +- if (enable_unrestricted_guest) {
2816 +- kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2817 +- vmx->rflags = rflags;
2818 +- vmcs_writel(GUEST_RFLAGS, rflags);
2819 +- return;
2820 +- }
2821 +-
2822 +- old_rflags = vmx_get_rflags(vcpu);
2823 +- vmx->rflags = rflags;
2824 +- if (vmx->rmode.vm86_active) {
2825 +- vmx->rmode.save_rflags = rflags;
2826 +- rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
2827 +- }
2828 +- vmcs_writel(GUEST_RFLAGS, rflags);
2829 +-
2830 +- if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
2831 +- vmx->emulation_required = emulation_required(vcpu);
2832 +-}
2833 +-
2834 +-u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2835 +-{
2836 +- u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2837 +- int ret = 0;
2838 +-
2839 +- if (interruptibility & GUEST_INTR_STATE_STI)
2840 +- ret |= KVM_X86_SHADOW_INT_STI;
2841 +- if (interruptibility & GUEST_INTR_STATE_MOV_SS)
2842 +- ret |= KVM_X86_SHADOW_INT_MOV_SS;
2843 +-
2844 +- return ret;
2845 +-}
2846 +-
2847 +-void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
2848 +-{
2849 +- u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2850 +- u32 interruptibility = interruptibility_old;
2851 +-
2852 +- interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
2853 +-
2854 +- if (mask & KVM_X86_SHADOW_INT_MOV_SS)
2855 +- interruptibility |= GUEST_INTR_STATE_MOV_SS;
2856 +- else if (mask & KVM_X86_SHADOW_INT_STI)
2857 +- interruptibility |= GUEST_INTR_STATE_STI;
2858 +-
2859 +- if ((interruptibility != interruptibility_old))
2860 +- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
2861 +-}
2862 +-
2863 +-static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
2864 +-{
2865 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2866 +- unsigned long value;
2867 +-
2868 +- /*
2869 +- * Any MSR write that attempts to change bits marked reserved will
2870 +- * cause a #GP fault.
2871 +- */
2872 +- if (data & vmx->pt_desc.ctl_bitmask)
2873 +- return 1;
2874 +-
2875 +- /*
2876 +- * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
2877 +- * result in a #GP unless the same write also clears TraceEn.
2878 +- */
2879 +- if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
2880 +- ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
2881 +- return 1;
2882 +-
2883 +- /*
2884 +- * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit
2885 +- * and FabricEn would cause #GP, if
2886 +- * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
2887 +- */
2888 +- if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
2889 +- !(data & RTIT_CTL_FABRIC_EN) &&
2890 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
2891 +- PT_CAP_single_range_output))
2892 +- return 1;
2893 +-
2894 +- /*
2895 +- * Check the MTCFreq, CycThresh and PSBFreq encodings; any MSR write
2896 +- * that uses an encoding marked reserved will cause a #GP fault.
2897 +- */
2898 +- value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
2899 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
2900 +- !test_bit((data & RTIT_CTL_MTC_RANGE) >>
2901 +- RTIT_CTL_MTC_RANGE_OFFSET, &value))
2902 +- return 1;
2903 +- value = intel_pt_validate_cap(vmx->pt_desc.caps,
2904 +- PT_CAP_cycle_thresholds);
2905 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2906 +- !test_bit((data & RTIT_CTL_CYC_THRESH) >>
2907 +- RTIT_CTL_CYC_THRESH_OFFSET, &value))
2908 +- return 1;
2909 +- value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
2910 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2911 +- !test_bit((data & RTIT_CTL_PSB_FREQ) >>
2912 +- RTIT_CTL_PSB_FREQ_OFFSET, &value))
2913 +- return 1;
2914 +-
2915 +- /*
2916 +- * If ADDRx_CFG is reserved or the encoding is >2, the write will
2917 +- * cause a #GP fault.
2918 +- */
2919 +- value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
2920 +- if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
2921 +- return 1;
2922 +- value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
2923 +- if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
2924 +- return 1;
2925 +- value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
2926 +- if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
2927 +- return 1;
2928 +- value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
2929 +- if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
2930 +- return 1;
2931 +-
2932 +- return 0;
2933 +-}
2934 +-
2935 +-static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
2936 +-{
2937 +- unsigned long rip;
2938 +-
2939 +- /*
2940 +- * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
2941 +- * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
2942 +- * set when EPT misconfig occurs. In practice, real hardware updates
2943 +- * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
2944 +- * (namely Hyper-V) don't set it due to it being undefined behavior,
2945 +- * i.e. we end up advancing IP with some random value.
2946 +- */
2947 +- if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
2948 +- to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
2949 +- rip = kvm_rip_read(vcpu);
2950 +- rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2951 +- kvm_rip_write(vcpu, rip);
2952 +- } else {
2953 +- if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
2954 +- return 0;
2955 +- }
2956 +-
2957 +- /* skipping an emulated instruction also counts */
2958 +- vmx_set_interrupt_shadow(vcpu, 0);
2959 +-
2960 +- return 1;
2961 +-}
2962 +-
2963 +-static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
2964 +-{
2965 +- /*
2966 +- * Ensure that we clear the HLT state in the VMCS. We don't need to
2967 +- * explicitly skip the instruction because if the HLT state is set,
2968 +- * then the instruction is already executing and RIP has already been
2969 +- * advanced.
2970 +- */
2971 +- if (kvm_hlt_in_guest(vcpu->kvm) &&
2972 +- vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
2973 +- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
2974 +-}
2975 +-
2976 +-static void vmx_queue_exception(struct kvm_vcpu *vcpu)
2977 +-{
2978 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
2979 +- unsigned nr = vcpu->arch.exception.nr;
2980 +- bool has_error_code = vcpu->arch.exception.has_error_code;
2981 +- u32 error_code = vcpu->arch.exception.error_code;
2982 +- u32 intr_info = nr | INTR_INFO_VALID_MASK;
2983 +-
2984 +- kvm_deliver_exception_payload(vcpu);
2985 +-
2986 +- if (has_error_code) {
2987 +- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
2988 +- intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2989 +- }
2990 +-
2991 +- if (vmx->rmode.vm86_active) {
2992 +- int inc_eip = 0;
2993 +- if (kvm_exception_is_soft(nr))
2994 +- inc_eip = vcpu->arch.event_exit_inst_len;
2995 +- kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
2996 +- return;
2997 +- }
2998 +-
2999 +- WARN_ON_ONCE(vmx->emulation_required);
3000 +-
3001 +- if (kvm_exception_is_soft(nr)) {
3002 +- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
3003 +- vmx->vcpu.arch.event_exit_inst_len);
3004 +- intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3005 +- } else
3006 +- intr_info |= INTR_TYPE_HARD_EXCEPTION;
3007 +-
3008 +- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
3009 +-
3010 +- vmx_clear_hlt(vcpu);
3011 +-}
3012 +-
3013 +-static bool vmx_rdtscp_supported(void)
3014 +-{
3015 +- return cpu_has_vmx_rdtscp();
3016 +-}
3017 +-
3018 +-static bool vmx_invpcid_supported(void)
3019 +-{
3020 +- return cpu_has_vmx_invpcid();
3021 +-}
3022 +-
3023 +-/*
3024 +- * Swap MSR entry in host/guest MSR entry array.
3025 +- */
3026 +-static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
3027 +-{
3028 +- struct shared_msr_entry tmp;
3029 +-
3030 +- tmp = vmx->guest_msrs[to];
3031 +- vmx->guest_msrs[to] = vmx->guest_msrs[from];
3032 +- vmx->guest_msrs[from] = tmp;
3033 +-}
3034 +-
3035 +-/*
3036 +- * Set up the vmcs to automatically save and restore system
3037 +- * msrs. Don't touch the 64-bit msrs if the guest is in legacy
3038 +- * mode, as fiddling with msrs is very expensive.
3039 +- */
3040 +-static void setup_msrs(struct vcpu_vmx *vmx)
3041 +-{
3042 +- int save_nmsrs, index;
3043 +-
3044 +- save_nmsrs = 0;
3045 +-#ifdef CONFIG_X86_64
3046 +- /*
3047 +- * The SYSCALL MSRs are only needed on long mode guests, and only
3048 +- * when EFER.SCE is set.
3049 +- */
3050 +- if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
3051 +- index = __find_msr_index(vmx, MSR_STAR);
3052 +- if (index >= 0)
3053 +- move_msr_up(vmx, index, save_nmsrs++);
3054 +- index = __find_msr_index(vmx, MSR_LSTAR);
3055 +- if (index >= 0)
3056 +- move_msr_up(vmx, index, save_nmsrs++);
3057 +- index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
3058 +- if (index >= 0)
3059 +- move_msr_up(vmx, index, save_nmsrs++);
3060 +- }
3061 +-#endif
3062 +- index = __find_msr_index(vmx, MSR_EFER);
3063 +- if (index >= 0 && update_transition_efer(vmx, index))
3064 +- move_msr_up(vmx, index, save_nmsrs++);
3065 +- index = __find_msr_index(vmx, MSR_TSC_AUX);
3066 +- if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
3067 +- move_msr_up(vmx, index, save_nmsrs++);
3068 +- index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
3069 +- if (index >= 0)
3070 +- move_msr_up(vmx, index, save_nmsrs++);
3071 +-
3072 +- vmx->save_nmsrs = save_nmsrs;
3073 +- vmx->guest_msrs_ready = false;
3074 +-
3075 +- if (cpu_has_vmx_msr_bitmap())
3076 +- vmx_update_msr_bitmap(&vmx->vcpu);
3077 +-}
3078 +-
3079 +-static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
3080 +-{
3081 +- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3082 +-
3083 +- if (is_guest_mode(vcpu) &&
3084 +- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
3085 +- return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
3086 +-
3087 +- return vcpu->arch.tsc_offset;
3088 +-}
3089 +-
3090 +-static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
3091 +-{
3092 +- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3093 +- u64 g_tsc_offset = 0;
3094 +-
3095 +- /*
3096 +- * We're here if L1 chose not to trap WRMSR to TSC. According
3097 +- * to the spec, this should set L1's TSC; The offset that L1
3098 +- * set for L2 remains unchanged, and still needs to be added
3099 +- * to the newly set TSC to get L2's TSC.
3100 +- */
3101 +- if (is_guest_mode(vcpu) &&
3102 +- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
3103 +- g_tsc_offset = vmcs12->tsc_offset;
3104 +-
3105 +- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
3106 +- vcpu->arch.tsc_offset - g_tsc_offset,
3107 +- offset);
3108 +- vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
3109 +- return offset + g_tsc_offset;
3110 +-}
3111 +-
3112 +-/*
3113 +- * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
3114 +- * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
3115 +- * all guests if the "nested" module option is off, and can also be disabled
3116 +- * for a single guest by disabling its VMX cpuid bit.
3117 +- */
3118 +-bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
3119 +-{
3120 +- return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
3121 +-}
3122 +-
3123 +-static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
3124 +- uint64_t val)
3125 +-{
3126 +- uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
3127 +-
3128 +- return !(val & ~valid_bits);
3129 +-}
3130 +-
3131 +-static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
3132 +-{
3133 +- switch (msr->index) {
3134 +- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3135 +- if (!nested)
3136 +- return 1;
3137 +- return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
3138 +- default:
3139 +- return 1;
3140 +- }
3141 +-}
3142 +-
3143 +-/*
3144 +- * Reads an msr value (of 'msr_index') into 'pdata'.
3145 +- * Returns 0 on success, non-0 otherwise.
3146 +- * Assumes vcpu_load() was already called.
3147 +- */
3148 +-static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3149 +-{
3150 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
3151 +- struct shared_msr_entry *msr;
3152 +- u32 index;
3153 +-
3154 +- switch (msr_info->index) {
3155 +-#ifdef CONFIG_X86_64
3156 +- case MSR_FS_BASE:
3157 +- msr_info->data = vmcs_readl(GUEST_FS_BASE);
3158 +- break;
3159 +- case MSR_GS_BASE:
3160 +- msr_info->data = vmcs_readl(GUEST_GS_BASE);
3161 +- break;
3162 +- case MSR_KERNEL_GS_BASE:
3163 +- msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
3164 +- break;
3165 +-#endif
3166 +- case MSR_EFER:
3167 +- return kvm_get_msr_common(vcpu, msr_info);
3168 +- case MSR_IA32_TSX_CTRL:
3169 +- if (!msr_info->host_initiated &&
3170 +- !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
3171 +- return 1;
3172 +- goto find_shared_msr;
3173 +- case MSR_IA32_UMWAIT_CONTROL:
3174 +- if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
3175 +- return 1;
3176 +-
3177 +- msr_info->data = vmx->msr_ia32_umwait_control;
3178 +- break;
3179 +- case MSR_IA32_SPEC_CTRL:
3180 +- if (!msr_info->host_initiated &&
3181 +- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3182 +- return 1;
3183 +-
3184 +- msr_info->data = to_vmx(vcpu)->spec_ctrl;
3185 +- break;
3186 +- case MSR_IA32_SYSENTER_CS:
3187 +- msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
3188 +- break;
3189 +- case MSR_IA32_SYSENTER_EIP:
3190 +- msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
3191 +- break;
3192 +- case MSR_IA32_SYSENTER_ESP:
3193 +- msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
3194 +- break;
3195 +- case MSR_IA32_BNDCFGS:
3196 +- if (!kvm_mpx_supported() ||
3197 +- (!msr_info->host_initiated &&
3198 +- !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
3199 +- return 1;
3200 +- msr_info->data = vmcs_read64(GUEST_BNDCFGS);
3201 +- break;
3202 +- case MSR_IA32_MCG_EXT_CTL:
3203 +- if (!msr_info->host_initiated &&
3204 +- !(vmx->msr_ia32_feature_control &
3205 +- FEATURE_CONTROL_LMCE))
3206 +- return 1;
3207 +- msr_info->data = vcpu->arch.mcg_ext_ctl;
3208 +- break;
3209 +- case MSR_IA32_FEATURE_CONTROL:
3210 +- msr_info->data = vmx->msr_ia32_feature_control;
3211 +- break;
3212 +- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3213 +- if (!nested_vmx_allowed(vcpu))
3214 +- return 1;
3215 +- return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
3216 +- &msr_info->data);
3217 +- case MSR_IA32_RTIT_CTL:
3218 +- if (pt_mode != PT_MODE_HOST_GUEST)
3219 +- return 1;
3220 +- msr_info->data = vmx->pt_desc.guest.ctl;
3221 +- break;
3222 +- case MSR_IA32_RTIT_STATUS:
3223 +- if (pt_mode != PT_MODE_HOST_GUEST)
3224 +- return 1;
3225 +- msr_info->data = vmx->pt_desc.guest.status;
3226 +- break;
3227 +- case MSR_IA32_RTIT_CR3_MATCH:
3228 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3229 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
3230 +- PT_CAP_cr3_filtering))
3231 +- return 1;
3232 +- msr_info->data = vmx->pt_desc.guest.cr3_match;
3233 +- break;
3234 +- case MSR_IA32_RTIT_OUTPUT_BASE:
3235 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3236 +- (!intel_pt_validate_cap(vmx->pt_desc.caps,
3237 +- PT_CAP_topa_output) &&
3238 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
3239 +- PT_CAP_single_range_output)))
3240 +- return 1;
3241 +- msr_info->data = vmx->pt_desc.guest.output_base;
3242 +- break;
3243 +- case MSR_IA32_RTIT_OUTPUT_MASK:
3244 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3245 +- (!intel_pt_validate_cap(vmx->pt_desc.caps,
3246 +- PT_CAP_topa_output) &&
3247 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
3248 +- PT_CAP_single_range_output)))
3249 +- return 1;
3250 +- msr_info->data = vmx->pt_desc.guest.output_mask;
3251 +- break;
3252 +- case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
3253 +- index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
3254 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3255 +- (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
3256 +- PT_CAP_num_address_ranges)))
3257 +- return 1;
3258 +- if (is_noncanonical_address(data, vcpu))
3259 +- return 1;
3260 +- if (index % 2)
3261 +- msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
3262 +- else
3263 +- msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
3264 +- break;
3265 +- case MSR_TSC_AUX:
3266 +- if (!msr_info->host_initiated &&
3267 +- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
3268 +- return 1;
3269 +- goto find_shared_msr;
3270 +- default:
3271 +- find_shared_msr:
3272 +- msr = find_msr_entry(vmx, msr_info->index);
3273 +- if (msr) {
3274 +- msr_info->data = msr->data;
3275 +- break;
3276 +- }
3277 +- return kvm_get_msr_common(vcpu, msr_info);
3278 +- }
3279 +-
3280 +- return 0;
3281 +-}
3282 +-
3283 +-/*
3284 +- * Writes msr value into the appropriate "register".
3285 +- * Returns 0 on success, non-0 otherwise.
3286 +- * Assumes vcpu_load() was already called.
3287 +- */
3288 +-static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3289 +-{
3290 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
3291 +- struct shared_msr_entry *msr;
3292 +- int ret = 0;
3293 +- u32 msr_index = msr_info->index;
3294 +- u64 data = msr_info->data;
3295 +- u32 index;
3296 +-
3297 +- switch (msr_index) {
3298 +- case MSR_EFER:
3299 +- ret = kvm_set_msr_common(vcpu, msr_info);
3300 +- break;
3301 +-#ifdef CONFIG_X86_64
3302 +- case MSR_FS_BASE:
3303 +- vmx_segment_cache_clear(vmx);
3304 +- vmcs_writel(GUEST_FS_BASE, data);
3305 +- break;
3306 +- case MSR_GS_BASE:
3307 +- vmx_segment_cache_clear(vmx);
3308 +- vmcs_writel(GUEST_GS_BASE, data);
3309 +- break;
3310 +- case MSR_KERNEL_GS_BASE:
3311 +- vmx_write_guest_kernel_gs_base(vmx, data);
3312 +- break;
3313 +-#endif
3314 +- case MSR_IA32_SYSENTER_CS:
3315 +- if (is_guest_mode(vcpu))
3316 +- get_vmcs12(vcpu)->guest_sysenter_cs = data;
3317 +- vmcs_write32(GUEST_SYSENTER_CS, data);
3318 +- break;
3319 +- case MSR_IA32_SYSENTER_EIP:
3320 +- if (is_guest_mode(vcpu))
3321 +- get_vmcs12(vcpu)->guest_sysenter_eip = data;
3322 +- vmcs_writel(GUEST_SYSENTER_EIP, data);
3323 +- break;
3324 +- case MSR_IA32_SYSENTER_ESP:
3325 +- if (is_guest_mode(vcpu))
3326 +- get_vmcs12(vcpu)->guest_sysenter_esp = data;
3327 +- vmcs_writel(GUEST_SYSENTER_ESP, data);
3328 +- break;
3329 +- case MSR_IA32_DEBUGCTLMSR:
3330 +- if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
3331 +- VM_EXIT_SAVE_DEBUG_CONTROLS)
3332 +- get_vmcs12(vcpu)->guest_ia32_debugctl = data;
3333 +-
3334 +- ret = kvm_set_msr_common(vcpu, msr_info);
3335 +- break;
3336 +-
3337 +- case MSR_IA32_BNDCFGS:
3338 +- if (!kvm_mpx_supported() ||
3339 +- (!msr_info->host_initiated &&
3340 +- !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
3341 +- return 1;
3342 +- if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
3343 +- (data & MSR_IA32_BNDCFGS_RSVD))
3344 +- return 1;
3345 +- vmcs_write64(GUEST_BNDCFGS, data);
3346 +- break;
3347 +- case MSR_IA32_UMWAIT_CONTROL:
3348 +- if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
3349 +- return 1;
3350 +-
3351 +- /* Reserved bit 1 and the upper bits [63:32] must be zero */
3352 +- if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
3353 +- return 1;
3354 +-
3355 +- vmx->msr_ia32_umwait_control = data;
3356 +- break;
3357 +- case MSR_IA32_SPEC_CTRL:
3358 +- if (!msr_info->host_initiated &&
3359 +- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3360 +- return 1;
3361 +-
3362 +- /* The STIBP bit doesn't fault even if it's not advertised */
3363 +- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
3364 +- return 1;
3365 +-
3366 +- vmx->spec_ctrl = data;
3367 +-
3368 +- if (!data)
3369 +- break;
3370 +-
3371 +- /*
3372 +- * For non-nested:
3373 +- * When it's written (to non-zero) for the first time, pass
3374 +- * it through.
3375 +- *
3376 +- * For nested:
3377 +- * The handling of the MSR bitmap for L2 guests is done in
3378 +- * nested_vmx_prepare_msr_bitmap. We should not touch the
3379 +- * vmcs02.msr_bitmap here since it gets completely overwritten
3380 +- * in the merging. We update the vmcs01 here for L1 as well
3381 +- * since it will end up touching the MSR anyway now.
3382 +- */
3383 +- vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
3384 +- MSR_IA32_SPEC_CTRL,
3385 +- MSR_TYPE_RW);
3386 +- break;
3387 +- case MSR_IA32_TSX_CTRL:
3388 +- if (!msr_info->host_initiated &&
3389 +- !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
3390 +- return 1;
3391 +- if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
3392 +- return 1;
3393 +- goto find_shared_msr;
3394 +- case MSR_IA32_PRED_CMD:
3395 +- if (!msr_info->host_initiated &&
3396 +- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3397 +- return 1;
3398 +-
3399 +- if (data & ~PRED_CMD_IBPB)
3400 +- return 1;
3401 +-
3402 +- if (!data)
3403 +- break;
3404 +-
3405 +- wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
3406 +-
3407 +- /*
3408 +- * For non-nested:
3409 +- * When it's written (to non-zero) for the first time, pass
3410 +- * it through.
3411 +- *
3412 +- * For nested:
3413 +- * The handling of the MSR bitmap for L2 guests is done in
3414 +- * nested_vmx_prepare_msr_bitmap. We should not touch the
3415 +- * vmcs02.msr_bitmap here since it gets completely overwritten
3416 +- * in the merging.
3417 +- */
3418 +- vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
3419 +- MSR_TYPE_W);
3420 +- break;
3421 +- case MSR_IA32_CR_PAT:
3422 +- if (!kvm_pat_valid(data))
3423 +- return 1;
3424 +-
3425 +- if (is_guest_mode(vcpu) &&
3426 +- get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
3427 +- get_vmcs12(vcpu)->guest_ia32_pat = data;
3428 +-
3429 +- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
3430 +- vmcs_write64(GUEST_IA32_PAT, data);
3431 +- vcpu->arch.pat = data;
3432 +- break;
3433 +- }
3434 +- ret = kvm_set_msr_common(vcpu, msr_info);
3435 +- break;
3436 +- case MSR_IA32_TSC_ADJUST:
3437 +- ret = kvm_set_msr_common(vcpu, msr_info);
3438 +- break;
3439 +- case MSR_IA32_MCG_EXT_CTL:
3440 +- if ((!msr_info->host_initiated &&
3441 +- !(to_vmx(vcpu)->msr_ia32_feature_control &
3442 +- FEATURE_CONTROL_LMCE)) ||
3443 +- (data & ~MCG_EXT_CTL_LMCE_EN))
3444 +- return 1;
3445 +- vcpu->arch.mcg_ext_ctl = data;
3446 +- break;
3447 +- case MSR_IA32_FEATURE_CONTROL:
3448 +- if (!vmx_feature_control_msr_valid(vcpu, data) ||
3449 +- (to_vmx(vcpu)->msr_ia32_feature_control &
3450 +- FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
3451 +- return 1;
3452 +- vmx->msr_ia32_feature_control = data;
3453 +- if (msr_info->host_initiated && data == 0)
3454 +- vmx_leave_nested(vcpu);
3455 +- break;
3456 +- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3457 +- if (!msr_info->host_initiated)
3458 +- return 1; /* they are read-only */
3459 +- if (!nested_vmx_allowed(vcpu))
3460 +- return 1;
3461 +- return vmx_set_vmx_msr(vcpu, msr_index, data);
3462 +- case MSR_IA32_RTIT_CTL:
3463 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3464 +- vmx_rtit_ctl_check(vcpu, data) ||
3465 +- vmx->nested.vmxon)
3466 +- return 1;
3467 +- vmcs_write64(GUEST_IA32_RTIT_CTL, data);
3468 +- vmx->pt_desc.guest.ctl = data;
3469 +- pt_update_intercept_for_msr(vmx);
3470 +- break;
3471 +- case MSR_IA32_RTIT_STATUS:
3472 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3473 +- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
3474 +- (data & MSR_IA32_RTIT_STATUS_MASK))
3475 +- return 1;
3476 +- vmx->pt_desc.guest.status = data;
3477 +- break;
3478 +- case MSR_IA32_RTIT_CR3_MATCH:
3479 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3480 +- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
3481 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
3482 +- PT_CAP_cr3_filtering))
3483 +- return 1;
3484 +- vmx->pt_desc.guest.cr3_match = data;
3485 +- break;
3486 +- case MSR_IA32_RTIT_OUTPUT_BASE:
3487 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3488 +- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
3489 +- (!intel_pt_validate_cap(vmx->pt_desc.caps,
3490 +- PT_CAP_topa_output) &&
3491 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
3492 +- PT_CAP_single_range_output)) ||
3493 +- (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
3494 +- return 1;
3495 +- vmx->pt_desc.guest.output_base = data;
3496 +- break;
3497 +- case MSR_IA32_RTIT_OUTPUT_MASK:
3498 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3499 +- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
3500 +- (!intel_pt_validate_cap(vmx->pt_desc.caps,
3501 +- PT_CAP_topa_output) &&
3502 +- !intel_pt_validate_cap(vmx->pt_desc.caps,
3503 +- PT_CAP_single_range_output)))
3504 +- return 1;
3505 +- vmx->pt_desc.guest.output_mask = data;
3506 +- break;
3507 +- case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
3508 +- index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
3509 +- if ((pt_mode != PT_MODE_HOST_GUEST) ||
3510 +- (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
3511 +- (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
3512 +- PT_CAP_num_address_ranges)))
3513 +- return 1;
3514 +- if (is_noncanonical_address(data, vcpu))
3515 +- return 1;
3516 +- if (index % 2)
3517 +- vmx->pt_desc.guest.addr_b[index / 2] = data;
3518 +- else
3519 +- vmx->pt_desc.guest.addr_a[index / 2] = data;
3520 +- break;
3521 +- case MSR_TSC_AUX:
3522 +- if (!msr_info->host_initiated &&
3523 +- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
3524 +- return 1;
3525 +- /* Check reserved bit, higher 32 bits should be zero */
3526 +- if ((data >> 32) != 0)
3527 +- return 1;
3528 +- goto find_shared_msr;
3529 +-
3530 +- default:
3531 +- find_shared_msr:
3532 +- msr = find_msr_entry(vmx, msr_index);
3533 +- if (msr)
3534 +- ret = vmx_set_guest_msr(vmx, msr, data);
3535 +- else
3536 +- ret = kvm_set_msr_common(vcpu, msr_info);
3537 +- }
3538 +-
3539 +- return ret;
3540 +-}
3541 +-
3542 +-static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
3543 +-{
3544 +- kvm_register_mark_available(vcpu, reg);
3545 +-
3546 +- switch (reg) {
3547 +- case VCPU_REGS_RSP:
3548 +- vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
3549 +- break;
3550 +- case VCPU_REGS_RIP:
3551 +- vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
3552 +- break;
3553 +- case VCPU_EXREG_PDPTR:
3554 +- if (enable_ept)
3555 +- ept_save_pdptrs(vcpu);
3556 +- break;
3557 +- case VCPU_EXREG_CR3:
3558 +- if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
3559 +- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
3560 +- break;
3561 +- default:
3562 +- WARN_ON_ONCE(1);
3563 +- break;
3564 +- }
3565 +-}
3566 +-
3567 +-static __init int cpu_has_kvm_support(void)
3568 +-{
3569 +- return cpu_has_vmx();
3570 +-}
3571 +-
3572 +-static __init int vmx_disabled_by_bios(void)
3573 +-{
3574 +- u64 msr;
3575 +-
3576 +- rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
3577 +- if (msr & FEATURE_CONTROL_LOCKED) {
3578 +- /* launched w/ TXT and VMX disabled */
3579 +- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3580 +- && tboot_enabled())
3581 +- return 1;
3582 +- /* launched w/o TXT and VMX only enabled w/ TXT */
3583 +- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3584 +- && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3585 +- && !tboot_enabled()) {
3586 +- printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
3587 +- "activate TXT before enabling KVM\n");
3588 +- return 1;
3589 +- }
3590 +- /* launched w/o TXT and VMX disabled */
3591 +- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3592 +- && !tboot_enabled())
3593 +- return 1;
3594 +- }
3595 +-
3596 +- return 0;
3597 +-}
3598 +-
3599 +-static void kvm_cpu_vmxon(u64 addr)
3600 +-{
3601 +- cr4_set_bits(X86_CR4_VMXE);
3602 +- intel_pt_handle_vmx(1);
3603 +-
3604 +- asm volatile ("vmxon %0" : : "m"(addr));
3605 +-}
3606 +-
3607 +-static int hardware_enable(void)
3608 +-{
3609 +- int cpu = raw_smp_processor_id();
3610 +- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
3611 +- u64 old, test_bits;
3612 +-
3613 +- if (cr4_read_shadow() & X86_CR4_VMXE)
3614 +- return -EBUSY;
3615 +-
3616 +- /*
3617 +- * This can happen if we hot-added a CPU but failed to allocate
3618 +- * VP assist page for it.
3619 +- */
3620 +- if (static_branch_unlikely(&enable_evmcs) &&
3621 +- !hv_get_vp_assist_page(cpu))
3622 +- return -EFAULT;
3623 +-
3624 +- INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
3625 +- INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
3626 +- spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
3627 +-
3628 +- /*
3629 +- * Now we can enable the vmclear operation in kdump
3630 +- * since the loaded_vmcss_on_cpu list on this cpu
3631 +- * has been initialized.
3632 +- *
3633 +- * Though the cpu is not in VMX operation now, there
3634 +- * is no problem in enabling the vmclear operation,
3635 +- * since the loaded_vmcss_on_cpu list is empty.
3636 +- */
3637 +- crash_enable_local_vmclear(cpu);
3638 +-
3639 +- rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
3640 +-
3641 +- test_bits = FEATURE_CONTROL_LOCKED;
3642 +- test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
3643 +- if (tboot_enabled())
3644 +- test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
3645 +-
3646 +- if ((old & test_bits) != test_bits) {
3647 +- /* enable and lock */
3648 +- wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
3649 +- }
3650 +- kvm_cpu_vmxon(phys_addr);
3651 +- if (enable_ept)
3652 +- ept_sync_global();
3653 +-
3654 +- return 0;
3655 +-}
3656 +-
3657 +-static void vmclear_local_loaded_vmcss(void)
3658 +-{
3659 +- int cpu = raw_smp_processor_id();
3660 +- struct loaded_vmcs *v, *n;
3661 +-
3662 +- list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
3663 +- loaded_vmcss_on_cpu_link)
3664 +- __loaded_vmcs_clear(v);
3665 +-}
3666 +-
3667 +-
3668 +-/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
3669 +- * tricks.
3670 +- */
3671 +-static void kvm_cpu_vmxoff(void)
3672 +-{
3673 +- asm volatile (__ex("vmxoff"));
3674 +-
3675 +- intel_pt_handle_vmx(0);
3676 +- cr4_clear_bits(X86_CR4_VMXE);
3677 +-}
3678 +-
3679 +-static void hardware_disable(void)
3680 +-{
3681 +- vmclear_local_loaded_vmcss();
3682 +- kvm_cpu_vmxoff();
3683 +-}
3684 +-
3685 +-static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
3686 +- u32 msr, u32 *result)
3687 +-{
3688 +- u32 vmx_msr_low, vmx_msr_high;
3689 +- u32 ctl = ctl_min | ctl_opt;
3690 +-
3691 +- rdmsr(msr, vmx_msr_low, vmx_msr_high);
3692 +-
3693 +- ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
3694 +- ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
3695 +-
3696 +- /* Ensure minimum (required) set of control bits are supported. */
3697 +- if (ctl_min & ~ctl)
3698 +- return -EIO;
3699 +-
3700 +- *result = ctl;
3701 +- return 0;
3702 +-}
3703 +-
3704 +-static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
3705 +- struct vmx_capability *vmx_cap)
3706 +-{
3707 +- u32 vmx_msr_low, vmx_msr_high;
3708 +- u32 min, opt, min2, opt2;
3709 +- u32 _pin_based_exec_control = 0;
3710 +- u32 _cpu_based_exec_control = 0;
3711 +- u32 _cpu_based_2nd_exec_control = 0;
3712 +- u32 _vmexit_control = 0;
3713 +- u32 _vmentry_control = 0;
3714 +-
3715 +- memset(vmcs_conf, 0, sizeof(*vmcs_conf));
3716 +- min = CPU_BASED_HLT_EXITING |
3717 +-#ifdef CONFIG_X86_64
3718 +- CPU_BASED_CR8_LOAD_EXITING |
3719 +- CPU_BASED_CR8_STORE_EXITING |
3720 +-#endif
3721 +- CPU_BASED_CR3_LOAD_EXITING |
3722 +- CPU_BASED_CR3_STORE_EXITING |
3723 +- CPU_BASED_UNCOND_IO_EXITING |
3724 +- CPU_BASED_MOV_DR_EXITING |
3725 +- CPU_BASED_USE_TSC_OFFSETTING |
3726 +- CPU_BASED_MWAIT_EXITING |
3727 +- CPU_BASED_MONITOR_EXITING |
3728 +- CPU_BASED_INVLPG_EXITING |
3729 +- CPU_BASED_RDPMC_EXITING;
3730 +-
3731 +- opt = CPU_BASED_TPR_SHADOW |
3732 +- CPU_BASED_USE_MSR_BITMAPS |
3733 +- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
3734 +- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
3735 +- &_cpu_based_exec_control) < 0)
3736 +- return -EIO;
3737 +-#ifdef CONFIG_X86_64
3738 +- if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3739 +- _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
3740 +- ~CPU_BASED_CR8_STORE_EXITING;
3741 +-#endif
3742 +- if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
3743 +- min2 = 0;
3744 +- opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
3745 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3746 +- SECONDARY_EXEC_WBINVD_EXITING |
3747 +- SECONDARY_EXEC_ENABLE_VPID |
3748 +- SECONDARY_EXEC_ENABLE_EPT |
3749 +- SECONDARY_EXEC_UNRESTRICTED_GUEST |
3750 +- SECONDARY_EXEC_PAUSE_LOOP_EXITING |
3751 +- SECONDARY_EXEC_DESC |
3752 +- SECONDARY_EXEC_RDTSCP |
3753 +- SECONDARY_EXEC_ENABLE_INVPCID |
3754 +- SECONDARY_EXEC_APIC_REGISTER_VIRT |
3755 +- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
3756 +- SECONDARY_EXEC_SHADOW_VMCS |
3757 +- SECONDARY_EXEC_XSAVES |
3758 +- SECONDARY_EXEC_RDSEED_EXITING |
3759 +- SECONDARY_EXEC_RDRAND_EXITING |
3760 +- SECONDARY_EXEC_ENABLE_PML |
3761 +- SECONDARY_EXEC_TSC_SCALING |
3762 +- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
3763 +- SECONDARY_EXEC_PT_USE_GPA |
3764 +- SECONDARY_EXEC_PT_CONCEAL_VMX |
3765 +- SECONDARY_EXEC_ENABLE_VMFUNC |
3766 +- SECONDARY_EXEC_ENCLS_EXITING;
3767 +- if (adjust_vmx_controls(min2, opt2,
3768 +- MSR_IA32_VMX_PROCBASED_CTLS2,
3769 +- &_cpu_based_2nd_exec_control) < 0)
3770 +- return -EIO;
3771 +- }
3772 +-#ifndef CONFIG_X86_64
3773 +- if (!(_cpu_based_2nd_exec_control &
3774 +- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
3775 +- _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
3776 +-#endif
3777 +-
3778 +- if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3779 +- _cpu_based_2nd_exec_control &= ~(
3780 +- SECONDARY_EXEC_APIC_REGISTER_VIRT |
3781 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3782 +- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3783 +-
3784 +- rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
3785 +- &vmx_cap->ept, &vmx_cap->vpid);
3786 +-
3787 +- if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
3788 +- /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
3789 +- enabled */
3790 +- _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
3791 +- CPU_BASED_CR3_STORE_EXITING |
3792 +- CPU_BASED_INVLPG_EXITING);
3793 +- } else if (vmx_cap->ept) {
3794 +- vmx_cap->ept = 0;
3795 +- pr_warn_once("EPT CAP should not exist if not support "
3796 +- "1-setting enable EPT VM-execution control\n");
3797 +- }
3798 +- if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
3799 +- vmx_cap->vpid) {
3800 +- vmx_cap->vpid = 0;
3801 +- pr_warn_once("VPID CAP should not exist if not support "
3802 +- "1-setting enable VPID VM-execution control\n");
3803 +- }
3804 +-
3805 +- min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
3806 +-#ifdef CONFIG_X86_64
3807 +- min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
3808 +-#endif
3809 +- opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
3810 +- VM_EXIT_LOAD_IA32_PAT |
3811 +- VM_EXIT_LOAD_IA32_EFER |
3812 +- VM_EXIT_CLEAR_BNDCFGS |
3813 +- VM_EXIT_PT_CONCEAL_PIP |
3814 +- VM_EXIT_CLEAR_IA32_RTIT_CTL;
3815 +- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
3816 +- &_vmexit_control) < 0)
3817 +- return -EIO;
3818 +-
3819 +- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
3820 +- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
3821 +- PIN_BASED_VMX_PREEMPTION_TIMER;
3822 +- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
3823 +- &_pin_based_exec_control) < 0)
3824 +- return -EIO;
3825 +-
3826 +- if (cpu_has_broken_vmx_preemption_timer())
3827 +- _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
3828 +- if (!(_cpu_based_2nd_exec_control &
3829 +- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
3830 +- _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
3831 +-
3832 +- min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
3833 +- opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
3834 +- VM_ENTRY_LOAD_IA32_PAT |
3835 +- VM_ENTRY_LOAD_IA32_EFER |
3836 +- VM_ENTRY_LOAD_BNDCFGS |
3837 +- VM_ENTRY_PT_CONCEAL_PIP |
3838 +- VM_ENTRY_LOAD_IA32_RTIT_CTL;
3839 +- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
3840 +- &_vmentry_control) < 0)
3841 +- return -EIO;
3842 +-
3843 +- /*
3844 +- * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
3845 +- * can't be used due to an erratum where VM Exit may incorrectly clear
3846 +- * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the erratum by using the
3847 +- * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
3848 +- */
3849 +- if (boot_cpu_data.x86 == 0x6) {
3850 +- switch (boot_cpu_data.x86_model) {
3851 +- case 26: /* AAK155 */
3852 +- case 30: /* AAP115 */
3853 +- case 37: /* AAT100 */
3854 +- case 44: /* BC86,AAY89,BD102 */
3855 +- case 46: /* BA97 */
3856 +- _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
3857 +- _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
3858 +- pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
3859 +- "does not work properly. Using workaround\n");
3860 +- break;
3861 +- default:
3862 +- break;
3863 +- }
3864 +- }
3865 +-
3866 +-
3867 +- rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
3868 +-
3869 +- /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
3870 +- if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
3871 +- return -EIO;
3872 +-
3873 +-#ifdef CONFIG_X86_64
3874 +- /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
3875 +- if (vmx_msr_high & (1u<<16))
3876 +- return -EIO;
3877 +-#endif
3878 +-
3879 +- /* Require Write-Back (WB) memory type for VMCS accesses. */
3880 +- if (((vmx_msr_high >> 18) & 15) != 6)
3881 +- return -EIO;
3882 +-
3883 +- vmcs_conf->size = vmx_msr_high & 0x1fff;
3884 +- vmcs_conf->order = get_order(vmcs_conf->size);
3885 +- vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
3886 +-
3887 +- vmcs_conf->revision_id = vmx_msr_low;
3888 +-
3889 +- vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
3890 +- vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
3891 +- vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
3892 +- vmcs_conf->vmexit_ctrl = _vmexit_control;
3893 +- vmcs_conf->vmentry_ctrl = _vmentry_control;
3894 +-
3895 +- if (static_branch_unlikely(&enable_evmcs))
3896 +- evmcs_sanitize_exec_ctrls(vmcs_conf);
3897 +-
3898 +- return 0;
3899 +-}
3900 +-
3901 +-struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
3902 +-{
3903 +- int node = cpu_to_node(cpu);
3904 +- struct page *pages;
3905 +- struct vmcs *vmcs;
3906 +-
3907 +- pages = __alloc_pages_node(node, flags, vmcs_config.order);
3908 +- if (!pages)
3909 +- return NULL;
3910 +- vmcs = page_address(pages);
3911 +- memset(vmcs, 0, vmcs_config.size);
3912 +-
3913 +- /* KVM supports Enlightened VMCS v1 only */
3914 +- if (static_branch_unlikely(&enable_evmcs))
3915 +- vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
3916 +- else
3917 +- vmcs->hdr.revision_id = vmcs_config.revision_id;
3918 +-
3919 +- if (shadow)
3920 +- vmcs->hdr.shadow_vmcs = 1;
3921 +- return vmcs;
3922 +-}
3923 +-
3924 +-void free_vmcs(struct vmcs *vmcs)
3925 +-{
3926 +- free_pages((unsigned long)vmcs, vmcs_config.order);
3927 +-}
3928 +-
3929 +-/*
3930 +- * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
3931 +- */
3932 +-void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3933 +-{
3934 +- if (!loaded_vmcs->vmcs)
3935 +- return;
3936 +- loaded_vmcs_clear(loaded_vmcs);
3937 +- free_vmcs(loaded_vmcs->vmcs);
3938 +- loaded_vmcs->vmcs = NULL;
3939 +- if (loaded_vmcs->msr_bitmap)
3940 +- free_page((unsigned long)loaded_vmcs->msr_bitmap);
3941 +- WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
3942 +-}
3943 +-
3944 +-int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3945 +-{
3946 +- loaded_vmcs->vmcs = alloc_vmcs(false);
3947 +- if (!loaded_vmcs->vmcs)
3948 +- return -ENOMEM;
3949 +-
3950 +- loaded_vmcs->shadow_vmcs = NULL;
3951 +- loaded_vmcs->hv_timer_soft_disabled = false;
3952 +- loaded_vmcs_init(loaded_vmcs);
3953 +-
3954 +- if (cpu_has_vmx_msr_bitmap()) {
3955 +- loaded_vmcs->msr_bitmap = (unsigned long *)
3956 +- __get_free_page(GFP_KERNEL_ACCOUNT);
3957 +- if (!loaded_vmcs->msr_bitmap)
3958 +- goto out_vmcs;
3959 +- memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
3960 +-
3961 +- if (IS_ENABLED(CONFIG_HYPERV) &&
3962 +- static_branch_unlikely(&enable_evmcs) &&
3963 +- (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
3964 +- struct hv_enlightened_vmcs *evmcs =
3965 +- (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
3966 +-
3967 +- evmcs->hv_enlightenments_control.msr_bitmap = 1;
3968 +- }
3969 +- }
3970 +-
3971 +- memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
3972 +- memset(&loaded_vmcs->controls_shadow, 0,
3973 +- sizeof(struct vmcs_controls_shadow));
3974 +-
3975 +- return 0;
3976 +-
3977 +-out_vmcs:
3978 +- free_loaded_vmcs(loaded_vmcs);
3979 +- return -ENOMEM;
3980 +-}
3981 +-
3982 +-static void free_kvm_area(void)
3983 +-{
3984 +- int cpu;
3985 +-
3986 +- for_each_possible_cpu(cpu) {
3987 +- free_vmcs(per_cpu(vmxarea, cpu));
3988 +- per_cpu(vmxarea, cpu) = NULL;
3989 +- }
3990 +-}
3991 +-
3992 +-static __init int alloc_kvm_area(void)
3993 +-{
3994 +- int cpu;
3995 +-
3996 +- for_each_possible_cpu(cpu) {
3997 +- struct vmcs *vmcs;
3998 +-
3999 +- vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
4000 +- if (!vmcs) {
4001 +- free_kvm_area();
4002 +- return -ENOMEM;
4003 +- }
4004 +-
4005 +- /*
4006 +- * When eVMCS is enabled, alloc_vmcs_cpu() sets
4007 +- * vmcs->revision_id to KVM_EVMCS_VERSION instead of
4008 +- * revision_id reported by MSR_IA32_VMX_BASIC.
4009 +- *
4010 +- * However, even though not explicitly documented by
4011 +- * TLFS, VMXArea passed as VMXON argument should
4012 +- * still be marked with revision_id reported by
4013 +- * physical CPU.
4014 +- */
4015 +- if (static_branch_unlikely(&enable_evmcs))
4016 +- vmcs->hdr.revision_id = vmcs_config.revision_id;
4017 +-
4018 +- per_cpu(vmxarea, cpu) = vmcs;
4019 +- }
4020 +- return 0;
4021 +-}
4022 +-
4023 +-static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
4024 +- struct kvm_segment *save)
4025 +-{
4026 +- if (!emulate_invalid_guest_state) {
4027 +- /*
4028 +- * CS and SS RPL should be equal during guest entry according
4029 +- * to VMX spec, but in reality it is not always so. Since vcpu
4030 +- * is in the middle of the transition from real mode to
4031 +- * protected mode it is safe to assume that RPL 0 is a good
4032 +- * default value.
4033 +- */
4034 +- if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
4035 +- save->selector &= ~SEGMENT_RPL_MASK;
4036 +- save->dpl = save->selector & SEGMENT_RPL_MASK;
4037 +- save->s = 1;
4038 +- }
4039 +- vmx_set_segment(vcpu, save, seg);
4040 +-}
4041 +-
4042 +-static void enter_pmode(struct kvm_vcpu *vcpu)
4043 +-{
4044 +- unsigned long flags;
4045 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4046 +-
4047 +- /*
4048 +- * Update the real mode segment cache. It may not be up to date if a segment
4049 +- * register was written while the vcpu was in guest mode.
4050 +- */
4051 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4052 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4053 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4054 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
4055 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4056 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
4057 +-
4058 +- vmx->rmode.vm86_active = 0;
4059 +-
4060 +- vmx_segment_cache_clear(vmx);
4061 +-
4062 +- vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
4063 +-
4064 +- flags = vmcs_readl(GUEST_RFLAGS);
4065 +- flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
4066 +- flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
4067 +- vmcs_writel(GUEST_RFLAGS, flags);
4068 +-
4069 +- vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
4070 +- (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
4071 +-
4072 +- update_exception_bitmap(vcpu);
4073 +-
4074 +- fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4075 +- fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4076 +- fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4077 +- fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4078 +- fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
4079 +- fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
4080 +-}
4081 +-
4082 +-static void fix_rmode_seg(int seg, struct kvm_segment *save)
4083 +-{
4084 +- const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
4085 +- struct kvm_segment var = *save;
4086 +-
4087 +- var.dpl = 0x3;
4088 +- if (seg == VCPU_SREG_CS)
4089 +- var.type = 0x3;
4090 +-
4091 +- if (!emulate_invalid_guest_state) {
4092 +- var.selector = var.base >> 4;
4093 +- var.base = var.base & 0xffff0;
4094 +- var.limit = 0xffff;
4095 +- var.g = 0;
4096 +- var.db = 0;
4097 +- var.present = 1;
4098 +- var.s = 1;
4099 +- var.l = 0;
4100 +- var.unusable = 0;
4101 +- var.type = 0x3;
4102 +- var.avl = 0;
4103 +- if (save->base & 0xf)
4104 +- printk_once(KERN_WARNING "kvm: segment base is not "
4105 +- "paragraph aligned when entering "
4106 +- "protected mode (seg=%d)", seg);
4107 +- }
4108 +-
4109 +- vmcs_write16(sf->selector, var.selector);
4110 +- vmcs_writel(sf->base, var.base);
4111 +- vmcs_write32(sf->limit, var.limit);
4112 +- vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
4113 +-}
4114 +-
4115 +-static void enter_rmode(struct kvm_vcpu *vcpu)
4116 +-{
4117 +- unsigned long flags;
4118 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4119 +- struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
4120 +-
4121 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
4122 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4123 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4124 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4125 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
4126 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4127 +- vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
4128 +-
4129 +- vmx->rmode.vm86_active = 1;
4130 +-
4131 +- /*
4132 +- * Very old userspace does not call KVM_SET_TSS_ADDR before entering
4133 +- * vcpu. Warn the user that an update is overdue.
4134 +- */
4135 +- if (!kvm_vmx->tss_addr)
4136 +- printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
4137 +- "called before entering vcpu\n");
4138 +-
4139 +- vmx_segment_cache_clear(vmx);
4140 +-
4141 +- vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
4142 +- vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
4143 +- vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4144 +-
4145 +- flags = vmcs_readl(GUEST_RFLAGS);
4146 +- vmx->rmode.save_rflags = flags;
4147 +-
4148 +- flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
4149 +-
4150 +- vmcs_writel(GUEST_RFLAGS, flags);
4151 +- vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
4152 +- update_exception_bitmap(vcpu);
4153 +-
4154 +- fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4155 +- fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4156 +- fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4157 +- fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4158 +- fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
4159 +- fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
4160 +-
4161 +- kvm_mmu_reset_context(vcpu);
4162 +-}
4163 +-
4164 +-void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
4165 +-{
4166 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4167 +- struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
4168 +-
4169 +- if (!msr)
4170 +- return;
4171 +-
4172 +- vcpu->arch.efer = efer;
4173 +- if (efer & EFER_LMA) {
4174 +- vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
4175 +- msr->data = efer;
4176 +- } else {
4177 +- vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
4178 +-
4179 +- msr->data = efer & ~EFER_LME;
4180 +- }
4181 +- setup_msrs(vmx);
4182 +-}
4183 +-
4184 +-#ifdef CONFIG_X86_64
4185 +-
4186 +-static void enter_lmode(struct kvm_vcpu *vcpu)
4187 +-{
4188 +- u32 guest_tr_ar;
4189 +-
4190 +- vmx_segment_cache_clear(to_vmx(vcpu));
4191 +-
4192 +- guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
4193 +- if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
4194 +- pr_debug_ratelimited("%s: tss fixup for long mode. \n",
4195 +- __func__);
4196 +- vmcs_write32(GUEST_TR_AR_BYTES,
4197 +- (guest_tr_ar & ~VMX_AR_TYPE_MASK)
4198 +- | VMX_AR_TYPE_BUSY_64_TSS);
4199 +- }
4200 +- vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
4201 +-}
4202 +-
4203 +-static void exit_lmode(struct kvm_vcpu *vcpu)
4204 +-{
4205 +- vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
4206 +- vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
4207 +-}
4208 +-
4209 +-#endif
4210 +-
4211 +-static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
4212 +-{
4213 +- int vpid = to_vmx(vcpu)->vpid;
4214 +-
4215 +- if (!vpid_sync_vcpu_addr(vpid, addr))
4216 +- vpid_sync_context(vpid);
4217 +-
4218 +- /*
4219 +- * If VPIDs are not supported or enabled, then the above is a no-op.
4220 +- * But we don't really need a TLB flush in that case anyway, because
4221 +- * each VM entry/exit includes an implicit flush when VPID is 0.
4222 +- */
4223 +-}
4224 +-
4225 +-static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
4226 +-{
4227 +- ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
4228 +-
4229 +- vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
4230 +- vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
4231 +-}
4232 +-
4233 +-static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
4234 +-{
4235 +- ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
4236 +-
4237 +- vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
4238 +- vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
4239 +-}
4240 +-
4241 +-static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
4242 +-{
4243 +- struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
4244 +-
4245 +- if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
4246 +- return;
4247 +-
4248 +- if (is_pae_paging(vcpu)) {
4249 +- vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
4250 +- vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
4251 +- vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
4252 +- vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
4253 +- }
4254 +-}
4255 +-
4256 +-void ept_save_pdptrs(struct kvm_vcpu *vcpu)
4257 +-{
4258 +- struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
4259 +-
4260 +- if (is_pae_paging(vcpu)) {
4261 +- mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
4262 +- mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
4263 +- mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
4264 +- mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
4265 +- }
4266 +-
4267 +- kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
4268 +-}
4269 +-
4270 +-static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
4271 +- unsigned long cr0,
4272 +- struct kvm_vcpu *vcpu)
4273 +-{
4274 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4275 +-
4276 +- if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
4277 +- vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
4278 +- if (!(cr0 & X86_CR0_PG)) {
4279 +- /* From paging/starting to nonpaging */
4280 +- exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
4281 +- CPU_BASED_CR3_STORE_EXITING);
4282 +- vcpu->arch.cr0 = cr0;
4283 +- vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
4284 +- } else if (!is_paging(vcpu)) {
4285 +- /* From nonpaging to paging */
4286 +- exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
4287 +- CPU_BASED_CR3_STORE_EXITING);
4288 +- vcpu->arch.cr0 = cr0;
4289 +- vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
4290 +- }
4291 +-
4292 +- if (!(cr0 & X86_CR0_WP))
4293 +- *hw_cr0 &= ~X86_CR0_WP;
4294 +-}
4295 +-
4296 +-void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
4297 +-{
4298 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4299 +- unsigned long hw_cr0;
4300 +-
4301 +- hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
4302 +- if (enable_unrestricted_guest)
4303 +- hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
4304 +- else {
4305 +- hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
4306 +-
4307 +- if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
4308 +- enter_pmode(vcpu);
4309 +-
4310 +- if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
4311 +- enter_rmode(vcpu);
4312 +- }
4313 +-
4314 +-#ifdef CONFIG_X86_64
4315 +- if (vcpu->arch.efer & EFER_LME) {
4316 +- if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
4317 +- enter_lmode(vcpu);
4318 +- if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
4319 +- exit_lmode(vcpu);
4320 +- }
4321 +-#endif
4322 +-
4323 +- if (enable_ept && !enable_unrestricted_guest)
4324 +- ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
4325 +-
4326 +- vmcs_writel(CR0_READ_SHADOW, cr0);
4327 +- vmcs_writel(GUEST_CR0, hw_cr0);
4328 +- vcpu->arch.cr0 = cr0;
4329 +-
4330 +- /* depends on vcpu->arch.cr0 to be set to a new value */
4331 +- vmx->emulation_required = emulation_required(vcpu);
4332 +-}
4333 +-
4334 +-static int get_ept_level(struct kvm_vcpu *vcpu)
4335 +-{
4336 +- if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
4337 +- return 5;
4338 +- return 4;
4339 +-}
4340 +-
4341 +-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
4342 +-{
4343 +- u64 eptp = VMX_EPTP_MT_WB;
4344 +-
4345 +- eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
4346 +-
4347 +- if (enable_ept_ad_bits &&
4348 +- (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
4349 +- eptp |= VMX_EPTP_AD_ENABLE_BIT;
4350 +- eptp |= (root_hpa & PAGE_MASK);
4351 +-
4352 +- return eptp;
4353 +-}
4354 +-
4355 +-void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
4356 +-{
4357 +- struct kvm *kvm = vcpu->kvm;
4358 +- bool update_guest_cr3 = true;
4359 +- unsigned long guest_cr3;
4360 +- u64 eptp;
4361 +-
4362 +- guest_cr3 = cr3;
4363 +- if (enable_ept) {
4364 +- eptp = construct_eptp(vcpu, cr3);
4365 +- vmcs_write64(EPT_POINTER, eptp);
4366 +-
4367 +- if (kvm_x86_ops->tlb_remote_flush) {
4368 +- spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
4369 +- to_vmx(vcpu)->ept_pointer = eptp;
4370 +- to_kvm_vmx(kvm)->ept_pointers_match
4371 +- = EPT_POINTERS_CHECK;
4372 +- spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
4373 +- }
4374 +-
4375 +- /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
4376 +- if (is_guest_mode(vcpu))
4377 +- update_guest_cr3 = false;
4378 +- else if (!enable_unrestricted_guest && !is_paging(vcpu))
4379 +- guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
4380 +- else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
4381 +- guest_cr3 = vcpu->arch.cr3;
4382 +- else /* vmcs01.GUEST_CR3 is already up-to-date. */
4383 +- update_guest_cr3 = false;
4384 +- ept_load_pdptrs(vcpu);
4385 +- }
4386 +-
4387 +- if (update_guest_cr3)
4388 +- vmcs_writel(GUEST_CR3, guest_cr3);
4389 +-}
4390 +-
4391 +-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
4392 +-{
4393 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4394 +- /*
4395 +- * Pass through host's Machine Check Enable value to hw_cr4, which
4396 +- * is in force while we are in guest mode. Do not let guests control
4397 +- * this bit, even if host CR4.MCE == 0.
4398 +- */
4399 +- unsigned long hw_cr4;
4400 +-
4401 +- hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
4402 +- if (enable_unrestricted_guest)
4403 +- hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
4404 +- else if (vmx->rmode.vm86_active)
4405 +- hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
4406 +- else
4407 +- hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
4408 +-
4409 +- if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
4410 +- if (cr4 & X86_CR4_UMIP) {
4411 +- secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
4412 +- hw_cr4 &= ~X86_CR4_UMIP;
4413 +- } else if (!is_guest_mode(vcpu) ||
4414 +- !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
4415 +- secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
4416 +- }
4417 +- }
4418 +-
4419 +- if (cr4 & X86_CR4_VMXE) {
4420 +- /*
4421 +- * To use VMXON (and later other VMX instructions), a guest
4422 +- * must first be able to turn on cr4.VMXE (see handle_vmon()).
4423 +- * So basically the check on whether to allow nested VMX
4424 +- * is here. We operate under the default treatment of SMM,
4425 +- * so VMX cannot be enabled under SMM.
4426 +- */
4427 +- if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
4428 +- return 1;
4429 +- }
4430 +-
4431 +- if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
4432 +- return 1;
4433 +-
4434 +- vcpu->arch.cr4 = cr4;
4435 +-
4436 +- if (!enable_unrestricted_guest) {
4437 +- if (enable_ept) {
4438 +- if (!is_paging(vcpu)) {
4439 +- hw_cr4 &= ~X86_CR4_PAE;
4440 +- hw_cr4 |= X86_CR4_PSE;
4441 +- } else if (!(cr4 & X86_CR4_PAE)) {
4442 +- hw_cr4 &= ~X86_CR4_PAE;
4443 +- }
4444 +- }
4445 +-
4446 +- /*
4447 +- * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
4448 +- * hardware. To emulate this behavior, SMEP/SMAP/PKU needs
4449 +- * to be manually disabled when guest switches to non-paging
4450 +- * mode.
4451 +- *
4452 +- * If !enable_unrestricted_guest, the CPU is always running
4453 +- * with CR0.PG=1 and CR4 needs to be modified.
4454 +- * If enable_unrestricted_guest, the CPU automatically
4455 +- * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
4456 +- */
4457 +- if (!is_paging(vcpu))
4458 +- hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
4459 +- }
4460 +-
4461 +- vmcs_writel(CR4_READ_SHADOW, cr4);
4462 +- vmcs_writel(GUEST_CR4, hw_cr4);
4463 +- return 0;
4464 +-}
4465 +-
4466 +-void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
4467 +-{
4468 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4469 +- u32 ar;
4470 +-
4471 +- if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
4472 +- *var = vmx->rmode.segs[seg];
4473 +- if (seg == VCPU_SREG_TR
4474 +- || var->selector == vmx_read_guest_seg_selector(vmx, seg))
4475 +- return;
4476 +- var->base = vmx_read_guest_seg_base(vmx, seg);
4477 +- var->selector = vmx_read_guest_seg_selector(vmx, seg);
4478 +- return;
4479 +- }
4480 +- var->base = vmx_read_guest_seg_base(vmx, seg);
4481 +- var->limit = vmx_read_guest_seg_limit(vmx, seg);
4482 +- var->selector = vmx_read_guest_seg_selector(vmx, seg);
4483 +- ar = vmx_read_guest_seg_ar(vmx, seg);
4484 +- var->unusable = (ar >> 16) & 1;
4485 +- var->type = ar & 15;
4486 +- var->s = (ar >> 4) & 1;
4487 +- var->dpl = (ar >> 5) & 3;
4488 +- /*
4489 +- * Some userspaces do not preserve the unusable property. Since a usable
4490 +- * segment has to be present according to the VMX spec, we can use the
4491 +- * present property to work around this userspace bug by making an
4492 +- * unusable segment always nonpresent. vmx_segment_access_rights()
4493 +- * already marks a nonpresent segment as unusable.
4494 +- */
4495 +- var->present = !var->unusable;
4496 +- var->avl = (ar >> 12) & 1;
4497 +- var->l = (ar >> 13) & 1;
4498 +- var->db = (ar >> 14) & 1;
4499 +- var->g = (ar >> 15) & 1;
4500 +-}
4501 +-
4502 +-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
4503 +-{
4504 +- struct kvm_segment s;
4505 +-
4506 +- if (to_vmx(vcpu)->rmode.vm86_active) {
4507 +- vmx_get_segment(vcpu, &s, seg);
4508 +- return s.base;
4509 +- }
4510 +- return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
4511 +-}
4512 +-
4513 +-int vmx_get_cpl(struct kvm_vcpu *vcpu)
4514 +-{
4515 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4516 +-
4517 +- if (unlikely(vmx->rmode.vm86_active))
4518 +- return 0;
4519 +- else {
4520 +- int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
4521 +- return VMX_AR_DPL(ar);
4522 +- }
4523 +-}
4524 +-
4525 +-static u32 vmx_segment_access_rights(struct kvm_segment *var)
4526 +-{
4527 +- u32 ar;
4528 +-
4529 +- if (var->unusable || !var->present)
4530 +- ar = 1 << 16;
4531 +- else {
4532 +- ar = var->type & 15;
4533 +- ar |= (var->s & 1) << 4;
4534 +- ar |= (var->dpl & 3) << 5;
4535 +- ar |= (var->present & 1) << 7;
4536 +- ar |= (var->avl & 1) << 12;
4537 +- ar |= (var->l & 1) << 13;
4538 +- ar |= (var->db & 1) << 14;
4539 +- ar |= (var->g & 1) << 15;
4540 +- }
4541 +-
4542 +- return ar;
4543 +-}
4544 +-
4545 +-void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
4546 +-{
4547 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
4548 +- const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
4549 +-
4550 +- vmx_segment_cache_clear(vmx);
4551 +-
4552 +- if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
4553 +- vmx->rmode.segs[seg] = *var;
4554 +- if (seg == VCPU_SREG_TR)
4555 +- vmcs_write16(sf->selector, var->selector);
4556 +- else if (var->s)
4557 +- fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
4558 +- goto out;
4559 +- }
4560 +-
4561 +- vmcs_writel(sf->base, var->base);
4562 +- vmcs_write32(sf->limit, var->limit);
4563 +- vmcs_write16(sf->selector, var->selector);
4564 +-
4565 +- /*
4566 +- * Fix the "Accessed" bit in AR field of segment registers for older
4567 +- * qemu binaries.
4568 +- * IA32 arch specifies that at the time of processor reset the
4569 +- * "Accessed" bit in the AR field of segment registers is 1. And qemu
4570 +- * is setting it to 0 in the userland code. This causes invalid guest
4571 +- * state vmexit when "unrestricted guest" mode is turned on.
4572 +- * Fix for this setup issue in cpu_reset is being pushed in the qemu
4573 +- * tree. Newer qemu binaries with that qemu fix would not need this
4574 +- * kvm hack.
4575 +- */
4576 +- if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
4577 +- var->type |= 0x1; /* Accessed */
4578 +-
4579 +- vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
4580 +-
4581 +-out:
4582 +- vmx->emulation_required = emulation_required(vcpu);
4583 +-}
4584 +-
4585 +-static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4586 +-{
4587 +- u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
4588 +-
4589 +- *db = (ar >> 14) & 1;
4590 +- *l = (ar >> 13) & 1;
4591 +-}
4592 +-
4593 +-static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4594 +-{
4595 +- dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
4596 +- dt->address = vmcs_readl(GUEST_IDTR_BASE);
4597 +-}
4598 +-
4599 +-static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4600 +-{
4601 +- vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
4602 +- vmcs_writel(GUEST_IDTR_BASE, dt->address);
4603 +-}
4604 +-
4605 +-static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4606 +-{
4607 +- dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
4608 +- dt->address = vmcs_readl(GUEST_GDTR_BASE);
4609 +-}
4610 +-
4611 +-static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4612 +-{
4613 +- vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
4614 +- vmcs_writel(GUEST_GDTR_BASE, dt->address);
4615 +-}
4616 +-
4617 +-static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
4618 +-{
4619 +- struct kvm_segment var;
4620 +- u32 ar;
4621 +-
4622 +- vmx_get_segment(vcpu, &var, seg);
4623 +- var.dpl = 0x3;
4624 +- if (seg == VCPU_SREG_CS)
4625 +- var.type = 0x3;
4626 +- ar = vmx_segment_access_rights(&var);
4627 +-
4628 +- if (var.base != (var.selector << 4))
4629 +- return false;
4630 +- if (var.limit != 0xffff)
4631 +- return false;
4632 +- if (ar != 0xf3)
4633 +- return false;
4634 +-
4635 +- return true;
4636 +-}
4637 +-
4638 +-static bool code_segment_valid(struct kvm_vcpu *vcpu)
4639 +-{
4640 +- struct kvm_segment cs;
4641 +- unsigned int cs_rpl;
4642 +-
4643 +- vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4644 +- cs_rpl = cs.selector & SEGMENT_RPL_MASK;
4645 +-
4646 +- if (cs.unusable)
4647 +- return false;
4648 +- if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
4649 +- return false;
4650 +- if (!cs.s)
4651 +- return false;
4652 +- if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
4653 +- if (cs.dpl > cs_rpl)
4654 +- return false;
4655 +- } else {
4656 +- if (cs.dpl != cs_rpl)
4657 +- return false;
4658 +- }
4659 +- if (!cs.present)
4660 +- return false;
4661 +-
4662 +- /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
4663 +- return true;
4664 +-}
4665 +-
4666 +-static bool stack_segment_valid(struct kvm_vcpu *vcpu)
4667 +-{
4668 +- struct kvm_segment ss;
4669 +- unsigned int ss_rpl;
4670 +-
4671 +- vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4672 +- ss_rpl = ss.selector & SEGMENT_RPL_MASK;
4673 +-
4674 +- if (ss.unusable)
4675 +- return true;
4676 +- if (ss.type != 3 && ss.type != 7)
4677 +- return false;
4678 +- if (!ss.s)
4679 +- return false;
4680 +- if (ss.dpl != ss_rpl) /* DPL != RPL */
4681 +- return false;
4682 +- if (!ss.present)
4683 +- return false;
4684 +-
4685 +- return true;
4686 +-}
4687 +-
4688 +-static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
4689 +-{
4690 +- struct kvm_segment var;
4691 +- unsigned int rpl;
4692 +-
4693 +- vmx_get_segment(vcpu, &var, seg);
4694 +- rpl = var.selector & SEGMENT_RPL_MASK;
4695 +-
4696 +- if (var.unusable)
4697 +- return true;
4698 +- if (!var.s)
4699 +- return false;
4700 +- if (!var.present)
4701 +- return false;
4702 +- if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
4703 +- if (var.dpl < rpl) /* DPL < RPL */
4704 +- return false;
4705 +- }
4706 +-
4707 +- /* TODO: Add other members to kvm_segment_field to allow checking for other access
4708 +- * rights flags
4709 +- */
4710 +- return true;
4711 +-}
4712 +-
4713 +-static bool tr_valid(struct kvm_vcpu *vcpu)
4714 +-{
4715 +- struct kvm_segment tr;
4716 +-
4717 +- vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
4718 +-
4719 +- if (tr.unusable)
4720 +- return false;
4721 +- if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
4722 +- return false;
4723 +- if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
4724 +- return false;
4725 +- if (!tr.present)
4726 +- return false;
4727 +-
4728 +- return true;
4729 +-}
4730 +-
4731 +-static bool ldtr_valid(struct kvm_vcpu *vcpu)
4732 +-{
4733 +- struct kvm_segment ldtr;
4734 +-
4735 +- vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
4736 +-
4737 +- if (ldtr.unusable)
4738 +- return true;
4739 +- if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
4740 +- return false;
4741 +- if (ldtr.type != 2)
4742 +- return false;
4743 +- if (!ldtr.present)
4744 +- return false;
4745 +-
4746 +- return true;
4747 +-}
4748 +-
4749 +-static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
4750 +-{
4751 +- struct kvm_segment cs, ss;
4752 +-
4753 +- vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4754 +- vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4755 +-
4756 +- return ((cs.selector & SEGMENT_RPL_MASK) ==
4757 +- (ss.selector & SEGMENT_RPL_MASK));
4758 +-}
4759 +-
4760 +-/*
4761 +- * Check if guest state is valid. Returns true if valid, false if
4762 +- * not.
4763 +- * We assume that registers are always usable
4764 +- */
4765 +-static bool guest_state_valid(struct kvm_vcpu *vcpu)
4766 +-{
4767 +- if (enable_unrestricted_guest)
4768 +- return true;
4769 +-
4770 +- /* real mode guest state checks */
4771 +- if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
4772 +- if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
4773 +- return false;
4774 +- if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
4775 +- return false;
4776 +- if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
4777 +- return false;
4778 +- if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
4779 +- return false;
4780 +- if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
4781 +- return false;
4782 +- if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
4783 +- return false;
4784 +- } else {
4785 +- /* protected mode guest state checks */
4786 +- if (!cs_ss_rpl_check(vcpu))
4787 +- return false;
4788 +- if (!code_segment_valid(vcpu))
4789 +- return false;
4790 +- if (!stack_segment_valid(vcpu))
4791 +- return false;
4792 +- if (!data_segment_valid(vcpu, VCPU_SREG_DS))
4793 +- return false;
4794 +- if (!data_segment_valid(vcpu, VCPU_SREG_ES))
4795 +- return false;
4796 +- if (!data_segment_valid(vcpu, VCPU_SREG_FS))
4797 +- return false;
4798 +- if (!data_segment_valid(vcpu, VCPU_SREG_GS))
4799 +- return false;
4800 +- if (!tr_valid(vcpu))
4801 +- return false;
4802 +- if (!ldtr_valid(vcpu))
4803 +- return false;
4804 +- }
4805 +- /* TODO:
4806 +- * - Add checks on RIP
4807 +- * - Add checks on RFLAGS
4808 +- */
4809 +-
4810 +- return true;
4811 +-}
4812 +-
4813 +-static int init_rmode_tss(struct kvm *kvm)
4814 +-{
4815 +- gfn_t fn;
4816 +- u16 data = 0;
4817 +- int idx, r;
4818 +-
4819 +- idx = srcu_read_lock(&kvm->srcu);
4820 +- fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
4821 +- r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4822 +- if (r < 0)
4823 +- goto out;
4824 +- data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
4825 +- r = kvm_write_guest_page(kvm, fn++, &data,
4826 +- TSS_IOPB_BASE_OFFSET, sizeof(u16));
4827 +- if (r < 0)
4828 +- goto out;
4829 +- r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
4830 +- if (r < 0)
4831 +- goto out;
4832 +- r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4833 +- if (r < 0)
4834 +- goto out;
4835 +- data = ~0;
4836 +- r = kvm_write_guest_page(kvm, fn, &data,
4837 +- RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
4838 +- sizeof(u8));
4839 +-out:
4840 +- srcu_read_unlock(&kvm->srcu, idx);
4841 +- return r;
4842 +-}
4843 +-
4844 +-static int init_rmode_identity_map(struct kvm *kvm)
4845 +-{
4846 +- struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
4847 +- int i, idx, r = 0;
4848 +- kvm_pfn_t identity_map_pfn;
4849 +- u32 tmp;
4850 +-
4851 +- /* Protect kvm_vmx->ept_identity_pagetable_done. */
4852 +- mutex_lock(&kvm->slots_lock);
4853 +-
4854 +- if (likely(kvm_vmx->ept_identity_pagetable_done))
4855 +- goto out2;
4856 +-
4857 +- if (!kvm_vmx->ept_identity_map_addr)
4858 +- kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
4859 +- identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
4860 +-
4861 +- r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
4862 +- kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
4863 +- if (r < 0)
4864 +- goto out2;
4865 +-
4866 +- idx = srcu_read_lock(&kvm->srcu);
4867 +- r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
4868 +- if (r < 0)
4869 +- goto out;
4870 +- /* Set up identity-mapping pagetable for EPT in real mode */
4871 +- for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
4872 +- tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
4873 +- _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
4874 +- r = kvm_write_guest_page(kvm, identity_map_pfn,
4875 +- &tmp, i * sizeof(tmp), sizeof(tmp));
4876 +- if (r < 0)
4877 +- goto out;
4878 +- }
4879 +- kvm_vmx->ept_identity_pagetable_done = true;
4880 +-
4881 +-out:
4882 +- srcu_read_unlock(&kvm->srcu, idx);
4883 +-
4884 +-out2:
4885 +- mutex_unlock(&kvm->slots_lock);
4886 +- return r;
4887 +-}
4888 +-
4889 +-static void seg_setup(int seg)
4890 +-{
4891 +- const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
4892 +- unsigned int ar;
4893 +-
4894 +- vmcs_write16(sf->selector, 0);
4895 +- vmcs_writel(sf->base, 0);
4896 +- vmcs_write32(sf->limit, 0xffff);
4897 +- ar = 0x93;
4898 +- if (seg == VCPU_SREG_CS)
4899 +- ar |= 0x08; /* code segment */
4900 +-
4901 +- vmcs_write32(sf->ar_bytes, ar);
4902 +-}
4903 +-
4904 +-static int alloc_apic_access_page(struct kvm *kvm)
4905 +-{
4906 +- struct page *page;
4907 +- int r = 0;
4908 +-
4909 +- mutex_lock(&kvm->slots_lock);
4910 +- if (kvm->arch.apic_access_page_done)
4911 +- goto out;
4912 +- r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
4913 +- APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
4914 +- if (r)
4915 +- goto out;
4916 +-
4917 +- page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
4918 +- if (is_error_page(page)) {
4919 +- r = -EFAULT;
4920 +- goto out;
4921 +- }
4922 +-
4923 +- /*
4924 +- * Do not pin the page in memory, so that memory hot-unplug
4925 +- * is able to migrate it.
4926 +- */
4927 +- put_page(page);
4928 +- kvm->arch.apic_access_page_done = true;
4929 +-out:
4930 +- mutex_unlock(&kvm->slots_lock);
4931 +- return r;
4932 +-}
4933 +-
4934 +-int allocate_vpid(void)
4935 +-{
4936 +- int vpid;
4937 +-
4938 +- if (!enable_vpid)
4939 +- return 0;
4940 +- spin_lock(&vmx_vpid_lock);
4941 +- vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
4942 +- if (vpid < VMX_NR_VPIDS)
4943 +- __set_bit(vpid, vmx_vpid_bitmap);
4944 +- else
4945 +- vpid = 0;
4946 +- spin_unlock(&vmx_vpid_lock);
4947 +- return vpid;
4948 +-}
4949 +-
4950 +-void free_vpid(int vpid)
4951 +-{
4952 +- if (!enable_vpid || vpid == 0)
4953 +- return;
4954 +- spin_lock(&vmx_vpid_lock);
4955 +- __clear_bit(vpid, vmx_vpid_bitmap);
4956 +- spin_unlock(&vmx_vpid_lock);
4957 +-}
4958 +-
4959 +-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
4960 +- u32 msr, int type)
4961 +-{
4962 +- int f = sizeof(unsigned long);
4963 +-
4964 +- if (!cpu_has_vmx_msr_bitmap())
4965 +- return;
4966 +-
4967 +- if (static_branch_unlikely(&enable_evmcs))
4968 +- evmcs_touch_msr_bitmap();
4969 +-
4970 +- /*
4971 +- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4972 +- * have the write-low and read-high bitmap offsets the wrong way round.
4973 +- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4974 +- */
4975 +- if (msr <= 0x1fff) {
4976 +- if (type & MSR_TYPE_R)
4977 +- /* read-low */
4978 +- __clear_bit(msr, msr_bitmap + 0x000 / f);
4979 +-
4980 +- if (type & MSR_TYPE_W)
4981 +- /* write-low */
4982 +- __clear_bit(msr, msr_bitmap + 0x800 / f);
4983 +-
4984 +- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4985 +- msr &= 0x1fff;
4986 +- if (type & MSR_TYPE_R)
4987 +- /* read-high */
4988 +- __clear_bit(msr, msr_bitmap + 0x400 / f);
4989 +-
4990 +- if (type & MSR_TYPE_W)
4991 +- /* write-high */
4992 +- __clear_bit(msr, msr_bitmap + 0xc00 / f);
4993 +-
4994 +- }
4995 +-}
4996 +-
4997 +-static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
4998 +- u32 msr, int type)
4999 +-{
5000 +- int f = sizeof(unsigned long);
5001 +-
5002 +- if (!cpu_has_vmx_msr_bitmap())
5003 +- return;
5004 +-
5005 +- if (static_branch_unlikely(&enable_evmcs))
5006 +- evmcs_touch_msr_bitmap();
5007 +-
5008 +- /*
5009 +- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5010 +- * have the write-low and read-high bitmap offsets the wrong way round.
5011 +- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5012 +- */
5013 +- if (msr <= 0x1fff) {
5014 +- if (type & MSR_TYPE_R)
5015 +- /* read-low */
5016 +- __set_bit(msr, msr_bitmap + 0x000 / f);
5017 +-
5018 +- if (type & MSR_TYPE_W)
5019 +- /* write-low */
5020 +- __set_bit(msr, msr_bitmap + 0x800 / f);
5021 +-
5022 +- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5023 +- msr &= 0x1fff;
5024 +- if (type & MSR_TYPE_R)
5025 +- /* read-high */
5026 +- __set_bit(msr, msr_bitmap + 0x400 / f);
5027 +-
5028 +- if (type & MSR_TYPE_W)
5029 +- /* write-high */
5030 +- __set_bit(msr, msr_bitmap + 0xc00 / f);
5031 +-
5032 +- }
5033 +-}
5034 +-
5035 +-static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
5036 +- u32 msr, int type, bool value)
5037 +-{
5038 +- if (value)
5039 +- vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
5040 +- else
5041 +- vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
5042 +-}
5043 +-
5044 +-static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
5045 +-{
5046 +- u8 mode = 0;
5047 +-
5048 +- if (cpu_has_secondary_exec_ctrls() &&
5049 +- (secondary_exec_controls_get(to_vmx(vcpu)) &
5050 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
5051 +- mode |= MSR_BITMAP_MODE_X2APIC;
5052 +- if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
5053 +- mode |= MSR_BITMAP_MODE_X2APIC_APICV;
5054 +- }
5055 +-
5056 +- return mode;
5057 +-}
5058 +-
5059 +-static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
5060 +- u8 mode)
5061 +-{
5062 +- int msr;
5063 +-
5064 +- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
5065 +- unsigned word = msr / BITS_PER_LONG;
5066 +- msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
5067 +- msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
5068 +- }
5069 +-
5070 +- if (mode & MSR_BITMAP_MODE_X2APIC) {
5071 +- /*
5072 +- * TPR reads and writes can be virtualized even if virtual interrupt
5073 +- * delivery is not in use.
5074 +- */
5075 +- vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
5076 +- if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
5077 +- vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
5078 +- vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
5079 +- vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
5080 +- }
5081 +- }
5082 +-}
5083 +-
5084 +-void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
5085 +-{
5086 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5087 +- unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
5088 +- u8 mode = vmx_msr_bitmap_mode(vcpu);
5089 +- u8 changed = mode ^ vmx->msr_bitmap_mode;
5090 +-
5091 +- if (!changed)
5092 +- return;
5093 +-
5094 +- if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
5095 +- vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
5096 +-
5097 +- vmx->msr_bitmap_mode = mode;
5098 +-}
5099 +-
5100 +-void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
5101 +-{
5102 +- unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
5103 +- bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
5104 +- u32 i;
5105 +-
5106 +- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
5107 +- MSR_TYPE_RW, flag);
5108 +- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
5109 +- MSR_TYPE_RW, flag);
5110 +- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
5111 +- MSR_TYPE_RW, flag);
5112 +- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
5113 +- MSR_TYPE_RW, flag);
5114 +- for (i = 0; i < vmx->pt_desc.addr_range; i++) {
5115 +- vmx_set_intercept_for_msr(msr_bitmap,
5116 +- MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
5117 +- vmx_set_intercept_for_msr(msr_bitmap,
5118 +- MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
5119 +- }
5120 +-}
5121 +-
5122 +-static bool vmx_get_enable_apicv(struct kvm *kvm)
5123 +-{
5124 +- return enable_apicv;
5125 +-}
5126 +-
5127 +-static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
5128 +-{
5129 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5130 +- void *vapic_page;
5131 +- u32 vppr;
5132 +- int rvi;
5133 +-
5134 +- if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
5135 +- !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
5136 +- WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
5137 +- return false;
5138 +-
5139 +- rvi = vmx_get_rvi();
5140 +-
5141 +- vapic_page = vmx->nested.virtual_apic_map.hva;
5142 +- vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
5143 +-
5144 +- return ((rvi & 0xf0) > (vppr & 0xf0));
5145 +-}
5146 +-
5147 +-static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
5148 +- bool nested)
5149 +-{
5150 +-#ifdef CONFIG_SMP
5151 +- int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
5152 +-
5153 +- if (vcpu->mode == IN_GUEST_MODE) {
5154 +- /*
5155 +- * The vector of interrupt to be delivered to vcpu had
5156 +- * been set in PIR before this function.
5157 +- *
5158 +- * Following cases will be reached in this block, and
5159 +- * we always send a notification event in all cases as
5160 +- * explained below.
5161 +- *
5162 +- * Case 1: vcpu keeps in non-root mode. Sending a
5163 +- * notification event posts the interrupt to vcpu.
5164 +- *
5165 +- * Case 2: vcpu exits to root mode and is still
5166 +- * runnable. PIR will be synced to vIRR before the
5167 +- * next vcpu entry. Sending a notification event in
5168 +- * this case has no effect, as vcpu is not in root
5169 +- * mode.
5170 +- *
5171 +- * Case 3: vcpu exits to root mode and is blocked.
5172 +- * vcpu_block() has already synced PIR to vIRR and
5173 +- * never blocks vcpu if vIRR is not cleared. Therefore,
5174 +- * a blocked vcpu here does not wait for any requested
5175 +- * interrupts in PIR, and sending a notification event
5176 +- * which has no effect is safe here.
5177 +- */
5178 +-
5179 +- apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
5180 +- return true;
5181 +- }
5182 +-#endif
5183 +- return false;
5184 +-}
5185 +-
5186 +-static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
5187 +- int vector)
5188 +-{
5189 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5190 +-
5191 +- if (is_guest_mode(vcpu) &&
5192 +- vector == vmx->nested.posted_intr_nv) {
5193 +- /*
5194 +- * If a posted intr is not recognized by hardware,
5195 +- * we will accomplish it in the next vmentry.
5196 +- */
5197 +- vmx->nested.pi_pending = true;
5198 +- kvm_make_request(KVM_REQ_EVENT, vcpu);
5199 +- /* the PIR and ON have been set by L1. */
5200 +- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
5201 +- kvm_vcpu_kick(vcpu);
5202 +- return 0;
5203 +- }
5204 +- return -1;
5205 +-}
5206 +-/*
5207 +- * Send interrupt to vcpu via posted interrupt way.
5208 +- * 1. If target vcpu is running(non-root mode), send posted interrupt
5209 +- * notification to vcpu and hardware will sync PIR to vIRR atomically.
5210 +- * 2. If target vcpu isn't running(root mode), kick it to pick up the
5211 +- * interrupt from PIR in next vmentry.
5212 +- */
5213 +-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
5214 +-{
5215 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5216 +- int r;
5217 +-
5218 +- r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
5219 +- if (!r)
5220 +- return;
5221 +-
5222 +- if (pi_test_and_set_pir(vector, &vmx->pi_desc))
5223 +- return;
5224 +-
5225 +- /* If a previous notification has sent the IPI, nothing to do. */
5226 +- if (pi_test_and_set_on(&vmx->pi_desc))
5227 +- return;
5228 +-
5229 +- if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
5230 +- kvm_vcpu_kick(vcpu);
5231 +-}
5232 +-
5233 +-/*
5234 +- * Set up the vmcs's constant host-state fields, i.e., host-state fields that
5235 +- * will not change in the lifetime of the guest.
5236 +- * Note that host-state that does change is set elsewhere. E.g., host-state
5237 +- * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
5238 +- */
5239 +-void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
5240 +-{
5241 +- u32 low32, high32;
5242 +- unsigned long tmpl;
5243 +- unsigned long cr0, cr3, cr4;
5244 +-
5245 +- cr0 = read_cr0();
5246 +- WARN_ON(cr0 & X86_CR0_TS);
5247 +- vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
5248 +-
5249 +- /*
5250 +- * Save the most likely value for this task's CR3 in the VMCS.
5251 +- * We can't use __get_current_cr3_fast() because we're not atomic.
5252 +- */
5253 +- cr3 = __read_cr3();
5254 +- vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
5255 +- vmx->loaded_vmcs->host_state.cr3 = cr3;
5256 +-
5257 +- /* Save the most likely value for this task's CR4 in the VMCS. */
5258 +- cr4 = cr4_read_shadow();
5259 +- vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
5260 +- vmx->loaded_vmcs->host_state.cr4 = cr4;
5261 +-
5262 +- vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
5263 +-#ifdef CONFIG_X86_64
5264 +- /*
5265 +- * Load null selectors, so we can avoid reloading them in
5266 +- * vmx_prepare_switch_to_host(), in case userspace uses
5267 +- * the null selectors too (the expected case).
5268 +- */
5269 +- vmcs_write16(HOST_DS_SELECTOR, 0);
5270 +- vmcs_write16(HOST_ES_SELECTOR, 0);
5271 +-#else
5272 +- vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
5273 +- vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
5274 +-#endif
5275 +- vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
5276 +- vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
5277 +-
5278 +- vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */
5279 +-
5280 +- vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
5281 +-
5282 +- rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
5283 +- vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
5284 +- rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
5285 +- vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
5286 +-
5287 +- if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
5288 +- rdmsr(MSR_IA32_CR_PAT, low32, high32);
5289 +- vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
5290 +- }
5291 +-
5292 +- if (cpu_has_load_ia32_efer())
5293 +- vmcs_write64(HOST_IA32_EFER, host_efer);
5294 +-}
5295 +-
5296 +-void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
5297 +-{
5298 +- vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
5299 +- if (enable_ept)
5300 +- vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
5301 +- if (is_guest_mode(&vmx->vcpu))
5302 +- vmx->vcpu.arch.cr4_guest_owned_bits &=
5303 +- ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
5304 +- vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
5305 +-}
5306 +-
5307 +-u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
5308 +-{
5309 +- u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
5310 +-
5311 +- if (!kvm_vcpu_apicv_active(&vmx->vcpu))
5312 +- pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
5313 +-
5314 +- if (!enable_vnmi)
5315 +- pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
5316 +-
5317 +- if (!enable_preemption_timer)
5318 +- pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
5319 +-
5320 +- return pin_based_exec_ctrl;
5321 +-}
5322 +-
5323 +-static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
5324 +-{
5325 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5326 +-
5327 +- pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
5328 +- if (cpu_has_secondary_exec_ctrls()) {
5329 +- if (kvm_vcpu_apicv_active(vcpu))
5330 +- secondary_exec_controls_setbit(vmx,
5331 +- SECONDARY_EXEC_APIC_REGISTER_VIRT |
5332 +- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
5333 +- else
5334 +- secondary_exec_controls_clearbit(vmx,
5335 +- SECONDARY_EXEC_APIC_REGISTER_VIRT |
5336 +- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
5337 +- }
5338 +-
5339 +- if (cpu_has_vmx_msr_bitmap())
5340 +- vmx_update_msr_bitmap(vcpu);
5341 +-}
5342 +-
5343 +-u32 vmx_exec_control(struct vcpu_vmx *vmx)
5344 +-{
5345 +- u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
5346 +-
5347 +- if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
5348 +- exec_control &= ~CPU_BASED_MOV_DR_EXITING;
5349 +-
5350 +- if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
5351 +- exec_control &= ~CPU_BASED_TPR_SHADOW;
5352 +-#ifdef CONFIG_X86_64
5353 +- exec_control |= CPU_BASED_CR8_STORE_EXITING |
5354 +- CPU_BASED_CR8_LOAD_EXITING;
5355 +-#endif
5356 +- }
5357 +- if (!enable_ept)
5358 +- exec_control |= CPU_BASED_CR3_STORE_EXITING |
5359 +- CPU_BASED_CR3_LOAD_EXITING |
5360 +- CPU_BASED_INVLPG_EXITING;
5361 +- if (kvm_mwait_in_guest(vmx->vcpu.kvm))
5362 +- exec_control &= ~(CPU_BASED_MWAIT_EXITING |
5363 +- CPU_BASED_MONITOR_EXITING);
5364 +- if (kvm_hlt_in_guest(vmx->vcpu.kvm))
5365 +- exec_control &= ~CPU_BASED_HLT_EXITING;
5366 +- return exec_control;
5367 +-}
5368 +-
5369 +-
5370 +-static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
5371 +-{
5372 +- struct kvm_vcpu *vcpu = &vmx->vcpu;
5373 +-
5374 +- u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
5375 +-
5376 +- if (pt_mode == PT_MODE_SYSTEM)
5377 +- exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
5378 +- if (!cpu_need_virtualize_apic_accesses(vcpu))
5379 +- exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
5380 +- if (vmx->vpid == 0)
5381 +- exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
5382 +- if (!enable_ept) {
5383 +- exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
5384 +- enable_unrestricted_guest = 0;
5385 +- }
5386 +- if (!enable_unrestricted_guest)
5387 +- exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
5388 +- if (kvm_pause_in_guest(vmx->vcpu.kvm))
5389 +- exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
5390 +- if (!kvm_vcpu_apicv_active(vcpu))
5391 +- exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
5392 +- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
5393 +- exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
5394 +-
5395 +- /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
5396 +- * in vmx_set_cr4. */
5397 +- exec_control &= ~SECONDARY_EXEC_DESC;
5398 +-
5399 +- /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
5400 +- (handle_vmptrld).
5401 +- We can NOT enable shadow_vmcs here because we don't have yet
5402 +- a current VMCS12
5403 +- */
5404 +- exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
5405 +-
5406 +- if (!enable_pml)
5407 +- exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
5408 +-
5409 +- if (vmx_xsaves_supported()) {
5410 +- /* Exposing XSAVES only when XSAVE is exposed */
5411 +- bool xsaves_enabled =
5412 +- guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
5413 +- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
5414 +-
5415 +- vcpu->arch.xsaves_enabled = xsaves_enabled;
5416 +-
5417 +- if (!xsaves_enabled)
5418 +- exec_control &= ~SECONDARY_EXEC_XSAVES;
5419 +-
5420 +- if (nested) {
5421 +- if (xsaves_enabled)
5422 +- vmx->nested.msrs.secondary_ctls_high |=
5423 +- SECONDARY_EXEC_XSAVES;
5424 +- else
5425 +- vmx->nested.msrs.secondary_ctls_high &=
5426 +- ~SECONDARY_EXEC_XSAVES;
5427 +- }
5428 +- }
5429 +-
5430 +- if (vmx_rdtscp_supported()) {
5431 +- bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
5432 +- if (!rdtscp_enabled)
5433 +- exec_control &= ~SECONDARY_EXEC_RDTSCP;
5434 +-
5435 +- if (nested) {
5436 +- if (rdtscp_enabled)
5437 +- vmx->nested.msrs.secondary_ctls_high |=
5438 +- SECONDARY_EXEC_RDTSCP;
5439 +- else
5440 +- vmx->nested.msrs.secondary_ctls_high &=
5441 +- ~SECONDARY_EXEC_RDTSCP;
5442 +- }
5443 +- }
5444 +-
5445 +- if (vmx_invpcid_supported()) {
5446 +- /* Exposing INVPCID only when PCID is exposed */
5447 +- bool invpcid_enabled =
5448 +- guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
5449 +- guest_cpuid_has(vcpu, X86_FEATURE_PCID);
5450 +-
5451 +- if (!invpcid_enabled) {
5452 +- exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
5453 +- guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
5454 +- }
5455 +-
5456 +- if (nested) {
5457 +- if (invpcid_enabled)
5458 +- vmx->nested.msrs.secondary_ctls_high |=
5459 +- SECONDARY_EXEC_ENABLE_INVPCID;
5460 +- else
5461 +- vmx->nested.msrs.secondary_ctls_high &=
5462 +- ~SECONDARY_EXEC_ENABLE_INVPCID;
5463 +- }
5464 +- }
5465 +-
5466 +- if (vmx_rdrand_supported()) {
5467 +- bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
5468 +- if (rdrand_enabled)
5469 +- exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
5470 +-
5471 +- if (nested) {
5472 +- if (rdrand_enabled)
5473 +- vmx->nested.msrs.secondary_ctls_high |=
5474 +- SECONDARY_EXEC_RDRAND_EXITING;
5475 +- else
5476 +- vmx->nested.msrs.secondary_ctls_high &=
5477 +- ~SECONDARY_EXEC_RDRAND_EXITING;
5478 +- }
5479 +- }
5480 +-
5481 +- if (vmx_rdseed_supported()) {
5482 +- bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
5483 +- if (rdseed_enabled)
5484 +- exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
5485 +-
5486 +- if (nested) {
5487 +- if (rdseed_enabled)
5488 +- vmx->nested.msrs.secondary_ctls_high |=
5489 +- SECONDARY_EXEC_RDSEED_EXITING;
5490 +- else
5491 +- vmx->nested.msrs.secondary_ctls_high &=
5492 +- ~SECONDARY_EXEC_RDSEED_EXITING;
5493 +- }
5494 +- }
5495 +-
5496 +- if (vmx_waitpkg_supported()) {
5497 +- bool waitpkg_enabled =
5498 +- guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
5499 +-
5500 +- if (!waitpkg_enabled)
5501 +- exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
5502 +-
5503 +- if (nested) {
5504 +- if (waitpkg_enabled)
5505 +- vmx->nested.msrs.secondary_ctls_high |=
5506 +- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
5507 +- else
5508 +- vmx->nested.msrs.secondary_ctls_high &=
5509 +- ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
5510 +- }
5511 +- }
5512 +-
5513 +- vmx->secondary_exec_control = exec_control;
5514 +-}
5515 +-
5516 +-static void ept_set_mmio_spte_mask(void)
5517 +-{
5518 +- /*
5519 +- * EPT Misconfigurations can be generated if the value of bits 2:0
5520 +- * of an EPT paging-structure entry is 110b (write/execute).
5521 +- */
5522 +- kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
5523 +- VMX_EPT_MISCONFIG_WX_VALUE, 0);
5524 +-}
5525 +-
5526 +-#define VMX_XSS_EXIT_BITMAP 0
5527 +-
5528 +-/*
5529 +- * Noting that the initialization of Guest-state Area of VMCS is in
5530 +- * vmx_vcpu_reset().
5531 +- */
5532 +-static void init_vmcs(struct vcpu_vmx *vmx)
5533 +-{
5534 +- if (nested)
5535 +- nested_vmx_set_vmcs_shadowing_bitmap();
5536 +-
5537 +- if (cpu_has_vmx_msr_bitmap())
5538 +- vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
5539 +-
5540 +- vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
5541 +-
5542 +- /* Control */
5543 +- pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
5544 +-
5545 +- exec_controls_set(vmx, vmx_exec_control(vmx));
5546 +-
5547 +- if (cpu_has_secondary_exec_ctrls()) {
5548 +- vmx_compute_secondary_exec_control(vmx);
5549 +- secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
5550 +- }
5551 +-
5552 +- if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
5553 +- vmcs_write64(EOI_EXIT_BITMAP0, 0);
5554 +- vmcs_write64(EOI_EXIT_BITMAP1, 0);
5555 +- vmcs_write64(EOI_EXIT_BITMAP2, 0);
5556 +- vmcs_write64(EOI_EXIT_BITMAP3, 0);
5557 +-
5558 +- vmcs_write16(GUEST_INTR_STATUS, 0);
5559 +-
5560 +- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
5561 +- vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
5562 +- }
5563 +-
5564 +- if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
5565 +- vmcs_write32(PLE_GAP, ple_gap);
5566 +- vmx->ple_window = ple_window;
5567 +- vmx->ple_window_dirty = true;
5568 +- }
5569 +-
5570 +- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
5571 +- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
5572 +- vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
5573 +-
5574 +- vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
5575 +- vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
5576 +- vmx_set_constant_host_state(vmx);
5577 +- vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
5578 +- vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
5579 +-
5580 +- if (cpu_has_vmx_vmfunc())
5581 +- vmcs_write64(VM_FUNCTION_CONTROL, 0);
5582 +-
5583 +- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
5584 +- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
5585 +- vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
5586 +- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
5587 +- vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
5588 +-
5589 +- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
5590 +- vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
5591 +-
5592 +- vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
5593 +-
5594 +- /* 22.2.1, 20.8.1 */
5595 +- vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
5596 +-
5597 +- vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
5598 +- vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
5599 +-
5600 +- set_cr4_guest_host_mask(vmx);
5601 +-
5602 +- if (vmx->vpid != 0)
5603 +- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
5604 +-
5605 +- if (vmx_xsaves_supported())
5606 +- vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
5607 +-
5608 +- if (enable_pml) {
5609 +- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
5610 +- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
5611 +- }
5612 +-
5613 +- if (cpu_has_vmx_encls_vmexit())
5614 +- vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
5615 +-
5616 +- if (pt_mode == PT_MODE_HOST_GUEST) {
5617 +- memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
5618 +- /* Bit[6~0] are forced to 1, writes are ignored. */
5619 +- vmx->pt_desc.guest.output_mask = 0x7F;
5620 +- vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
5621 +- }
5622 +-}
5623 +-
5624 +-static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5625 +-{
5626 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5627 +- struct msr_data apic_base_msr;
5628 +- u64 cr0;
5629 +-
5630 +- vmx->rmode.vm86_active = 0;
5631 +- vmx->spec_ctrl = 0;
5632 +-
5633 +- vmx->msr_ia32_umwait_control = 0;
5634 +-
5635 +- vcpu->arch.microcode_version = 0x100000000ULL;
5636 +- vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
5637 +- vmx->hv_deadline_tsc = -1;
5638 +- kvm_set_cr8(vcpu, 0);
5639 +-
5640 +- if (!init_event) {
5641 +- apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
5642 +- MSR_IA32_APICBASE_ENABLE;
5643 +- if (kvm_vcpu_is_reset_bsp(vcpu))
5644 +- apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
5645 +- apic_base_msr.host_initiated = true;
5646 +- kvm_set_apic_base(vcpu, &apic_base_msr);
5647 +- }
5648 +-
5649 +- vmx_segment_cache_clear(vmx);
5650 +-
5651 +- seg_setup(VCPU_SREG_CS);
5652 +- vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
5653 +- vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
5654 +-
5655 +- seg_setup(VCPU_SREG_DS);
5656 +- seg_setup(VCPU_SREG_ES);
5657 +- seg_setup(VCPU_SREG_FS);
5658 +- seg_setup(VCPU_SREG_GS);
5659 +- seg_setup(VCPU_SREG_SS);
5660 +-
5661 +- vmcs_write16(GUEST_TR_SELECTOR, 0);
5662 +- vmcs_writel(GUEST_TR_BASE, 0);
5663 +- vmcs_write32(GUEST_TR_LIMIT, 0xffff);
5664 +- vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5665 +-
5666 +- vmcs_write16(GUEST_LDTR_SELECTOR, 0);
5667 +- vmcs_writel(GUEST_LDTR_BASE, 0);
5668 +- vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
5669 +- vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
5670 +-
5671 +- if (!init_event) {
5672 +- vmcs_write32(GUEST_SYSENTER_CS, 0);
5673 +- vmcs_writel(GUEST_SYSENTER_ESP, 0);
5674 +- vmcs_writel(GUEST_SYSENTER_EIP, 0);
5675 +- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
5676 +- }
5677 +-
5678 +- kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
5679 +- kvm_rip_write(vcpu, 0xfff0);
5680 +-
5681 +- vmcs_writel(GUEST_GDTR_BASE, 0);
5682 +- vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
5683 +-
5684 +- vmcs_writel(GUEST_IDTR_BASE, 0);
5685 +- vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
5686 +-
5687 +- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
5688 +- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
5689 +- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
5690 +- if (kvm_mpx_supported())
5691 +- vmcs_write64(GUEST_BNDCFGS, 0);
5692 +-
5693 +- setup_msrs(vmx);
5694 +-
5695 +- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
5696 +-
5697 +- if (cpu_has_vmx_tpr_shadow() && !init_event) {
5698 +- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
5699 +- if (cpu_need_tpr_shadow(vcpu))
5700 +- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
5701 +- __pa(vcpu->arch.apic->regs));
5702 +- vmcs_write32(TPR_THRESHOLD, 0);
5703 +- }
5704 +-
5705 +- kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
5706 +-
5707 +- cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
5708 +- vmx->vcpu.arch.cr0 = cr0;
5709 +- vmx_set_cr0(vcpu, cr0); /* enter rmode */
5710 +- vmx_set_cr4(vcpu, 0);
5711 +- vmx_set_efer(vcpu, 0);
5712 +-
5713 +- update_exception_bitmap(vcpu);
5714 +-
5715 +- vpid_sync_context(vmx->vpid);
5716 +- if (init_event)
5717 +- vmx_clear_hlt(vcpu);
5718 +-}
5719 +-
5720 +-static void enable_irq_window(struct kvm_vcpu *vcpu)
5721 +-{
5722 +- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
5723 +-}
5724 +-
5725 +-static void enable_nmi_window(struct kvm_vcpu *vcpu)
5726 +-{
5727 +- if (!enable_vnmi ||
5728 +- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
5729 +- enable_irq_window(vcpu);
5730 +- return;
5731 +- }
5732 +-
5733 +- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
5734 +-}
5735 +-
5736 +-static void vmx_inject_irq(struct kvm_vcpu *vcpu)
5737 +-{
5738 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5739 +- uint32_t intr;
5740 +- int irq = vcpu->arch.interrupt.nr;
5741 +-
5742 +- trace_kvm_inj_virq(irq);
5743 +-
5744 +- ++vcpu->stat.irq_injections;
5745 +- if (vmx->rmode.vm86_active) {
5746 +- int inc_eip = 0;
5747 +- if (vcpu->arch.interrupt.soft)
5748 +- inc_eip = vcpu->arch.event_exit_inst_len;
5749 +- kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
5750 +- return;
5751 +- }
5752 +- intr = irq | INTR_INFO_VALID_MASK;
5753 +- if (vcpu->arch.interrupt.soft) {
5754 +- intr |= INTR_TYPE_SOFT_INTR;
5755 +- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
5756 +- vmx->vcpu.arch.event_exit_inst_len);
5757 +- } else
5758 +- intr |= INTR_TYPE_EXT_INTR;
5759 +- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
5760 +-
5761 +- vmx_clear_hlt(vcpu);
5762 +-}
5763 +-
5764 +-static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
5765 +-{
5766 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5767 +-
5768 +- if (!enable_vnmi) {
5769 +- /*
5770 +- * Tracking the NMI-blocked state in software is built upon
5771 +- * finding the next open IRQ window. This, in turn, depends on
5772 +- * well-behaving guests: They have to keep IRQs disabled at
5773 +- * least as long as the NMI handler runs. Otherwise we may
5774 +- * cause NMI nesting, maybe breaking the guest. But as this is
5775 +- * highly unlikely, we can live with the residual risk.
5776 +- */
5777 +- vmx->loaded_vmcs->soft_vnmi_blocked = 1;
5778 +- vmx->loaded_vmcs->vnmi_blocked_time = 0;
5779 +- }
5780 +-
5781 +- ++vcpu->stat.nmi_injections;
5782 +- vmx->loaded_vmcs->nmi_known_unmasked = false;
5783 +-
5784 +- if (vmx->rmode.vm86_active) {
5785 +- kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
5786 +- return;
5787 +- }
5788 +-
5789 +- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5790 +- INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
5791 +-
5792 +- vmx_clear_hlt(vcpu);
5793 +-}
5794 +-
5795 +-bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
5796 +-{
5797 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5798 +- bool masked;
5799 +-
5800 +- if (!enable_vnmi)
5801 +- return vmx->loaded_vmcs->soft_vnmi_blocked;
5802 +- if (vmx->loaded_vmcs->nmi_known_unmasked)
5803 +- return false;
5804 +- masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
5805 +- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5806 +- return masked;
5807 +-}
5808 +-
5809 +-void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5810 +-{
5811 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5812 +-
5813 +- if (!enable_vnmi) {
5814 +- if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
5815 +- vmx->loaded_vmcs->soft_vnmi_blocked = masked;
5816 +- vmx->loaded_vmcs->vnmi_blocked_time = 0;
5817 +- }
5818 +- } else {
5819 +- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5820 +- if (masked)
5821 +- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5822 +- GUEST_INTR_STATE_NMI);
5823 +- else
5824 +- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
5825 +- GUEST_INTR_STATE_NMI);
5826 +- }
5827 +-}
5828 +-
5829 +-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
5830 +-{
5831 +- if (to_vmx(vcpu)->nested.nested_run_pending)
5832 +- return 0;
5833 +-
5834 +- if (!enable_vnmi &&
5835 +- to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
5836 +- return 0;
5837 +-
5838 +- return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5839 +- (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
5840 +- | GUEST_INTR_STATE_NMI));
5841 +-}
5842 +-
5843 +-static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
5844 +-{
5845 +- return (!to_vmx(vcpu)->nested.nested_run_pending &&
5846 +- vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
5847 +- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5848 +- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
5849 +-}
5850 +-
5851 +-static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
5852 +-{
5853 +- int ret;
5854 +-
5855 +- if (enable_unrestricted_guest)
5856 +- return 0;
5857 +-
5858 +- ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
5859 +- PAGE_SIZE * 3);
5860 +- if (ret)
5861 +- return ret;
5862 +- to_kvm_vmx(kvm)->tss_addr = addr;
5863 +- return init_rmode_tss(kvm);
5864 +-}
5865 +-
5866 +-static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5867 +-{
5868 +- to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
5869 +- return 0;
5870 +-}
5871 +-
5872 +-static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
5873 +-{
5874 +- switch (vec) {
5875 +- case BP_VECTOR:
5876 +- /*
5877 +- * Update instruction length as we may reinject the exception
5878 +- * from user space while in guest debugging mode.
5879 +- */
5880 +- to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
5881 +- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
5882 +- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5883 +- return false;
5884 +- /* fall through */
5885 +- case DB_VECTOR:
5886 +- if (vcpu->guest_debug &
5887 +- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5888 +- return false;
5889 +- /* fall through */
5890 +- case DE_VECTOR:
5891 +- case OF_VECTOR:
5892 +- case BR_VECTOR:
5893 +- case UD_VECTOR:
5894 +- case DF_VECTOR:
5895 +- case SS_VECTOR:
5896 +- case GP_VECTOR:
5897 +- case MF_VECTOR:
5898 +- return true;
5899 +- break;
5900 +- }
5901 +- return false;
5902 +-}
5903 +-
5904 +-static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5905 +- int vec, u32 err_code)
5906 +-{
5907 +- /*
5908 +- * Instruction with address size override prefix opcode 0x67
5909 +- * Cause the #SS fault with 0 error code in VM86 mode.
5910 +- */
5911 +- if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
5912 +- if (kvm_emulate_instruction(vcpu, 0)) {
5913 +- if (vcpu->arch.halt_request) {
5914 +- vcpu->arch.halt_request = 0;
5915 +- return kvm_vcpu_halt(vcpu);
5916 +- }
5917 +- return 1;
5918 +- }
5919 +- return 0;
5920 +- }
5921 +-
5922 +- /*
5923 +- * Forward all other exceptions that are valid in real mode.
5924 +- * FIXME: Breaks guest debugging in real mode, needs to be fixed with
5925 +- * the required debugging infrastructure rework.
5926 +- */
5927 +- kvm_queue_exception(vcpu, vec);
5928 +- return 1;
5929 +-}
5930 +-
5931 +-/*
5932 +- * Trigger machine check on the host. We assume all the MSRs are already set up
5933 +- * by the CPU and that we still run on the same CPU as the MCE occurred on.
5934 +- * We pass a fake environment to the machine check handler because we want
5935 +- * the guest to be always treated like user space, no matter what context
5936 +- * it used internally.
5937 +- */
5938 +-static void kvm_machine_check(void)
5939 +-{
5940 +-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
5941 +- struct pt_regs regs = {
5942 +- .cs = 3, /* Fake ring 3 no matter what the guest ran on */
5943 +- .flags = X86_EFLAGS_IF,
5944 +- };
5945 +-
5946 +- do_machine_check(&regs, 0);
5947 +-#endif
5948 +-}
5949 +-
5950 +-static int handle_machine_check(struct kvm_vcpu *vcpu)
5951 +-{
5952 +- /* handled by vmx_vcpu_run() */
5953 +- return 1;
5954 +-}
5955 +-
5956 +-static int handle_exception_nmi(struct kvm_vcpu *vcpu)
5957 +-{
5958 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
5959 +- struct kvm_run *kvm_run = vcpu->run;
5960 +- u32 intr_info, ex_no, error_code;
5961 +- unsigned long cr2, rip, dr6;
5962 +- u32 vect_info;
5963 +-
5964 +- vect_info = vmx->idt_vectoring_info;
5965 +- intr_info = vmx->exit_intr_info;
5966 +-
5967 +- if (is_machine_check(intr_info) || is_nmi(intr_info))
5968 +- return 1; /* handled by handle_exception_nmi_irqoff() */
5969 +-
5970 +- if (is_invalid_opcode(intr_info))
5971 +- return handle_ud(vcpu);
5972 +-
5973 +- error_code = 0;
5974 +- if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
5975 +- error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
5976 +-
5977 +- if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
5978 +- WARN_ON_ONCE(!enable_vmware_backdoor);
5979 +-
5980 +- /*
5981 +- * VMware backdoor emulation on #GP interception only handles
5982 +- * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
5983 +- * error code on #GP.
5984 +- */
5985 +- if (error_code) {
5986 +- kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
5987 +- return 1;
5988 +- }
5989 +- return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
5990 +- }
5991 +-
5992 +- /*
5993 +- * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
5994 +- * MMIO, it is better to report an internal error.
5995 +- * See the comments in vmx_handle_exit.
5996 +- */
5997 +- if ((vect_info & VECTORING_INFO_VALID_MASK) &&
5998 +- !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
5999 +- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6000 +- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
6001 +- vcpu->run->internal.ndata = 3;
6002 +- vcpu->run->internal.data[0] = vect_info;
6003 +- vcpu->run->internal.data[1] = intr_info;
6004 +- vcpu->run->internal.data[2] = error_code;
6005 +- return 0;
6006 +- }
6007 +-
6008 +- if (is_page_fault(intr_info)) {
6009 +- cr2 = vmcs_readl(EXIT_QUALIFICATION);
6010 +- /* EPT won't cause page fault directly */
6011 +- WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
6012 +- return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
6013 +- }
6014 +-
6015 +- ex_no = intr_info & INTR_INFO_VECTOR_MASK;
6016 +-
6017 +- if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
6018 +- return handle_rmode_exception(vcpu, ex_no, error_code);
6019 +-
6020 +- switch (ex_no) {
6021 +- case AC_VECTOR:
6022 +- kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
6023 +- return 1;
6024 +- case DB_VECTOR:
6025 +- dr6 = vmcs_readl(EXIT_QUALIFICATION);
6026 +- if (!(vcpu->guest_debug &
6027 +- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
6028 +- vcpu->arch.dr6 &= ~DR_TRAP_BITS;
6029 +- vcpu->arch.dr6 |= dr6 | DR6_RTM;
6030 +- if (is_icebp(intr_info))
6031 +- WARN_ON(!skip_emulated_instruction(vcpu));
6032 +-
6033 +- kvm_queue_exception(vcpu, DB_VECTOR);
6034 +- return 1;
6035 +- }
6036 +- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
6037 +- kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
6038 +- /* fall through */
6039 +- case BP_VECTOR:
6040 +- /*
6041 +- * Update instruction length as we may reinject #BP from
6042 +- * user space while in guest debugging mode. Reading it for
6043 +- * #DB as well causes no harm, it is not used in that case.
6044 +- */
6045 +- vmx->vcpu.arch.event_exit_inst_len =
6046 +- vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
6047 +- kvm_run->exit_reason = KVM_EXIT_DEBUG;
6048 +- rip = kvm_rip_read(vcpu);
6049 +- kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
6050 +- kvm_run->debug.arch.exception = ex_no;
6051 +- break;
6052 +- default:
6053 +- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
6054 +- kvm_run->ex.exception = ex_no;
6055 +- kvm_run->ex.error_code = error_code;
6056 +- break;
6057 +- }
6058 +- return 0;
6059 +-}
6060 +-
6061 +-static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
6062 +-{
6063 +- ++vcpu->stat.irq_exits;
6064 +- return 1;
6065 +-}
6066 +-
6067 +-static int handle_triple_fault(struct kvm_vcpu *vcpu)
6068 +-{
6069 +- vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
6070 +- vcpu->mmio_needed = 0;
6071 +- return 0;
6072 +-}
6073 +-
6074 +-static int handle_io(struct kvm_vcpu *vcpu)
6075 +-{
6076 +- unsigned long exit_qualification;
6077 +- int size, in, string;
6078 +- unsigned port;
6079 +-
6080 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6081 +- string = (exit_qualification & 16) != 0;
6082 +-
6083 +- ++vcpu->stat.io_exits;
6084 +-
6085 +- if (string)
6086 +- return kvm_emulate_instruction(vcpu, 0);
6087 +-
6088 +- port = exit_qualification >> 16;
6089 +- size = (exit_qualification & 7) + 1;
6090 +- in = (exit_qualification & 8) != 0;
6091 +-
6092 +- return kvm_fast_pio(vcpu, size, port, in);
6093 +-}
6094 +-
6095 +-static void
6096 +-vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
6097 +-{
6098 +- /*
6099 +- * Patch in the VMCALL instruction:
6100 +- */
6101 +- hypercall[0] = 0x0f;
6102 +- hypercall[1] = 0x01;
6103 +- hypercall[2] = 0xc1;
6104 +-}
6105 +-
6106 +-/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
6107 +-static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
6108 +-{
6109 +- if (is_guest_mode(vcpu)) {
6110 +- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6111 +- unsigned long orig_val = val;
6112 +-
6113 +- /*
6114 +- * We get here when L2 changed cr0 in a way that did not change
6115 +- * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
6116 +- * but did change L0 shadowed bits. So we first calculate the
6117 +- * effective cr0 value that L1 would like to write into the
6118 +- * hardware. It consists of the L2-owned bits from the new
6119 +- * value combined with the L1-owned bits from L1's guest_cr0.
6120 +- */
6121 +- val = (val & ~vmcs12->cr0_guest_host_mask) |
6122 +- (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
6123 +-
6124 +- if (!nested_guest_cr0_valid(vcpu, val))
6125 +- return 1;
6126 +-
6127 +- if (kvm_set_cr0(vcpu, val))
6128 +- return 1;
6129 +- vmcs_writel(CR0_READ_SHADOW, orig_val);
6130 +- return 0;
6131 +- } else {
6132 +- if (to_vmx(vcpu)->nested.vmxon &&
6133 +- !nested_host_cr0_valid(vcpu, val))
6134 +- return 1;
6135 +-
6136 +- return kvm_set_cr0(vcpu, val);
6137 +- }
6138 +-}
6139 +-
6140 +-static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
6141 +-{
6142 +- if (is_guest_mode(vcpu)) {
6143 +- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6144 +- unsigned long orig_val = val;
6145 +-
6146 +- /* analogously to handle_set_cr0 */
6147 +- val = (val & ~vmcs12->cr4_guest_host_mask) |
6148 +- (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
6149 +- if (kvm_set_cr4(vcpu, val))
6150 +- return 1;
6151 +- vmcs_writel(CR4_READ_SHADOW, orig_val);
6152 +- return 0;
6153 +- } else
6154 +- return kvm_set_cr4(vcpu, val);
6155 +-}
6156 +-
6157 +-static int handle_desc(struct kvm_vcpu *vcpu)
6158 +-{
6159 +- WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
6160 +- return kvm_emulate_instruction(vcpu, 0);
6161 +-}
6162 +-
6163 +-static int handle_cr(struct kvm_vcpu *vcpu)
6164 +-{
6165 +- unsigned long exit_qualification, val;
6166 +- int cr;
6167 +- int reg;
6168 +- int err;
6169 +- int ret;
6170 +-
6171 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6172 +- cr = exit_qualification & 15;
6173 +- reg = (exit_qualification >> 8) & 15;
6174 +- switch ((exit_qualification >> 4) & 3) {
6175 +- case 0: /* mov to cr */
6176 +- val = kvm_register_readl(vcpu, reg);
6177 +- trace_kvm_cr_write(cr, val);
6178 +- switch (cr) {
6179 +- case 0:
6180 +- err = handle_set_cr0(vcpu, val);
6181 +- return kvm_complete_insn_gp(vcpu, err);
6182 +- case 3:
6183 +- WARN_ON_ONCE(enable_unrestricted_guest);
6184 +- err = kvm_set_cr3(vcpu, val);
6185 +- return kvm_complete_insn_gp(vcpu, err);
6186 +- case 4:
6187 +- err = handle_set_cr4(vcpu, val);
6188 +- return kvm_complete_insn_gp(vcpu, err);
6189 +- case 8: {
6190 +- u8 cr8_prev = kvm_get_cr8(vcpu);
6191 +- u8 cr8 = (u8)val;
6192 +- err = kvm_set_cr8(vcpu, cr8);
6193 +- ret = kvm_complete_insn_gp(vcpu, err);
6194 +- if (lapic_in_kernel(vcpu))
6195 +- return ret;
6196 +- if (cr8_prev <= cr8)
6197 +- return ret;
6198 +- /*
6199 +- * TODO: we might be squashing a
6200 +- * KVM_GUESTDBG_SINGLESTEP-triggered
6201 +- * KVM_EXIT_DEBUG here.
6202 +- */
6203 +- vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
6204 +- return 0;
6205 +- }
6206 +- }
6207 +- break;
6208 +- case 2: /* clts */
6209 +- WARN_ONCE(1, "Guest should always own CR0.TS");
6210 +- vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
6211 +- trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
6212 +- return kvm_skip_emulated_instruction(vcpu);
6213 +- case 1: /*mov from cr*/
6214 +- switch (cr) {
6215 +- case 3:
6216 +- WARN_ON_ONCE(enable_unrestricted_guest);
6217 +- val = kvm_read_cr3(vcpu);
6218 +- kvm_register_write(vcpu, reg, val);
6219 +- trace_kvm_cr_read(cr, val);
6220 +- return kvm_skip_emulated_instruction(vcpu);
6221 +- case 8:
6222 +- val = kvm_get_cr8(vcpu);
6223 +- kvm_register_write(vcpu, reg, val);
6224 +- trace_kvm_cr_read(cr, val);
6225 +- return kvm_skip_emulated_instruction(vcpu);
6226 +- }
6227 +- break;
6228 +- case 3: /* lmsw */
6229 +- val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
6230 +- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
6231 +- kvm_lmsw(vcpu, val);
6232 +-
6233 +- return kvm_skip_emulated_instruction(vcpu);
6234 +- default:
6235 +- break;
6236 +- }
6237 +- vcpu->run->exit_reason = 0;
6238 +- vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
6239 +- (int)(exit_qualification >> 4) & 3, cr);
6240 +- return 0;
6241 +-}
6242 +-
6243 +-static int handle_dr(struct kvm_vcpu *vcpu)
6244 +-{
6245 +- unsigned long exit_qualification;
6246 +- int dr, dr7, reg;
6247 +-
6248 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6249 +- dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
6250 +-
6251 +- /* First, if DR does not exist, trigger UD */
6252 +- if (!kvm_require_dr(vcpu, dr))
6253 +- return 1;
6254 +-
6255 +- /* Do not handle if the CPL > 0, will trigger GP on re-entry */
6256 +- if (!kvm_require_cpl(vcpu, 0))
6257 +- return 1;
6258 +- dr7 = vmcs_readl(GUEST_DR7);
6259 +- if (dr7 & DR7_GD) {
6260 +- /*
6261 +- * As the vm-exit takes precedence over the debug trap, we
6262 +- * need to emulate the latter, either for the host or the
6263 +- * guest debugging itself.
6264 +- */
6265 +- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6266 +- vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
6267 +- vcpu->run->debug.arch.dr7 = dr7;
6268 +- vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
6269 +- vcpu->run->debug.arch.exception = DB_VECTOR;
6270 +- vcpu->run->exit_reason = KVM_EXIT_DEBUG;
6271 +- return 0;
6272 +- } else {
6273 +- vcpu->arch.dr6 &= ~DR_TRAP_BITS;
6274 +- vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
6275 +- kvm_queue_exception(vcpu, DB_VECTOR);
6276 +- return 1;
6277 +- }
6278 +- }
6279 +-
6280 +- if (vcpu->guest_debug == 0) {
6281 +- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
6282 +-
6283 +- /*
6284 +- * No more DR vmexits; force a reload of the debug registers
6285 +- * and reenter on this instruction. The next vmexit will
6286 +- * retrieve the full state of the debug registers.
6287 +- */
6288 +- vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
6289 +- return 1;
6290 +- }
6291 +-
6292 +- reg = DEBUG_REG_ACCESS_REG(exit_qualification);
6293 +- if (exit_qualification & TYPE_MOV_FROM_DR) {
6294 +- unsigned long val;
6295 +-
6296 +- if (kvm_get_dr(vcpu, dr, &val))
6297 +- return 1;
6298 +- kvm_register_write(vcpu, reg, val);
6299 +- } else
6300 +- if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
6301 +- return 1;
6302 +-
6303 +- return kvm_skip_emulated_instruction(vcpu);
6304 +-}
6305 +-
6306 +-static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
6307 +-{
6308 +- return vcpu->arch.dr6;
6309 +-}
6310 +-
6311 +-static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
6312 +-{
6313 +-}
6314 +-
6315 +-static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
6316 +-{
6317 +- get_debugreg(vcpu->arch.db[0], 0);
6318 +- get_debugreg(vcpu->arch.db[1], 1);
6319 +- get_debugreg(vcpu->arch.db[2], 2);
6320 +- get_debugreg(vcpu->arch.db[3], 3);
6321 +- get_debugreg(vcpu->arch.dr6, 6);
6322 +- vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
6323 +-
6324 +- vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
6325 +- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
6326 +-}
6327 +-
6328 +-static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
6329 +-{
6330 +- vmcs_writel(GUEST_DR7, val);
6331 +-}
6332 +-
6333 +-static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
6334 +-{
6335 +- kvm_apic_update_ppr(vcpu);
6336 +- return 1;
6337 +-}
6338 +-
6339 +-static int handle_interrupt_window(struct kvm_vcpu *vcpu)
6340 +-{
6341 +- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
6342 +-
6343 +- kvm_make_request(KVM_REQ_EVENT, vcpu);
6344 +-
6345 +- ++vcpu->stat.irq_window_exits;
6346 +- return 1;
6347 +-}
6348 +-
6349 +-static int handle_vmcall(struct kvm_vcpu *vcpu)
6350 +-{
6351 +- return kvm_emulate_hypercall(vcpu);
6352 +-}
6353 +-
6354 +-static int handle_invd(struct kvm_vcpu *vcpu)
6355 +-{
6356 +- return kvm_emulate_instruction(vcpu, 0);
6357 +-}
6358 +-
6359 +-static int handle_invlpg(struct kvm_vcpu *vcpu)
6360 +-{
6361 +- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6362 +-
6363 +- kvm_mmu_invlpg(vcpu, exit_qualification);
6364 +- return kvm_skip_emulated_instruction(vcpu);
6365 +-}
6366 +-
6367 +-static int handle_rdpmc(struct kvm_vcpu *vcpu)
6368 +-{
6369 +- int err;
6370 +-
6371 +- err = kvm_rdpmc(vcpu);
6372 +- return kvm_complete_insn_gp(vcpu, err);
6373 +-}
6374 +-
6375 +-static int handle_wbinvd(struct kvm_vcpu *vcpu)
6376 +-{
6377 +- return kvm_emulate_wbinvd(vcpu);
6378 +-}
6379 +-
6380 +-static int handle_xsetbv(struct kvm_vcpu *vcpu)
6381 +-{
6382 +- u64 new_bv = kvm_read_edx_eax(vcpu);
6383 +- u32 index = kvm_rcx_read(vcpu);
6384 +-
6385 +- if (kvm_set_xcr(vcpu, index, new_bv) == 0)
6386 +- return kvm_skip_emulated_instruction(vcpu);
6387 +- return 1;
6388 +-}
6389 +-
6390 +-static int handle_apic_access(struct kvm_vcpu *vcpu)
6391 +-{
6392 +- if (likely(fasteoi)) {
6393 +- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6394 +- int access_type, offset;
6395 +-
6396 +- access_type = exit_qualification & APIC_ACCESS_TYPE;
6397 +- offset = exit_qualification & APIC_ACCESS_OFFSET;
6398 +- /*
6399 +- * Sane guest uses MOV to write EOI, with written value
6400 +- * not cared. So make a short-circuit here by avoiding
6401 +- * heavy instruction emulation.
6402 +- */
6403 +- if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
6404 +- (offset == APIC_EOI)) {
6405 +- kvm_lapic_set_eoi(vcpu);
6406 +- return kvm_skip_emulated_instruction(vcpu);
6407 +- }
6408 +- }
6409 +- return kvm_emulate_instruction(vcpu, 0);
6410 +-}
6411 +-
6412 +-static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
6413 +-{
6414 +- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6415 +- int vector = exit_qualification & 0xff;
6416 +-
6417 +- /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
6418 +- kvm_apic_set_eoi_accelerated(vcpu, vector);
6419 +- return 1;
6420 +-}
6421 +-
6422 +-static int handle_apic_write(struct kvm_vcpu *vcpu)
6423 +-{
6424 +- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6425 +- u32 offset = exit_qualification & 0xfff;
6426 +-
6427 +- /* APIC-write VM exit is trap-like and thus no need to adjust IP */
6428 +- kvm_apic_write_nodecode(vcpu, offset);
6429 +- return 1;
6430 +-}
6431 +-
6432 +-static int handle_task_switch(struct kvm_vcpu *vcpu)
6433 +-{
6434 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
6435 +- unsigned long exit_qualification;
6436 +- bool has_error_code = false;
6437 +- u32 error_code = 0;
6438 +- u16 tss_selector;
6439 +- int reason, type, idt_v, idt_index;
6440 +-
6441 +- idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
6442 +- idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
6443 +- type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
6444 +-
6445 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6446 +-
6447 +- reason = (u32)exit_qualification >> 30;
6448 +- if (reason == TASK_SWITCH_GATE && idt_v) {
6449 +- switch (type) {
6450 +- case INTR_TYPE_NMI_INTR:
6451 +- vcpu->arch.nmi_injected = false;
6452 +- vmx_set_nmi_mask(vcpu, true);
6453 +- break;
6454 +- case INTR_TYPE_EXT_INTR:
6455 +- case INTR_TYPE_SOFT_INTR:
6456 +- kvm_clear_interrupt_queue(vcpu);
6457 +- break;
6458 +- case INTR_TYPE_HARD_EXCEPTION:
6459 +- if (vmx->idt_vectoring_info &
6460 +- VECTORING_INFO_DELIVER_CODE_MASK) {
6461 +- has_error_code = true;
6462 +- error_code =
6463 +- vmcs_read32(IDT_VECTORING_ERROR_CODE);
6464 +- }
6465 +- /* fall through */
6466 +- case INTR_TYPE_SOFT_EXCEPTION:
6467 +- kvm_clear_exception_queue(vcpu);
6468 +- break;
6469 +- default:
6470 +- break;
6471 +- }
6472 +- }
6473 +- tss_selector = exit_qualification;
6474 +-
6475 +- if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
6476 +- type != INTR_TYPE_EXT_INTR &&
6477 +- type != INTR_TYPE_NMI_INTR))
6478 +- WARN_ON(!skip_emulated_instruction(vcpu));
6479 +-
6480 +- /*
6481 +- * TODO: What about debug traps on tss switch?
6482 +- * Are we supposed to inject them and update dr6?
6483 +- */
6484 +- return kvm_task_switch(vcpu, tss_selector,
6485 +- type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
6486 +- reason, has_error_code, error_code);
6487 +-}
6488 +-
6489 +-static int handle_ept_violation(struct kvm_vcpu *vcpu)
6490 +-{
6491 +- unsigned long exit_qualification;
6492 +- gpa_t gpa;
6493 +- u64 error_code;
6494 +-
6495 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6496 +-
6497 +- /*
6498 +- * EPT violation happened while executing iret from NMI,
6499 +- * "blocked by NMI" bit has to be set before next VM entry.
6500 +- * There are errata that may cause this bit to not be set:
6501 +- * AAK134, BY25.
6502 +- */
6503 +- if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
6504 +- enable_vnmi &&
6505 +- (exit_qualification & INTR_INFO_UNBLOCK_NMI))
6506 +- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
6507 +-
6508 +- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
6509 +- trace_kvm_page_fault(gpa, exit_qualification);
6510 +-
6511 +- /* Is it a read fault? */
6512 +- error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
6513 +- ? PFERR_USER_MASK : 0;
6514 +- /* Is it a write fault? */
6515 +- error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
6516 +- ? PFERR_WRITE_MASK : 0;
6517 +- /* Is it a fetch fault? */
6518 +- error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
6519 +- ? PFERR_FETCH_MASK : 0;
6520 +- /* ept page table entry is present? */
6521 +- error_code |= (exit_qualification &
6522 +- (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
6523 +- EPT_VIOLATION_EXECUTABLE))
6524 +- ? PFERR_PRESENT_MASK : 0;
6525 +-
6526 +- error_code |= (exit_qualification & 0x100) != 0 ?
6527 +- PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
6528 +-
6529 +- vcpu->arch.exit_qualification = exit_qualification;
6530 +- return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
6531 +-}
6532 +-
6533 +-static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
6534 +-{
6535 +- gpa_t gpa;
6536 +-
6537 +- /*
6538 +- * A nested guest cannot optimize MMIO vmexits, because we have an
6539 +- * nGPA here instead of the required GPA.
6540 +- */
6541 +- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
6542 +- if (!is_guest_mode(vcpu) &&
6543 +- !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
6544 +- trace_kvm_fast_mmio(gpa);
6545 +- return kvm_skip_emulated_instruction(vcpu);
6546 +- }
6547 +-
6548 +- return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
6549 +-}
6550 +-
6551 +-static int handle_nmi_window(struct kvm_vcpu *vcpu)
6552 +-{
6553 +- WARN_ON_ONCE(!enable_vnmi);
6554 +- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
6555 +- ++vcpu->stat.nmi_window_exits;
6556 +- kvm_make_request(KVM_REQ_EVENT, vcpu);
6557 +-
6558 +- return 1;
6559 +-}
6560 +-
6561 +-static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
6562 +-{
6563 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
6564 +- bool intr_window_requested;
6565 +- unsigned count = 130;
6566 +-
6567 +- /*
6568 +- * We should never reach the point where we are emulating L2
6569 +- * due to invalid guest state as that means we incorrectly
6570 +- * allowed a nested VMEntry with an invalid vmcs12.
6571 +- */
6572 +- WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
6573 +-
6574 +- intr_window_requested = exec_controls_get(vmx) &
6575 +- CPU_BASED_INTR_WINDOW_EXITING;
6576 +-
6577 +- while (vmx->emulation_required && count-- != 0) {
6578 +- if (intr_window_requested && vmx_interrupt_allowed(vcpu))
6579 +- return handle_interrupt_window(&vmx->vcpu);
6580 +-
6581 +- if (kvm_test_request(KVM_REQ_EVENT, vcpu))
6582 +- return 1;
6583 +-
6584 +- if (!kvm_emulate_instruction(vcpu, 0))
6585 +- return 0;
6586 +-
6587 +- if (vmx->emulation_required && !vmx->rmode.vm86_active &&
6588 +- vcpu->arch.exception.pending) {
6589 +- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6590 +- vcpu->run->internal.suberror =
6591 +- KVM_INTERNAL_ERROR_EMULATION;
6592 +- vcpu->run->internal.ndata = 0;
6593 +- return 0;
6594 +- }
6595 +-
6596 +- if (vcpu->arch.halt_request) {
6597 +- vcpu->arch.halt_request = 0;
6598 +- return kvm_vcpu_halt(vcpu);
6599 +- }
6600 +-
6601 +- /*
6602 +- * Note, return 1 and not 0, vcpu_run() is responsible for
6603 +- * morphing the pending signal into the proper return code.
6604 +- */
6605 +- if (signal_pending(current))
6606 +- return 1;
6607 +-
6608 +- if (need_resched())
6609 +- schedule();
6610 +- }
6611 +-
6612 +- return 1;
6613 +-}
6614 +-
6615 +-static void grow_ple_window(struct kvm_vcpu *vcpu)
6616 +-{
6617 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
6618 +- unsigned int old = vmx->ple_window;
6619 +-
6620 +- vmx->ple_window = __grow_ple_window(old, ple_window,
6621 +- ple_window_grow,
6622 +- ple_window_max);
6623 +-
6624 +- if (vmx->ple_window != old) {
6625 +- vmx->ple_window_dirty = true;
6626 +- trace_kvm_ple_window_update(vcpu->vcpu_id,
6627 +- vmx->ple_window, old);
6628 +- }
6629 +-}
6630 +-
6631 +-static void shrink_ple_window(struct kvm_vcpu *vcpu)
6632 +-{
6633 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
6634 +- unsigned int old = vmx->ple_window;
6635 +-
6636 +- vmx->ple_window = __shrink_ple_window(old, ple_window,
6637 +- ple_window_shrink,
6638 +- ple_window);
6639 +-
6640 +- if (vmx->ple_window != old) {
6641 +- vmx->ple_window_dirty = true;
6642 +- trace_kvm_ple_window_update(vcpu->vcpu_id,
6643 +- vmx->ple_window, old);
6644 +- }
6645 +-}
6646 +-
6647 +-/*
6648 +- * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
6649 +- */
6650 +-static void wakeup_handler(void)
6651 +-{
6652 +- struct kvm_vcpu *vcpu;
6653 +- int cpu = smp_processor_id();
6654 +-
6655 +- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6656 +- list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
6657 +- blocked_vcpu_list) {
6658 +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6659 +-
6660 +- if (pi_test_on(pi_desc) == 1)
6661 +- kvm_vcpu_kick(vcpu);
6662 +- }
6663 +- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6664 +-}
6665 +-
6666 +-static void vmx_enable_tdp(void)
6667 +-{
6668 +- kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
6669 +- enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
6670 +- enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
6671 +- 0ull, VMX_EPT_EXECUTABLE_MASK,
6672 +- cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
6673 +- VMX_EPT_RWX_MASK, 0ull);
6674 +-
6675 +- ept_set_mmio_spte_mask();
6676 +- kvm_enable_tdp();
6677 +-}
6678 +-
6679 +-/*
6680 +- * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
6681 +- * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
6682 +- */
6683 +-static int handle_pause(struct kvm_vcpu *vcpu)
6684 +-{
6685 +- if (!kvm_pause_in_guest(vcpu->kvm))
6686 +- grow_ple_window(vcpu);
6687 +-
6688 +- /*
6689 +- * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
6690 +- * VM-execution control is ignored if CPL > 0. OTOH, KVM
6691 +- * never set PAUSE_EXITING and just set PLE if supported,
6692 +- * so the vcpu must be CPL=0 if it gets a PAUSE exit.
6693 +- */
6694 +- kvm_vcpu_on_spin(vcpu, true);
6695 +- return kvm_skip_emulated_instruction(vcpu);
6696 +-}
6697 +-
6698 +-static int handle_nop(struct kvm_vcpu *vcpu)
6699 +-{
6700 +- return kvm_skip_emulated_instruction(vcpu);
6701 +-}
6702 +-
6703 +-static int handle_mwait(struct kvm_vcpu *vcpu)
6704 +-{
6705 +- printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
6706 +- return handle_nop(vcpu);
6707 +-}
6708 +-
6709 +-static int handle_invalid_op(struct kvm_vcpu *vcpu)
6710 +-{
6711 +- kvm_queue_exception(vcpu, UD_VECTOR);
6712 +- return 1;
6713 +-}
6714 +-
6715 +-static int handle_monitor_trap(struct kvm_vcpu *vcpu)
6716 +-{
6717 +- return 1;
6718 +-}
6719 +-
6720 +-static int handle_monitor(struct kvm_vcpu *vcpu)
6721 +-{
6722 +- printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
6723 +- return handle_nop(vcpu);
6724 +-}
6725 +-
6726 +-static int handle_invpcid(struct kvm_vcpu *vcpu)
6727 +-{
6728 +- u32 vmx_instruction_info;
6729 +- unsigned long type;
6730 +- bool pcid_enabled;
6731 +- gva_t gva;
6732 +- struct x86_exception e;
6733 +- unsigned i;
6734 +- unsigned long roots_to_free = 0;
6735 +- struct {
6736 +- u64 pcid;
6737 +- u64 gla;
6738 +- } operand;
6739 +-
6740 +- if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
6741 +- kvm_queue_exception(vcpu, UD_VECTOR);
6742 +- return 1;
6743 +- }
6744 +-
6745 +- vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
6746 +- type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
6747 +-
6748 +- if (type > 3) {
6749 +- kvm_inject_gp(vcpu, 0);
6750 +- return 1;
6751 +- }
6752 +-
6753 +- /* According to the Intel instruction reference, the memory operand
6754 +- * is read even if it isn't needed (e.g., for type==all)
6755 +- */
6756 +- if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
6757 +- vmx_instruction_info, false,
6758 +- sizeof(operand), &gva))
6759 +- return 1;
6760 +-
6761 +- if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
6762 +- kvm_inject_page_fault(vcpu, &e);
6763 +- return 1;
6764 +- }
6765 +-
6766 +- if (operand.pcid >> 12 != 0) {
6767 +- kvm_inject_gp(vcpu, 0);
6768 +- return 1;
6769 +- }
6770 +-
6771 +- pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
6772 +-
6773 +- switch (type) {
6774 +- case INVPCID_TYPE_INDIV_ADDR:
6775 +- if ((!pcid_enabled && (operand.pcid != 0)) ||
6776 +- is_noncanonical_address(operand.gla, vcpu)) {
6777 +- kvm_inject_gp(vcpu, 0);
6778 +- return 1;
6779 +- }
6780 +- kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
6781 +- return kvm_skip_emulated_instruction(vcpu);
6782 +-
6783 +- case INVPCID_TYPE_SINGLE_CTXT:
6784 +- if (!pcid_enabled && (operand.pcid != 0)) {
6785 +- kvm_inject_gp(vcpu, 0);
6786 +- return 1;
6787 +- }
6788 +-
6789 +- if (kvm_get_active_pcid(vcpu) == operand.pcid) {
6790 +- kvm_mmu_sync_roots(vcpu);
6791 +- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
6792 +- }
6793 +-
6794 +- for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
6795 +- if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
6796 +- == operand.pcid)
6797 +- roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
6798 +-
6799 +- kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
6800 +- /*
6801 +- * If neither the current cr3 nor any of the prev_roots use the
6802 +- * given PCID, then nothing needs to be done here because a
6803 +- * resync will happen anyway before switching to any other CR3.
6804 +- */
6805 +-
6806 +- return kvm_skip_emulated_instruction(vcpu);
6807 +-
6808 +- case INVPCID_TYPE_ALL_NON_GLOBAL:
6809 +- /*
6810 +- * Currently, KVM doesn't mark global entries in the shadow
6811 +- * page tables, so a non-global flush just degenerates to a
6812 +- * global flush. If needed, we could optimize this later by
6813 +- * keeping track of global entries in shadow page tables.
6814 +- */
6815 +-
6816 +- /* fall-through */
6817 +- case INVPCID_TYPE_ALL_INCL_GLOBAL:
6818 +- kvm_mmu_unload(vcpu);
6819 +- return kvm_skip_emulated_instruction(vcpu);
6820 +-
6821 +- default:
6822 +- BUG(); /* We have already checked above that type <= 3 */
6823 +- }
6824 +-}
6825 +-
6826 +-static int handle_pml_full(struct kvm_vcpu *vcpu)
6827 +-{
6828 +- unsigned long exit_qualification;
6829 +-
6830 +- trace_kvm_pml_full(vcpu->vcpu_id);
6831 +-
6832 +- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6833 +-
6834 +- /*
6835 +- * PML buffer FULL happened while executing iret from NMI,
6836 +- * "blocked by NMI" bit has to be set before next VM entry.
6837 +- */
6838 +- if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
6839 +- enable_vnmi &&
6840 +- (exit_qualification & INTR_INFO_UNBLOCK_NMI))
6841 +- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6842 +- GUEST_INTR_STATE_NMI);
6843 +-
6844 +- /*
6845 +- * PML buffer already flushed at beginning of VMEXIT. Nothing to do
6846 +- * here.., and there's no userspace involvement needed for PML.
6847 +- */
6848 +- return 1;
6849 +-}
6850 +-
6851 +-static int handle_preemption_timer(struct kvm_vcpu *vcpu)
6852 +-{
6853 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
6854 +-
6855 +- if (!vmx->req_immediate_exit &&
6856 +- !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
6857 +- kvm_lapic_expired_hv_timer(vcpu);
6858 +-
6859 +- return 1;
6860 +-}
6861 +-
6862 +-/*
6863 +- * When nested=0, all VMX instruction VM Exits filter here. The handlers
6864 +- * are overwritten by nested_vmx_setup() when nested=1.
6865 +- */
6866 +-static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
6867 +-{
6868 +- kvm_queue_exception(vcpu, UD_VECTOR);
6869 +- return 1;
6870 +-}
6871 +-
6872 +-static int handle_encls(struct kvm_vcpu *vcpu)
6873 +-{
6874 +- /*
6875 +- * SGX virtualization is not yet supported. There is no software
6876 +- * enable bit for SGX, so we have to trap ENCLS and inject a #UD
6877 +- * to prevent the guest from executing ENCLS.
6878 +- */
6879 +- kvm_queue_exception(vcpu, UD_VECTOR);
6880 +- return 1;
6881 +-}
6882 +-
6883 +-/*
6884 +- * The exit handlers return 1 if the exit was handled fully and guest execution
6885 +- * may resume. Otherwise they set the kvm_run parameter to indicate what needs
6886 +- * to be done to userspace and return 0.
6887 +- */
6888 +-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6889 +- [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
6890 +- [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
6891 +- [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
6892 +- [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
6893 +- [EXIT_REASON_IO_INSTRUCTION] = handle_io,
6894 +- [EXIT_REASON_CR_ACCESS] = handle_cr,
6895 +- [EXIT_REASON_DR_ACCESS] = handle_dr,
6896 +- [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
6897 +- [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
6898 +- [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
6899 +- [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
6900 +- [EXIT_REASON_HLT] = kvm_emulate_halt,
6901 +- [EXIT_REASON_INVD] = handle_invd,
6902 +- [EXIT_REASON_INVLPG] = handle_invlpg,
6903 +- [EXIT_REASON_RDPMC] = handle_rdpmc,
6904 +- [EXIT_REASON_VMCALL] = handle_vmcall,
6905 +- [EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
6906 +- [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
6907 +- [EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
6908 +- [EXIT_REASON_VMPTRST] = handle_vmx_instruction,
6909 +- [EXIT_REASON_VMREAD] = handle_vmx_instruction,
6910 +- [EXIT_REASON_VMRESUME] = handle_vmx_instruction,
6911 +- [EXIT_REASON_VMWRITE] = handle_vmx_instruction,
6912 +- [EXIT_REASON_VMOFF] = handle_vmx_instruction,
6913 +- [EXIT_REASON_VMON] = handle_vmx_instruction,
6914 +- [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
6915 +- [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
6916 +- [EXIT_REASON_APIC_WRITE] = handle_apic_write,
6917 +- [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
6918 +- [EXIT_REASON_WBINVD] = handle_wbinvd,
6919 +- [EXIT_REASON_XSETBV] = handle_xsetbv,
6920 +- [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
6921 +- [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
6922 +- [EXIT_REASON_GDTR_IDTR] = handle_desc,
6923 +- [EXIT_REASON_LDTR_TR] = handle_desc,
6924 +- [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
6925 +- [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
6926 +- [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
6927 +- [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6928 +- [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
6929 +- [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6930 +- [EXIT_REASON_INVEPT] = handle_vmx_instruction,
6931 +- [EXIT_REASON_INVVPID] = handle_vmx_instruction,
6932 +- [EXIT_REASON_RDRAND] = handle_invalid_op,
6933 +- [EXIT_REASON_RDSEED] = handle_invalid_op,
6934 +- [EXIT_REASON_PML_FULL] = handle_pml_full,
6935 +- [EXIT_REASON_INVPCID] = handle_invpcid,
6936 +- [EXIT_REASON_VMFUNC] = handle_vmx_instruction,
6937 +- [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
6938 +- [EXIT_REASON_ENCLS] = handle_encls,
6939 +-};
6940 +-
6941 +-static const int kvm_vmx_max_exit_handlers =
6942 +- ARRAY_SIZE(kvm_vmx_exit_handlers);
6943 +-
6944 +-static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6945 +-{
6946 +- *info1 = vmcs_readl(EXIT_QUALIFICATION);
6947 +- *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6948 +-}
6949 +-
6950 +-static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
6951 +-{
6952 +- if (vmx->pml_pg) {
6953 +- __free_page(vmx->pml_pg);
6954 +- vmx->pml_pg = NULL;
6955 +- }
6956 +-}
6957 +-
6958 +-static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
6959 +-{
6960 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
6961 +- u64 *pml_buf;
6962 +- u16 pml_idx;
6963 +-
6964 +- pml_idx = vmcs_read16(GUEST_PML_INDEX);
6965 +-
6966 +- /* Do nothing if PML buffer is empty */
6967 +- if (pml_idx == (PML_ENTITY_NUM - 1))
6968 +- return;
6969 +-
6970 +- /* PML index always points to next available PML buffer entity */
6971 +- if (pml_idx >= PML_ENTITY_NUM)
6972 +- pml_idx = 0;
6973 +- else
6974 +- pml_idx++;
6975 +-
6976 +- pml_buf = page_address(vmx->pml_pg);
6977 +- for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
6978 +- u64 gpa;
6979 +-
6980 +- gpa = pml_buf[pml_idx];
6981 +- WARN_ON(gpa & (PAGE_SIZE - 1));
6982 +- kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
6983 +- }
6984 +-
6985 +- /* reset PML index */
6986 +- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6987 +-}
6988 +-
6989 +-/*
6990 +- * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
6991 +- * Called before reporting dirty_bitmap to userspace.
6992 +- */
6993 +-static void kvm_flush_pml_buffers(struct kvm *kvm)
6994 +-{
6995 +- int i;
6996 +- struct kvm_vcpu *vcpu;
6997 +- /*
6998 +- * We only need to kick vcpu out of guest mode here, as PML buffer
6999 +- * is flushed at beginning of all VMEXITs, and it's obvious that only
7000 +- * vcpus running in guest are possible to have unflushed GPAs in PML
7001 +- * buffer.
7002 +- */
7003 +- kvm_for_each_vcpu(i, vcpu, kvm)
7004 +- kvm_vcpu_kick(vcpu);
7005 +-}
7006 +-
7007 +-static void vmx_dump_sel(char *name, uint32_t sel)
7008 +-{
7009 +- pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
7010 +- name, vmcs_read16(sel),
7011 +- vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
7012 +- vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
7013 +- vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
7014 +-}
7015 +-
7016 +-static void vmx_dump_dtsel(char *name, uint32_t limit)
7017 +-{
7018 +- pr_err("%s limit=0x%08x, base=0x%016lx\n",
7019 +- name, vmcs_read32(limit),
7020 +- vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
7021 +-}
7022 +-
7023 +-void dump_vmcs(void)
7024 +-{
7025 +- u32 vmentry_ctl, vmexit_ctl;
7026 +- u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
7027 +- unsigned long cr4;
7028 +- u64 efer;
7029 +- int i, n;
7030 +-
7031 +- if (!dump_invalid_vmcs) {
7032 +- pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
7033 +- return;
7034 +- }
7035 +-
7036 +- vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
7037 +- vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
7038 +- cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
7039 +- pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
7040 +- cr4 = vmcs_readl(GUEST_CR4);
7041 +- efer = vmcs_read64(GUEST_IA32_EFER);
7042 +- secondary_exec_control = 0;
7043 +- if (cpu_has_secondary_exec_ctrls())
7044 +- secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7045 +-
7046 +- pr_err("*** Guest State ***\n");
7047 +- pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
7048 +- vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
7049 +- vmcs_readl(CR0_GUEST_HOST_MASK));
7050 +- pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
7051 +- cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
7052 +- pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
7053 +- if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
7054 +- (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
7055 +- {
7056 +- pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
7057 +- vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
7058 +- pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
7059 +- vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
7060 +- }
7061 +- pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
7062 +- vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
7063 +- pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
7064 +- vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
7065 +- pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
7066 +- vmcs_readl(GUEST_SYSENTER_ESP),
7067 +- vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
7068 +- vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
7069 +- vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
7070 +- vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
7071 +- vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
7072 +- vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
7073 +- vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
7074 +- vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
7075 +- vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
7076 +- vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
7077 +- vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
7078 +- if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
7079 +- (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
7080 +- pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
7081 +- efer, vmcs_read64(GUEST_IA32_PAT));
7082 +- pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
7083 +- vmcs_read64(GUEST_IA32_DEBUGCTL),
7084 +- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
7085 +- if (cpu_has_load_perf_global_ctrl() &&
7086 +- vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
7087 +- pr_err("PerfGlobCtl = 0x%016llx\n",
7088 +- vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
7089 +- if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
7090 +- pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
7091 +- pr_err("Interruptibility = %08x ActivityState = %08x\n",
7092 +- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
7093 +- vmcs_read32(GUEST_ACTIVITY_STATE));
7094 +- if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
7095 +- pr_err("InterruptStatus = %04x\n",
7096 +- vmcs_read16(GUEST_INTR_STATUS));
7097 +-
7098 +- pr_err("*** Host State ***\n");
7099 +- pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
7100 +- vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
7101 +- pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
7102 +- vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
7103 +- vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
7104 +- vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
7105 +- vmcs_read16(HOST_TR_SELECTOR));
7106 +- pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
7107 +- vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
7108 +- vmcs_readl(HOST_TR_BASE));
7109 +- pr_err("GDTBase=%016lx IDTBase=%016lx\n",
7110 +- vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
7111 +- pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
7112 +- vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
7113 +- vmcs_readl(HOST_CR4));
7114 +- pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
7115 +- vmcs_readl(HOST_IA32_SYSENTER_ESP),
7116 +- vmcs_read32(HOST_IA32_SYSENTER_CS),
7117 +- vmcs_readl(HOST_IA32_SYSENTER_EIP));
7118 +- if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
7119 +- pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
7120 +- vmcs_read64(HOST_IA32_EFER),
7121 +- vmcs_read64(HOST_IA32_PAT));
7122 +- if (cpu_has_load_perf_global_ctrl() &&
7123 +- vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
7124 +- pr_err("PerfGlobCtl = 0x%016llx\n",
7125 +- vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
7126 +-
7127 +- pr_err("*** Control State ***\n");
7128 +- pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
7129 +- pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
7130 +- pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
7131 +- pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
7132 +- vmcs_read32(EXCEPTION_BITMAP),
7133 +- vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
7134 +- vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
7135 +- pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
7136 +- vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
7137 +- vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
7138 +- vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
7139 +- pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
7140 +- vmcs_read32(VM_EXIT_INTR_INFO),
7141 +- vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
7142 +- vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
7143 +- pr_err(" reason=%08x qualification=%016lx\n",
7144 +- vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
7145 +- pr_err("IDTVectoring: info=%08x errcode=%08x\n",
7146 +- vmcs_read32(IDT_VECTORING_INFO_FIELD),
7147 +- vmcs_read32(IDT_VECTORING_ERROR_CODE));
7148 +- pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
7149 +- if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
7150 +- pr_err("TSC Multiplier = 0x%016llx\n",
7151 +- vmcs_read64(TSC_MULTIPLIER));
7152 +- if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
7153 +- if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
7154 +- u16 status = vmcs_read16(GUEST_INTR_STATUS);
7155 +- pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
7156 +- }
7157 +- pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
7158 +- if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
7159 +- pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
7160 +- pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
7161 +- }
7162 +- if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
7163 +- pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
7164 +- if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
7165 +- pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
7166 +- n = vmcs_read32(CR3_TARGET_COUNT);
7167 +- for (i = 0; i + 1 < n; i += 4)
7168 +- pr_err("CR3 target%u=%016lx target%u=%016lx\n",
7169 +- i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
7170 +- i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
7171 +- if (i < n)
7172 +- pr_err("CR3 target%u=%016lx\n",
7173 +- i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
7174 +- if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
7175 +- pr_err("PLE Gap=%08x Window=%08x\n",
7176 +- vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
7177 +- if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
7178 +- pr_err("Virtual processor ID = 0x%04x\n",
7179 +- vmcs_read16(VIRTUAL_PROCESSOR_ID));
7180 +-}
7181 +-
7182 +-/*
7183 +- * The guest has exited. See if we can fix it or if we need userspace
7184 +- * assistance.
7185 +- */
7186 +-static int vmx_handle_exit(struct kvm_vcpu *vcpu,
7187 +- enum exit_fastpath_completion exit_fastpath)
7188 +-{
7189 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7190 +- u32 exit_reason = vmx->exit_reason;
7191 +- u32 vectoring_info = vmx->idt_vectoring_info;
7192 +-
7193 +- trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
7194 +-
7195 +- /*
7196 +- * Flush logged GPAs PML buffer, this will make dirty_bitmap more
7197 +- * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
7198 +- * querying dirty_bitmap, we only need to kick all vcpus out of guest
7199 +- * mode as if vcpus is in root mode, the PML buffer must has been
7200 +- * flushed already.
7201 +- */
7202 +- if (enable_pml)
7203 +- vmx_flush_pml_buffer(vcpu);
7204 +-
7205 +- /* If guest state is invalid, start emulating */
7206 +- if (vmx->emulation_required)
7207 +- return handle_invalid_guest_state(vcpu);
7208 +-
7209 +- if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
7210 +- return nested_vmx_reflect_vmexit(vcpu, exit_reason);
7211 +-
7212 +- if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
7213 +- dump_vmcs();
7214 +- vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
7215 +- vcpu->run->fail_entry.hardware_entry_failure_reason
7216 +- = exit_reason;
7217 +- return 0;
7218 +- }
7219 +-
7220 +- if (unlikely(vmx->fail)) {
7221 +- dump_vmcs();
7222 +- vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
7223 +- vcpu->run->fail_entry.hardware_entry_failure_reason
7224 +- = vmcs_read32(VM_INSTRUCTION_ERROR);
7225 +- return 0;
7226 +- }
7227 +-
7228 +- /*
7229 +- * Note:
7230 +- * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
7231 +- * delivery event since it indicates guest is accessing MMIO.
7232 +- * The vm-exit can be triggered again after return to guest that
7233 +- * will cause infinite loop.
7234 +- */
7235 +- if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
7236 +- (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
7237 +- exit_reason != EXIT_REASON_EPT_VIOLATION &&
7238 +- exit_reason != EXIT_REASON_PML_FULL &&
7239 +- exit_reason != EXIT_REASON_TASK_SWITCH)) {
7240 +- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7241 +- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
7242 +- vcpu->run->internal.ndata = 3;
7243 +- vcpu->run->internal.data[0] = vectoring_info;
7244 +- vcpu->run->internal.data[1] = exit_reason;
7245 +- vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
7246 +- if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
7247 +- vcpu->run->internal.ndata++;
7248 +- vcpu->run->internal.data[3] =
7249 +- vmcs_read64(GUEST_PHYSICAL_ADDRESS);
7250 +- }
7251 +- return 0;
7252 +- }
7253 +-
7254 +- if (unlikely(!enable_vnmi &&
7255 +- vmx->loaded_vmcs->soft_vnmi_blocked)) {
7256 +- if (vmx_interrupt_allowed(vcpu)) {
7257 +- vmx->loaded_vmcs->soft_vnmi_blocked = 0;
7258 +- } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
7259 +- vcpu->arch.nmi_pending) {
7260 +- /*
7261 +- * This CPU don't support us in finding the end of an
7262 +- * NMI-blocked window if the guest runs with IRQs
7263 +- * disabled. So we pull the trigger after 1 s of
7264 +- * futile waiting, but inform the user about this.
7265 +- */
7266 +- printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
7267 +- "state on VCPU %d after 1 s timeout\n",
7268 +- __func__, vcpu->vcpu_id);
7269 +- vmx->loaded_vmcs->soft_vnmi_blocked = 0;
7270 +- }
7271 +- }
7272 +-
7273 +- if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
7274 +- kvm_skip_emulated_instruction(vcpu);
7275 +- return 1;
7276 +- } else if (exit_reason < kvm_vmx_max_exit_handlers
7277 +- && kvm_vmx_exit_handlers[exit_reason]) {
7278 +-#ifdef CONFIG_RETPOLINE
7279 +- if (exit_reason == EXIT_REASON_MSR_WRITE)
7280 +- return kvm_emulate_wrmsr(vcpu);
7281 +- else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
7282 +- return handle_preemption_timer(vcpu);
7283 +- else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW)
7284 +- return handle_interrupt_window(vcpu);
7285 +- else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
7286 +- return handle_external_interrupt(vcpu);
7287 +- else if (exit_reason == EXIT_REASON_HLT)
7288 +- return kvm_emulate_halt(vcpu);
7289 +- else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
7290 +- return handle_ept_misconfig(vcpu);
7291 +-#endif
7292 +- return kvm_vmx_exit_handlers[exit_reason](vcpu);
7293 +- } else {
7294 +- vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
7295 +- exit_reason);
7296 +- dump_vmcs();
7297 +- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7298 +- vcpu->run->internal.suberror =
7299 +- KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
7300 +- vcpu->run->internal.ndata = 1;
7301 +- vcpu->run->internal.data[0] = exit_reason;
7302 +- return 0;
7303 +- }
7304 +-}
7305 +-
7306 +-/*
7307 +- * Software based L1D cache flush which is used when microcode providing
7308 +- * the cache control MSR is not loaded.
7309 +- *
7310 +- * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
7311 +- * flush it is required to read in 64 KiB because the replacement algorithm
7312 +- * is not exactly LRU. This could be sized at runtime via topology
7313 +- * information but as all relevant affected CPUs have 32KiB L1D cache size
7314 +- * there is no point in doing so.
7315 +- */
7316 +-static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
7317 +-{
7318 +- int size = PAGE_SIZE << L1D_CACHE_ORDER;
7319 +-
7320 +- /*
7321 +- * This code is only executed when the the flush mode is 'cond' or
7322 +- * 'always'
7323 +- */
7324 +- if (static_branch_likely(&vmx_l1d_flush_cond)) {
7325 +- bool flush_l1d;
7326 +-
7327 +- /*
7328 +- * Clear the per-vcpu flush bit, it gets set again
7329 +- * either from vcpu_run() or from one of the unsafe
7330 +- * VMEXIT handlers.
7331 +- */
7332 +- flush_l1d = vcpu->arch.l1tf_flush_l1d;
7333 +- vcpu->arch.l1tf_flush_l1d = false;
7334 +-
7335 +- /*
7336 +- * Clear the per-cpu flush bit, it gets set again from
7337 +- * the interrupt handlers.
7338 +- */
7339 +- flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
7340 +- kvm_clear_cpu_l1tf_flush_l1d();
7341 +-
7342 +- if (!flush_l1d)
7343 +- return;
7344 +- }
7345 +-
7346 +- vcpu->stat.l1d_flush++;
7347 +-
7348 +- if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
7349 +- wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
7350 +- return;
7351 +- }
7352 +-
7353 +- asm volatile(
7354 +- /* First ensure the pages are in the TLB */
7355 +- "xorl %%eax, %%eax\n"
7356 +- ".Lpopulate_tlb:\n\t"
7357 +- "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
7358 +- "addl $4096, %%eax\n\t"
7359 +- "cmpl %%eax, %[size]\n\t"
7360 +- "jne .Lpopulate_tlb\n\t"
7361 +- "xorl %%eax, %%eax\n\t"
7362 +- "cpuid\n\t"
7363 +- /* Now fill the cache */
7364 +- "xorl %%eax, %%eax\n"
7365 +- ".Lfill_cache:\n"
7366 +- "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
7367 +- "addl $64, %%eax\n\t"
7368 +- "cmpl %%eax, %[size]\n\t"
7369 +- "jne .Lfill_cache\n\t"
7370 +- "lfence\n"
7371 +- :: [flush_pages] "r" (vmx_l1d_flush_pages),
7372 +- [size] "r" (size)
7373 +- : "eax", "ebx", "ecx", "edx");
7374 +-}
7375 +-
7376 +-static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
7377 +-{
7378 +- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7379 +- int tpr_threshold;
7380 +-
7381 +- if (is_guest_mode(vcpu) &&
7382 +- nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
7383 +- return;
7384 +-
7385 +- tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
7386 +- if (is_guest_mode(vcpu))
7387 +- to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
7388 +- else
7389 +- vmcs_write32(TPR_THRESHOLD, tpr_threshold);
7390 +-}
7391 +-
7392 +-void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
7393 +-{
7394 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7395 +- u32 sec_exec_control;
7396 +-
7397 +- if (!lapic_in_kernel(vcpu))
7398 +- return;
7399 +-
7400 +- if (!flexpriority_enabled &&
7401 +- !cpu_has_vmx_virtualize_x2apic_mode())
7402 +- return;
7403 +-
7404 +- /* Postpone execution until vmcs01 is the current VMCS. */
7405 +- if (is_guest_mode(vcpu)) {
7406 +- vmx->nested.change_vmcs01_virtual_apic_mode = true;
7407 +- return;
7408 +- }
7409 +-
7410 +- sec_exec_control = secondary_exec_controls_get(vmx);
7411 +- sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
7412 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
7413 +-
7414 +- switch (kvm_get_apic_mode(vcpu)) {
7415 +- case LAPIC_MODE_INVALID:
7416 +- WARN_ONCE(true, "Invalid local APIC state");
7417 +- case LAPIC_MODE_DISABLED:
7418 +- break;
7419 +- case LAPIC_MODE_XAPIC:
7420 +- if (flexpriority_enabled) {
7421 +- sec_exec_control |=
7422 +- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
7423 +- vmx_flush_tlb(vcpu, true);
7424 +- }
7425 +- break;
7426 +- case LAPIC_MODE_X2APIC:
7427 +- if (cpu_has_vmx_virtualize_x2apic_mode())
7428 +- sec_exec_control |=
7429 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
7430 +- break;
7431 +- }
7432 +- secondary_exec_controls_set(vmx, sec_exec_control);
7433 +-
7434 +- vmx_update_msr_bitmap(vcpu);
7435 +-}
7436 +-
7437 +-static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
7438 +-{
7439 +- if (!is_guest_mode(vcpu)) {
7440 +- vmcs_write64(APIC_ACCESS_ADDR, hpa);
7441 +- vmx_flush_tlb(vcpu, true);
7442 +- }
7443 +-}
7444 +-
7445 +-static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
7446 +-{
7447 +- u16 status;
7448 +- u8 old;
7449 +-
7450 +- if (max_isr == -1)
7451 +- max_isr = 0;
7452 +-
7453 +- status = vmcs_read16(GUEST_INTR_STATUS);
7454 +- old = status >> 8;
7455 +- if (max_isr != old) {
7456 +- status &= 0xff;
7457 +- status |= max_isr << 8;
7458 +- vmcs_write16(GUEST_INTR_STATUS, status);
7459 +- }
7460 +-}
7461 +-
7462 +-static void vmx_set_rvi(int vector)
7463 +-{
7464 +- u16 status;
7465 +- u8 old;
7466 +-
7467 +- if (vector == -1)
7468 +- vector = 0;
7469 +-
7470 +- status = vmcs_read16(GUEST_INTR_STATUS);
7471 +- old = (u8)status & 0xff;
7472 +- if ((u8)vector != old) {
7473 +- status &= ~0xff;
7474 +- status |= (u8)vector;
7475 +- vmcs_write16(GUEST_INTR_STATUS, status);
7476 +- }
7477 +-}
7478 +-
7479 +-static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
7480 +-{
7481 +- /*
7482 +- * When running L2, updating RVI is only relevant when
7483 +- * vmcs12 virtual-interrupt-delivery enabled.
7484 +- * However, it can be enabled only when L1 also
7485 +- * intercepts external-interrupts and in that case
7486 +- * we should not update vmcs02 RVI but instead intercept
7487 +- * interrupt. Therefore, do nothing when running L2.
7488 +- */
7489 +- if (!is_guest_mode(vcpu))
7490 +- vmx_set_rvi(max_irr);
7491 +-}
7492 +-
7493 +-static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
7494 +-{
7495 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7496 +- int max_irr;
7497 +- bool max_irr_updated;
7498 +-
7499 +- WARN_ON(!vcpu->arch.apicv_active);
7500 +- if (pi_test_on(&vmx->pi_desc)) {
7501 +- pi_clear_on(&vmx->pi_desc);
7502 +- /*
7503 +- * IOMMU can write to PID.ON, so the barrier matters even on UP.
7504 +- * But on x86 this is just a compiler barrier anyway.
7505 +- */
7506 +- smp_mb__after_atomic();
7507 +- max_irr_updated =
7508 +- kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
7509 +-
7510 +- /*
7511 +- * If we are running L2 and L1 has a new pending interrupt
7512 +- * which can be injected, we should re-evaluate
7513 +- * what should be done with this new L1 interrupt.
7514 +- * If L1 intercepts external-interrupts, we should
7515 +- * exit from L2 to L1. Otherwise, interrupt should be
7516 +- * delivered directly to L2.
7517 +- */
7518 +- if (is_guest_mode(vcpu) && max_irr_updated) {
7519 +- if (nested_exit_on_intr(vcpu))
7520 +- kvm_vcpu_exiting_guest_mode(vcpu);
7521 +- else
7522 +- kvm_make_request(KVM_REQ_EVENT, vcpu);
7523 +- }
7524 +- } else {
7525 +- max_irr = kvm_lapic_find_highest_irr(vcpu);
7526 +- }
7527 +- vmx_hwapic_irr_update(vcpu, max_irr);
7528 +- return max_irr;
7529 +-}
7530 +-
7531 +-static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
7532 +-{
7533 +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7534 +-
7535 +- return pi_test_on(pi_desc) ||
7536 +- (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
7537 +-}
7538 +-
7539 +-static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
7540 +-{
7541 +- if (!kvm_vcpu_apicv_active(vcpu))
7542 +- return;
7543 +-
7544 +- vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
7545 +- vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
7546 +- vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
7547 +- vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
7548 +-}
7549 +-
7550 +-static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
7551 +-{
7552 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7553 +-
7554 +- pi_clear_on(&vmx->pi_desc);
7555 +- memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
7556 +-}
7557 +-
7558 +-static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
7559 +-{
7560 +- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7561 +-
7562 +- /* if exit due to PF check for async PF */
7563 +- if (is_page_fault(vmx->exit_intr_info))
7564 +- vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
7565 +-
7566 +- /* Handle machine checks before interrupts are enabled */
7567 +- if (is_machine_check(vmx->exit_intr_info))
7568 +- kvm_machine_check();
7569 +-
7570 +- /* We need to handle NMIs before interrupts are enabled */
7571 +- if (is_nmi(vmx->exit_intr_info)) {
7572 +- kvm_before_interrupt(&vmx->vcpu);
7573 +- asm("int $2");
7574 +- kvm_after_interrupt(&vmx->vcpu);
7575 +- }
7576 +-}
7577 +-
7578 +-static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
7579 +-{
7580 +- unsigned int vector;
7581 +- unsigned long entry;
7582 +-#ifdef CONFIG_X86_64
7583 +- unsigned long tmp;
7584 +-#endif
7585 +- gate_desc *desc;
7586 +- u32 intr_info;
7587 +-
7588 +- intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7589 +- if (WARN_ONCE(!is_external_intr(intr_info),
7590 +- "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
7591 +- return;
7592 +-
7593 +- vector = intr_info & INTR_INFO_VECTOR_MASK;
7594 +- desc = (gate_desc *)host_idt_base + vector;
7595 +- entry = gate_offset(desc);
7596 +-
7597 +- kvm_before_interrupt(vcpu);
7598 +-
7599 +- asm volatile(
7600 +-#ifdef CONFIG_X86_64
7601 +- "mov %%" _ASM_SP ", %[sp]\n\t"
7602 +- "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
7603 +- "push $%c[ss]\n\t"
7604 +- "push %[sp]\n\t"
7605 +-#endif
7606 +- "pushf\n\t"
7607 +- __ASM_SIZE(push) " $%c[cs]\n\t"
7608 +- CALL_NOSPEC
7609 +- :
7610 +-#ifdef CONFIG_X86_64
7611 +- [sp]"=&r"(tmp),
7612 +-#endif
7613 +- ASM_CALL_CONSTRAINT
7614 +- :
7615 +- THUNK_TARGET(entry),
7616 +- [ss]"i"(__KERNEL_DS),
7617 +- [cs]"i"(__KERNEL_CS)
7618 +- );
7619 +-
7620 +- kvm_after_interrupt(vcpu);
7621 +-}
7622 +-STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
7623 +-
7624 +-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
7625 +- enum exit_fastpath_completion *exit_fastpath)
7626 +-{
7627 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7628 +-
7629 +- if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
7630 +- handle_external_interrupt_irqoff(vcpu);
7631 +- else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
7632 +- handle_exception_nmi_irqoff(vmx);
7633 +- else if (!is_guest_mode(vcpu) &&
7634 +- vmx->exit_reason == EXIT_REASON_MSR_WRITE)
7635 +- *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
7636 +-}
7637 +-
7638 +-static bool vmx_has_emulated_msr(int index)
7639 +-{
7640 +- switch (index) {
7641 +- case MSR_IA32_SMBASE:
7642 +- /*
7643 +- * We cannot do SMM unless we can run the guest in big
7644 +- * real mode.
7645 +- */
7646 +- return enable_unrestricted_guest || emulate_invalid_guest_state;
7647 +- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
7648 +- return nested;
7649 +- case MSR_AMD64_VIRT_SPEC_CTRL:
7650 +- /* This is AMD only. */
7651 +- return false;
7652 +- default:
7653 +- return true;
7654 +- }
7655 +-}
7656 +-
7657 +-static bool vmx_pt_supported(void)
7658 +-{
7659 +- return pt_mode == PT_MODE_HOST_GUEST;
7660 +-}
7661 +-
7662 +-static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
7663 +-{
7664 +- u32 exit_intr_info;
7665 +- bool unblock_nmi;
7666 +- u8 vector;
7667 +- bool idtv_info_valid;
7668 +-
7669 +- idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
7670 +-
7671 +- if (enable_vnmi) {
7672 +- if (vmx->loaded_vmcs->nmi_known_unmasked)
7673 +- return;
7674 +- /*
7675 +- * Can't use vmx->exit_intr_info since we're not sure what
7676 +- * the exit reason is.
7677 +- */
7678 +- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7679 +- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
7680 +- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
7681 +- /*
7682 +- * SDM 3: 27.7.1.2 (September 2008)
7683 +- * Re-set bit "block by NMI" before VM entry if vmexit caused by
7684 +- * a guest IRET fault.
7685 +- * SDM 3: 23.2.2 (September 2008)
7686 +- * Bit 12 is undefined in any of the following cases:
7687 +- * If the VM exit sets the valid bit in the IDT-vectoring
7688 +- * information field.
7689 +- * If the VM exit is due to a double fault.
7690 +- */
7691 +- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
7692 +- vector != DF_VECTOR && !idtv_info_valid)
7693 +- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7694 +- GUEST_INTR_STATE_NMI);
7695 +- else
7696 +- vmx->loaded_vmcs->nmi_known_unmasked =
7697 +- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
7698 +- & GUEST_INTR_STATE_NMI);
7699 +- } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
7700 +- vmx->loaded_vmcs->vnmi_blocked_time +=
7701 +- ktime_to_ns(ktime_sub(ktime_get(),
7702 +- vmx->loaded_vmcs->entry_time));
7703 +-}
7704 +-
7705 +-static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
7706 +- u32 idt_vectoring_info,
7707 +- int instr_len_field,
7708 +- int error_code_field)
7709 +-{
7710 +- u8 vector;
7711 +- int type;
7712 +- bool idtv_info_valid;
7713 +-
7714 +- idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
7715 +-
7716 +- vcpu->arch.nmi_injected = false;
7717 +- kvm_clear_exception_queue(vcpu);
7718 +- kvm_clear_interrupt_queue(vcpu);
7719 +-
7720 +- if (!idtv_info_valid)
7721 +- return;
7722 +-
7723 +- kvm_make_request(KVM_REQ_EVENT, vcpu);
7724 +-
7725 +- vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
7726 +- type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
7727 +-
7728 +- switch (type) {
7729 +- case INTR_TYPE_NMI_INTR:
7730 +- vcpu->arch.nmi_injected = true;
7731 +- /*
7732 +- * SDM 3: 27.7.1.2 (September 2008)
7733 +- * Clear bit "block by NMI" before VM entry if a NMI
7734 +- * delivery faulted.
7735 +- */
7736 +- vmx_set_nmi_mask(vcpu, false);
7737 +- break;
7738 +- case INTR_TYPE_SOFT_EXCEPTION:
7739 +- vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
7740 +- /* fall through */
7741 +- case INTR_TYPE_HARD_EXCEPTION:
7742 +- if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
7743 +- u32 err = vmcs_read32(error_code_field);
7744 +- kvm_requeue_exception_e(vcpu, vector, err);
7745 +- } else
7746 +- kvm_requeue_exception(vcpu, vector);
7747 +- break;
7748 +- case INTR_TYPE_SOFT_INTR:
7749 +- vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
7750 +- /* fall through */
7751 +- case INTR_TYPE_EXT_INTR:
7752 +- kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
7753 +- break;
7754 +- default:
7755 +- break;
7756 +- }
7757 +-}
7758 +-
7759 +-static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
7760 +-{
7761 +- __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
7762 +- VM_EXIT_INSTRUCTION_LEN,
7763 +- IDT_VECTORING_ERROR_CODE);
7764 +-}
7765 +-
7766 +-static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
7767 +-{
7768 +- __vmx_complete_interrupts(vcpu,
7769 +- vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
7770 +- VM_ENTRY_INSTRUCTION_LEN,
7771 +- VM_ENTRY_EXCEPTION_ERROR_CODE);
7772 +-
7773 +- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
7774 +-}
7775 +-
7776 +-static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
7777 +-{
7778 +- int i, nr_msrs;
7779 +- struct perf_guest_switch_msr *msrs;
7780 +-
7781 +- msrs = perf_guest_get_msrs(&nr_msrs);
7782 +-
7783 +- if (!msrs)
7784 +- return;
7785 +-
7786 +- for (i = 0; i < nr_msrs; i++)
7787 +- if (msrs[i].host == msrs[i].guest)
7788 +- clear_atomic_switch_msr(vmx, msrs[i].msr);
7789 +- else
7790 +- add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
7791 +- msrs[i].host, false);
7792 +-}
7793 +-
7794 +-static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
7795 +-{
7796 +- u32 host_umwait_control;
7797 +-
7798 +- if (!vmx_has_waitpkg(vmx))
7799 +- return;
7800 +-
7801 +- host_umwait_control = get_umwait_control_msr();
7802 +-
7803 +- if (vmx->msr_ia32_umwait_control != host_umwait_control)
7804 +- add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
7805 +- vmx->msr_ia32_umwait_control,
7806 +- host_umwait_control, false);
7807 +- else
7808 +- clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
7809 +-}
7810 +-
7811 +-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
7812 +-{
7813 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7814 +- u64 tscl;
7815 +- u32 delta_tsc;
7816 +-
7817 +- if (vmx->req_immediate_exit) {
7818 +- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
7819 +- vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7820 +- } else if (vmx->hv_deadline_tsc != -1) {
7821 +- tscl = rdtsc();
7822 +- if (vmx->hv_deadline_tsc > tscl)
7823 +- /* set_hv_timer ensures the delta fits in 32-bits */
7824 +- delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
7825 +- cpu_preemption_timer_multi);
7826 +- else
7827 +- delta_tsc = 0;
7828 +-
7829 +- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
7830 +- vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7831 +- } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
7832 +- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
7833 +- vmx->loaded_vmcs->hv_timer_soft_disabled = true;
7834 +- }
7835 +-}
7836 +-
7837 +-void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
7838 +-{
7839 +- if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
7840 +- vmx->loaded_vmcs->host_state.rsp = host_rsp;
7841 +- vmcs_writel(HOST_RSP, host_rsp);
7842 +- }
7843 +-}
7844 +-
7845 +-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
7846 +-
7847 +-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
7848 +-{
7849 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
7850 +- unsigned long cr3, cr4;
7851 +-
7852 +- /* Record the guest's net vcpu time for enforced NMI injections. */
7853 +- if (unlikely(!enable_vnmi &&
7854 +- vmx->loaded_vmcs->soft_vnmi_blocked))
7855 +- vmx->loaded_vmcs->entry_time = ktime_get();
7856 +-
7857 +- /* Don't enter VMX if guest state is invalid, let the exit handler
7858 +- start emulation until we arrive back to a valid state */
7859 +- if (vmx->emulation_required)
7860 +- return;
7861 +-
7862 +- if (vmx->ple_window_dirty) {
7863 +- vmx->ple_window_dirty = false;
7864 +- vmcs_write32(PLE_WINDOW, vmx->ple_window);
7865 +- }
7866 +-
7867 +- if (vmx->nested.need_vmcs12_to_shadow_sync)
7868 +- nested_sync_vmcs12_to_shadow(vcpu);
7869 +-
7870 +- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
7871 +- vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
7872 +- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
7873 +- vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
7874 +-
7875 +- cr3 = __get_current_cr3_fast();
7876 +- if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
7877 +- vmcs_writel(HOST_CR3, cr3);
7878 +- vmx->loaded_vmcs->host_state.cr3 = cr3;
7879 +- }
7880 +-
7881 +- cr4 = cr4_read_shadow();
7882 +- if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
7883 +- vmcs_writel(HOST_CR4, cr4);
7884 +- vmx->loaded_vmcs->host_state.cr4 = cr4;
7885 +- }
7886 +-
7887 +- /* When single-stepping over STI and MOV SS, we must clear the
7888 +- * corresponding interruptibility bits in the guest state. Otherwise
7889 +- * vmentry fails as it then expects bit 14 (BS) in pending debug
7890 +- * exceptions being set, but that's not correct for the guest debugging
7891 +- * case. */
7892 +- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7893 +- vmx_set_interrupt_shadow(vcpu, 0);
7894 +-
7895 +- kvm_load_guest_xsave_state(vcpu);
7896 +-
7897 +- if (static_cpu_has(X86_FEATURE_PKU) &&
7898 +- kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
7899 +- vcpu->arch.pkru != vmx->host_pkru)
7900 +- __write_pkru(vcpu->arch.pkru);
7901 +-
7902 +- pt_guest_enter(vmx);
7903 +-
7904 +- atomic_switch_perf_msrs(vmx);
7905 +- atomic_switch_umwait_control_msr(vmx);
7906 +-
7907 +- if (enable_preemption_timer)
7908 +- vmx_update_hv_timer(vcpu);
7909 +-
7910 +- if (lapic_in_kernel(vcpu) &&
7911 +- vcpu->arch.apic->lapic_timer.timer_advance_ns)
7912 +- kvm_wait_lapic_expire(vcpu);
7913 +-
7914 +- /*
7915 +- * If this vCPU has touched SPEC_CTRL, restore the guest's value if
7916 +- * it's non-zero. Since vmentry is serialising on affected CPUs, there
7917 +- * is no need to worry about the conditional branch over the wrmsr
7918 +- * being speculatively taken.
7919 +- */
7920 +- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
7921 +-
7922 +- /* L1D Flush includes CPU buffer clear to mitigate MDS */
7923 +- if (static_branch_unlikely(&vmx_l1d_should_flush))
7924 +- vmx_l1d_flush(vcpu);
7925 +- else if (static_branch_unlikely(&mds_user_clear))
7926 +- mds_clear_cpu_buffers();
7927 +-
7928 +- if (vcpu->arch.cr2 != read_cr2())
7929 +- write_cr2(vcpu->arch.cr2);
7930 +-
7931 +- vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
7932 +- vmx->loaded_vmcs->launched);
7933 +-
7934 +- vcpu->arch.cr2 = read_cr2();
7935 +-
7936 +- /*
7937 +- * We do not use IBRS in the kernel. If this vCPU has used the
7938 +- * SPEC_CTRL MSR it may have left it on; save the value and
7939 +- * turn it off. This is much more efficient than blindly adding
7940 +- * it to the atomic save/restore list. Especially as the former
7941 +- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
7942 +- *
7943 +- * For non-nested case:
7944 +- * If the L01 MSR bitmap does not intercept the MSR, then we need to
7945 +- * save it.
7946 +- *
7947 +- * For nested case:
7948 +- * If the L02 MSR bitmap does not intercept the MSR, then we need to
7949 +- * save it.
7950 +- */
7951 +- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
7952 +- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
7953 +-
7954 +- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
7955 +-
7956 +- /* All fields are clean at this point */
7957 +- if (static_branch_unlikely(&enable_evmcs))
7958 +- current_evmcs->hv_clean_fields |=
7959 +- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
7960 +-
7961 +- if (static_branch_unlikely(&enable_evmcs))
7962 +- current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
7963 +-
7964 +- /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
7965 +- if (vmx->host_debugctlmsr)
7966 +- update_debugctlmsr(vmx->host_debugctlmsr);
7967 +-
7968 +-#ifndef CONFIG_X86_64
7969 +- /*
7970 +- * The sysexit path does not restore ds/es, so we must set them to
7971 +- * a reasonable value ourselves.
7972 +- *
7973 +- * We can't defer this to vmx_prepare_switch_to_host() since that
7974 +- * function may be executed in interrupt context, which saves and
7975 +- * restore segments around it, nullifying its effect.
7976 +- */
7977 +- loadsegment(ds, __USER_DS);
7978 +- loadsegment(es, __USER_DS);
7979 +-#endif
7980 +-
7981 +- vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
7982 +- | (1 << VCPU_EXREG_RFLAGS)
7983 +- | (1 << VCPU_EXREG_PDPTR)
7984 +- | (1 << VCPU_EXREG_SEGMENTS)
7985 +- | (1 << VCPU_EXREG_CR3));
7986 +- vcpu->arch.regs_dirty = 0;
7987 +-
7988 +- pt_guest_exit(vmx);
7989 +-
7990 +- /*
7991 +- * eager fpu is enabled if PKEY is supported and CR4 is switched
7992 +- * back on host, so it is safe to read guest PKRU from current
7993 +- * XSAVE.
7994 +- */
7995 +- if (static_cpu_has(X86_FEATURE_PKU) &&
7996 +- kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
7997 +- vcpu->arch.pkru = rdpkru();
7998 +- if (vcpu->arch.pkru != vmx->host_pkru)
7999 +- __write_pkru(vmx->host_pkru);
8000 +- }
8001 +-
8002 +- kvm_load_host_xsave_state(vcpu);
8003 +-
8004 +- vmx->nested.nested_run_pending = 0;
8005 +- vmx->idt_vectoring_info = 0;
8006 +-
8007 +- vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
8008 +- if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
8009 +- kvm_machine_check();
8010 +-
8011 +- if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
8012 +- return;
8013 +-
8014 +- vmx->loaded_vmcs->launched = 1;
8015 +- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
8016 +-
8017 +- vmx_recover_nmi_blocking(vmx);
8018 +- vmx_complete_interrupts(vmx);
8019 +-}
8020 +-
8021 +-static struct kvm *vmx_vm_alloc(void)
8022 +-{
8023 +- struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
8024 +- GFP_KERNEL_ACCOUNT | __GFP_ZERO,
8025 +- PAGE_KERNEL);
8026 +- return &kvm_vmx->kvm;
8027 +-}
8028 +-
8029 +-static void vmx_vm_free(struct kvm *kvm)
8030 +-{
8031 +- kfree(kvm->arch.hyperv.hv_pa_pg);
8032 +- vfree(to_kvm_vmx(kvm));
8033 +-}
8034 +-
8035 +-static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
8036 +-{
8037 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8038 +-
8039 +- if (enable_pml)
8040 +- vmx_destroy_pml_buffer(vmx);
8041 +- free_vpid(vmx->vpid);
8042 +- nested_vmx_free_vcpu(vcpu);
8043 +- free_loaded_vmcs(vmx->loaded_vmcs);
8044 +- kvm_vcpu_uninit(vcpu);
8045 +- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
8046 +- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
8047 +- kmem_cache_free(kvm_vcpu_cache, vmx);
8048 +-}
8049 +-
8050 +-static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
8051 +-{
8052 +- int err;
8053 +- struct vcpu_vmx *vmx;
8054 +- unsigned long *msr_bitmap;
8055 +- int i, cpu;
8056 +-
8057 +- BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
8058 +- "struct kvm_vcpu must be at offset 0 for arch usercopy region");
8059 +-
8060 +- vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
8061 +- if (!vmx)
8062 +- return ERR_PTR(-ENOMEM);
8063 +-
8064 +- vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
8065 +- GFP_KERNEL_ACCOUNT);
8066 +- if (!vmx->vcpu.arch.user_fpu) {
8067 +- printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
8068 +- err = -ENOMEM;
8069 +- goto free_partial_vcpu;
8070 +- }
8071 +-
8072 +- vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
8073 +- GFP_KERNEL_ACCOUNT);
8074 +- if (!vmx->vcpu.arch.guest_fpu) {
8075 +- printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
8076 +- err = -ENOMEM;
8077 +- goto free_user_fpu;
8078 +- }
8079 +-
8080 +- vmx->vpid = allocate_vpid();
8081 +-
8082 +- err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
8083 +- if (err)
8084 +- goto free_vcpu;
8085 +-
8086 +- err = -ENOMEM;
8087 +-
8088 +- /*
8089 +- * If PML is turned on, failure on enabling PML just results in failure
8090 +- * of creating the vcpu, therefore we can simplify PML logic (by
8091 +- * avoiding dealing with cases, such as enabling PML partially on vcpus
8092 +- * for the guest), etc.
8093 +- */
8094 +- if (enable_pml) {
8095 +- vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
8096 +- if (!vmx->pml_pg)
8097 +- goto uninit_vcpu;
8098 +- }
8099 +-
8100 +- BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
8101 +-
8102 +- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
8103 +- u32 index = vmx_msr_index[i];
8104 +- u32 data_low, data_high;
8105 +- int j = vmx->nmsrs;
8106 +-
8107 +- if (rdmsr_safe(index, &data_low, &data_high) < 0)
8108 +- continue;
8109 +- if (wrmsr_safe(index, data_low, data_high) < 0)
8110 +- continue;
8111 +-
8112 +- vmx->guest_msrs[j].index = i;
8113 +- vmx->guest_msrs[j].data = 0;
8114 +- switch (index) {
8115 +- case MSR_IA32_TSX_CTRL:
8116 +- /*
8117 +- * No need to pass TSX_CTRL_CPUID_CLEAR through, so
8118 +- * let's avoid changing CPUID bits under the host
8119 +- * kernel's feet.
8120 +- */
8121 +- vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
8122 +- break;
8123 +- default:
8124 +- vmx->guest_msrs[j].mask = -1ull;
8125 +- break;
8126 +- }
8127 +- ++vmx->nmsrs;
8128 +- }
8129 +-
8130 +- err = alloc_loaded_vmcs(&vmx->vmcs01);
8131 +- if (err < 0)
8132 +- goto free_pml;
8133 +-
8134 +- msr_bitmap = vmx->vmcs01.msr_bitmap;
8135 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
8136 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
8137 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
8138 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
8139 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
8140 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
8141 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
8142 +- if (kvm_cstate_in_guest(kvm)) {
8143 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
8144 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
8145 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
8146 +- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
8147 +- }
8148 +- vmx->msr_bitmap_mode = 0;
8149 +-
8150 +- vmx->loaded_vmcs = &vmx->vmcs01;
8151 +- cpu = get_cpu();
8152 +- vmx_vcpu_load(&vmx->vcpu, cpu);
8153 +- vmx->vcpu.cpu = cpu;
8154 +- init_vmcs(vmx);
8155 +- vmx_vcpu_put(&vmx->vcpu);
8156 +- put_cpu();
8157 +- if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
8158 +- err = alloc_apic_access_page(kvm);
8159 +- if (err)
8160 +- goto free_vmcs;
8161 +- }
8162 +-
8163 +- if (enable_ept && !enable_unrestricted_guest) {
8164 +- err = init_rmode_identity_map(kvm);
8165 +- if (err)
8166 +- goto free_vmcs;
8167 +- }
8168 +-
8169 +- if (nested)
8170 +- nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
8171 +- vmx_capability.ept,
8172 +- kvm_vcpu_apicv_active(&vmx->vcpu));
8173 +- else
8174 +- memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
8175 +-
8176 +- vmx->nested.posted_intr_nv = -1;
8177 +- vmx->nested.current_vmptr = -1ull;
8178 +-
8179 +- vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
8180 +-
8181 +- /*
8182 +- * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
8183 +- * or POSTED_INTR_WAKEUP_VECTOR.
8184 +- */
8185 +- vmx->pi_desc.nv = POSTED_INTR_VECTOR;
8186 +- vmx->pi_desc.sn = 1;
8187 +-
8188 +- vmx->ept_pointer = INVALID_PAGE;
8189 +-
8190 +- return &vmx->vcpu;
8191 +-
8192 +-free_vmcs:
8193 +- free_loaded_vmcs(vmx->loaded_vmcs);
8194 +-free_pml:
8195 +- vmx_destroy_pml_buffer(vmx);
8196 +-uninit_vcpu:
8197 +- kvm_vcpu_uninit(&vmx->vcpu);
8198 +-free_vcpu:
8199 +- free_vpid(vmx->vpid);
8200 +- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
8201 +-free_user_fpu:
8202 +- kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
8203 +-free_partial_vcpu:
8204 +- kmem_cache_free(kvm_vcpu_cache, vmx);
8205 +- return ERR_PTR(err);
8206 +-}
8207 +-
8208 +-#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
8209 +-#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
8210 +-
8211 +-static int vmx_vm_init(struct kvm *kvm)
8212 +-{
8213 +- spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
8214 +-
8215 +- if (!ple_gap)
8216 +- kvm->arch.pause_in_guest = true;
8217 +-
8218 +- if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
8219 +- switch (l1tf_mitigation) {
8220 +- case L1TF_MITIGATION_OFF:
8221 +- case L1TF_MITIGATION_FLUSH_NOWARN:
8222 +- /* 'I explicitly don't care' is set */
8223 +- break;
8224 +- case L1TF_MITIGATION_FLUSH:
8225 +- case L1TF_MITIGATION_FLUSH_NOSMT:
8226 +- case L1TF_MITIGATION_FULL:
8227 +- /*
8228 +- * Warn upon starting the first VM in a potentially
8229 +- * insecure environment.
8230 +- */
8231 +- if (sched_smt_active())
8232 +- pr_warn_once(L1TF_MSG_SMT);
8233 +- if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
8234 +- pr_warn_once(L1TF_MSG_L1D);
8235 +- break;
8236 +- case L1TF_MITIGATION_FULL_FORCE:
8237 +- /* Flush is enforced */
8238 +- break;
8239 +- }
8240 +- }
8241 +- return 0;
8242 +-}
8243 +-
8244 +-static int __init vmx_check_processor_compat(void)
8245 +-{
8246 +- struct vmcs_config vmcs_conf;
8247 +- struct vmx_capability vmx_cap;
8248 +-
8249 +- if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
8250 +- return -EIO;
8251 +- if (nested)
8252 +- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
8253 +- enable_apicv);
8254 +- if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
8255 +- printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
8256 +- smp_processor_id());
8257 +- return -EIO;
8258 +- }
8259 +- return 0;
8260 +-}
8261 +-
8262 +-static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
8263 +-{
8264 +- u8 cache;
8265 +- u64 ipat = 0;
8266 +-
8267 +- /* For VT-d and EPT combination
8268 +- * 1. MMIO: always map as UC
8269 +- * 2. EPT with VT-d:
8270 +- * a. VT-d without snooping control feature: can't guarantee the
8271 +- * result, try to trust guest.
8272 +- * b. VT-d with snooping control feature: snooping control feature of
8273 +- * VT-d engine can guarantee the cache correctness. Just set it
8274 +- * to WB to keep consistent with host. So the same as item 3.
8275 +- * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
8276 +- * consistent with host MTRR
8277 +- */
8278 +- if (is_mmio) {
8279 +- cache = MTRR_TYPE_UNCACHABLE;
8280 +- goto exit;
8281 +- }
8282 +-
8283 +- if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
8284 +- ipat = VMX_EPT_IPAT_BIT;
8285 +- cache = MTRR_TYPE_WRBACK;
8286 +- goto exit;
8287 +- }
8288 +-
8289 +- if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
8290 +- ipat = VMX_EPT_IPAT_BIT;
8291 +- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
8292 +- cache = MTRR_TYPE_WRBACK;
8293 +- else
8294 +- cache = MTRR_TYPE_UNCACHABLE;
8295 +- goto exit;
8296 +- }
8297 +-
8298 +- cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
8299 +-
8300 +-exit:
8301 +- return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
8302 +-}
8303 +-
8304 +-static int vmx_get_lpage_level(void)
8305 +-{
8306 +- if (enable_ept && !cpu_has_vmx_ept_1g_page())
8307 +- return PT_DIRECTORY_LEVEL;
8308 +- else
8309 +- /* For shadow and EPT supported 1GB page */
8310 +- return PT_PDPE_LEVEL;
8311 +-}
8312 +-
8313 +-static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
8314 +-{
8315 +- /*
8316 +- * These bits in the secondary execution controls field
8317 +- * are dynamic, the others are mostly based on the hypervisor
8318 +- * architecture and the guest's CPUID. Do not touch the
8319 +- * dynamic bits.
8320 +- */
8321 +- u32 mask =
8322 +- SECONDARY_EXEC_SHADOW_VMCS |
8323 +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
8324 +- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
8325 +- SECONDARY_EXEC_DESC;
8326 +-
8327 +- u32 new_ctl = vmx->secondary_exec_control;
8328 +- u32 cur_ctl = secondary_exec_controls_get(vmx);
8329 +-
8330 +- secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
8331 +-}
8332 +-
8333 +-/*
8334 +- * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
8335 +- * (indicating "allowed-1") if they are supported in the guest's CPUID.
8336 +- */
8337 +-static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
8338 +-{
8339 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8340 +- struct kvm_cpuid_entry2 *entry;
8341 +-
8342 +- vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
8343 +- vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
8344 +-
8345 +-#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
8346 +- if (entry && (entry->_reg & (_cpuid_mask))) \
8347 +- vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
8348 +-} while (0)
8349 +-
8350 +- entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
8351 +- cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
8352 +- cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
8353 +- cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
8354 +- cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
8355 +- cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
8356 +- cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
8357 +- cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
8358 +- cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
8359 +- cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
8360 +- cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
8361 +- cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
8362 +- cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
8363 +- cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
8364 +- cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
8365 +-
8366 +- entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
8367 +- cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
8368 +- cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
8369 +- cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
8370 +- cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
8371 +- cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
8372 +- cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
8373 +-
8374 +-#undef cr4_fixed1_update
8375 +-}
8376 +-
8377 +-static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
8378 +-{
8379 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8380 +-
8381 +- if (kvm_mpx_supported()) {
8382 +- bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
8383 +-
8384 +- if (mpx_enabled) {
8385 +- vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
8386 +- vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
8387 +- } else {
8388 +- vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
8389 +- vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
8390 +- }
8391 +- }
8392 +-}
8393 +-
8394 +-static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
8395 +-{
8396 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8397 +- struct kvm_cpuid_entry2 *best = NULL;
8398 +- int i;
8399 +-
8400 +- for (i = 0; i < PT_CPUID_LEAVES; i++) {
8401 +- best = kvm_find_cpuid_entry(vcpu, 0x14, i);
8402 +- if (!best)
8403 +- return;
8404 +- vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
8405 +- vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
8406 +- vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
8407 +- vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
8408 +- }
8409 +-
8410 +- /* Get the number of configurable Address Ranges for filtering */
8411 +- vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps,
8412 +- PT_CAP_num_address_ranges);
8413 +-
8414 +- /* Initialize and clear the no dependency bits */
8415 +- vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
8416 +- RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC);
8417 +-
8418 +- /*
8419 +- * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise
8420 +- * will inject an #GP
8421 +- */
8422 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
8423 +- vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;
8424 +-
8425 +- /*
8426 +- * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and
8427 +- * PSBFreq can be set
8428 +- */
8429 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
8430 +- vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
8431 +- RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);
8432 +-
8433 +- /*
8434 +- * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and
8435 +- * MTCFreq can be set
8436 +- */
8437 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
8438 +- vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
8439 +- RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE);
8440 +-
8441 +- /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */
8442 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
8443 +- vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
8444 +- RTIT_CTL_PTW_EN);
8445 +-
8446 +- /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */
8447 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
8448 +- vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;
8449 +-
8450 +- /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */
8451 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
8452 +- vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
8453 +-
8454 +- /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */
8455 +- if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
8456 +- vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
8457 +-
8458 +- /* unmask address range configure area */
8459 +- for (i = 0; i < vmx->pt_desc.addr_range; i++)
8460 +- vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
8461 +-}
8462 +-
8463 +-static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
8464 +-{
8465 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8466 +-
8467 +- /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
8468 +- vcpu->arch.xsaves_enabled = false;
8469 +-
8470 +- if (cpu_has_secondary_exec_ctrls()) {
8471 +- vmx_compute_secondary_exec_control(vmx);
8472 +- vmcs_set_secondary_exec_control(vmx);
8473 +- }
8474 +-
8475 +- if (nested_vmx_allowed(vcpu))
8476 +- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
8477 +- FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
8478 +- FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
8479 +- else
8480 +- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
8481 +- ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
8482 +- FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
8483 +-
8484 +- if (nested_vmx_allowed(vcpu)) {
8485 +- nested_vmx_cr_fixed1_bits_update(vcpu);
8486 +- nested_vmx_entry_exit_ctls_update(vcpu);
8487 +- }
8488 +-
8489 +- if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
8490 +- guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
8491 +- update_intel_pt_cfg(vcpu);
8492 +-
8493 +- if (boot_cpu_has(X86_FEATURE_RTM)) {
8494 +- struct shared_msr_entry *msr;
8495 +- msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL);
8496 +- if (msr) {
8497 +- bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
8498 +- vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
8499 +- }
8500 +- }
8501 +-}
8502 +-
8503 +-static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
8504 +-{
8505 +- if (func == 1 && nested)
8506 +- entry->ecx |= bit(X86_FEATURE_VMX);
8507 +-}
8508 +-
8509 +-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
8510 +-{
8511 +- to_vmx(vcpu)->req_immediate_exit = true;
8512 +-}
8513 +-
8514 +-static int vmx_check_intercept(struct kvm_vcpu *vcpu,
8515 +- struct x86_instruction_info *info,
8516 +- enum x86_intercept_stage stage)
8517 +-{
8518 +- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
8519 +- struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
8520 +-
8521 +- /*
8522 +- * RDPID causes #UD if disabled through secondary execution controls.
8523 +- * Because it is marked as EmulateOnUD, we need to intercept it here.
8524 +- */
8525 +- if (info->intercept == x86_intercept_rdtscp &&
8526 +- !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
8527 +- ctxt->exception.vector = UD_VECTOR;
8528 +- ctxt->exception.error_code_valid = false;
8529 +- return X86EMUL_PROPAGATE_FAULT;
8530 +- }
8531 +-
8532 +- /* TODO: check more intercepts... */
8533 +- return X86EMUL_CONTINUE;
8534 +-}
8535 +-
8536 +-#ifdef CONFIG_X86_64
8537 +-/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
8538 +-static inline int u64_shl_div_u64(u64 a, unsigned int shift,
8539 +- u64 divisor, u64 *result)
8540 +-{
8541 +- u64 low = a << shift, high = a >> (64 - shift);
8542 +-
8543 +- /* To avoid the overflow on divq */
8544 +- if (high >= divisor)
8545 +- return 1;
8546 +-
8547 +- /* Low hold the result, high hold rem which is discarded */
8548 +- asm("divq %2\n\t" : "=a" (low), "=d" (high) :
8549 +- "rm" (divisor), "0" (low), "1" (high));
8550 +- *result = low;
8551 +-
8552 +- return 0;
8553 +-}
8554 +-
8555 +-static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
8556 +- bool *expired)
8557 +-{
8558 +- struct vcpu_vmx *vmx;
8559 +- u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
8560 +- struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
8561 +-
8562 +- if (kvm_mwait_in_guest(vcpu->kvm) ||
8563 +- kvm_can_post_timer_interrupt(vcpu))
8564 +- return -EOPNOTSUPP;
8565 +-
8566 +- vmx = to_vmx(vcpu);
8567 +- tscl = rdtsc();
8568 +- guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
8569 +- delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
8570 +- lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
8571 +- ktimer->timer_advance_ns);
8572 +-
8573 +- if (delta_tsc > lapic_timer_advance_cycles)
8574 +- delta_tsc -= lapic_timer_advance_cycles;
8575 +- else
8576 +- delta_tsc = 0;
8577 +-
8578 +- /* Convert to host delta tsc if tsc scaling is enabled */
8579 +- if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
8580 +- delta_tsc && u64_shl_div_u64(delta_tsc,
8581 +- kvm_tsc_scaling_ratio_frac_bits,
8582 +- vcpu->arch.tsc_scaling_ratio, &delta_tsc))
8583 +- return -ERANGE;
8584 +-
8585 +- /*
8586 +- * If the delta tsc can't fit in the 32 bit after the multi shift,
8587 +- * we can't use the preemption timer.
8588 +- * It's possible that it fits on later vmentries, but checking
8589 +- * on every vmentry is costly so we just use an hrtimer.
8590 +- */
8591 +- if (delta_tsc >> (cpu_preemption_timer_multi + 32))
8592 +- return -ERANGE;
8593 +-
8594 +- vmx->hv_deadline_tsc = tscl + delta_tsc;
8595 +- *expired = !delta_tsc;
8596 +- return 0;
8597 +-}
8598 +-
8599 +-static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
8600 +-{
8601 +- to_vmx(vcpu)->hv_deadline_tsc = -1;
8602 +-}
8603 +-#endif
8604 +-
8605 +-static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
8606 +-{
8607 +- if (!kvm_pause_in_guest(vcpu->kvm))
8608 +- shrink_ple_window(vcpu);
8609 +-}
8610 +-
8611 +-static void vmx_slot_enable_log_dirty(struct kvm *kvm,
8612 +- struct kvm_memory_slot *slot)
8613 +-{
8614 +- kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
8615 +- kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
8616 +-}
8617 +-
8618 +-static void vmx_slot_disable_log_dirty(struct kvm *kvm,
8619 +- struct kvm_memory_slot *slot)
8620 +-{
8621 +- kvm_mmu_slot_set_dirty(kvm, slot);
8622 +-}
8623 +-
8624 +-static void vmx_flush_log_dirty(struct kvm *kvm)
8625 +-{
8626 +- kvm_flush_pml_buffers(kvm);
8627 +-}
8628 +-
8629 +-static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
8630 +-{
8631 +- struct vmcs12 *vmcs12;
8632 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8633 +- gpa_t gpa, dst;
8634 +-
8635 +- if (is_guest_mode(vcpu)) {
8636 +- WARN_ON_ONCE(vmx->nested.pml_full);
8637 +-
8638 +- /*
8639 +- * Check if PML is enabled for the nested guest.
8640 +- * Whether eptp bit 6 is set is already checked
8641 +- * as part of A/D emulation.
8642 +- */
8643 +- vmcs12 = get_vmcs12(vcpu);
8644 +- if (!nested_cpu_has_pml(vmcs12))
8645 +- return 0;
8646 +-
8647 +- if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
8648 +- vmx->nested.pml_full = true;
8649 +- return 1;
8650 +- }
8651 +-
8652 +- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
8653 +- dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
8654 +-
8655 +- if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
8656 +- offset_in_page(dst), sizeof(gpa)))
8657 +- return 0;
8658 +-
8659 +- vmcs12->guest_pml_index--;
8660 +- }
8661 +-
8662 +- return 0;
8663 +-}
8664 +-
8665 +-static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
8666 +- struct kvm_memory_slot *memslot,
8667 +- gfn_t offset, unsigned long mask)
8668 +-{
8669 +- kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
8670 +-}
8671 +-
8672 +-static void __pi_post_block(struct kvm_vcpu *vcpu)
8673 +-{
8674 +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
8675 +- struct pi_desc old, new;
8676 +- unsigned int dest;
8677 +-
8678 +- do {
8679 +- old.control = new.control = pi_desc->control;
8680 +- WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
8681 +- "Wakeup handler not enabled while the VCPU is blocked\n");
8682 +-
8683 +- dest = cpu_physical_id(vcpu->cpu);
8684 +-
8685 +- if (x2apic_enabled())
8686 +- new.ndst = dest;
8687 +- else
8688 +- new.ndst = (dest << 8) & 0xFF00;
8689 +-
8690 +- /* set 'NV' to 'notification vector' */
8691 +- new.nv = POSTED_INTR_VECTOR;
8692 +- } while (cmpxchg64(&pi_desc->control, old.control,
8693 +- new.control) != old.control);
8694 +-
8695 +- if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
8696 +- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8697 +- list_del(&vcpu->blocked_vcpu_list);
8698 +- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8699 +- vcpu->pre_pcpu = -1;
8700 +- }
8701 +-}
8702 +-
8703 +-/*
8704 +- * This routine does the following things for vCPU which is going
8705 +- * to be blocked if VT-d PI is enabled.
8706 +- * - Store the vCPU to the wakeup list, so when interrupts happen
8707 +- * we can find the right vCPU to wake up.
8708 +- * - Change the Posted-interrupt descriptor as below:
8709 +- * 'NDST' <-- vcpu->pre_pcpu
8710 +- * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
8711 +- * - If 'ON' is set during this process, which means at least one
8712 +- * interrupt is posted for this vCPU, we cannot block it, in
8713 +- * this case, return 1, otherwise, return 0.
8714 +- *
8715 +- */
8716 +-static int pi_pre_block(struct kvm_vcpu *vcpu)
8717 +-{
8718 +- unsigned int dest;
8719 +- struct pi_desc old, new;
8720 +- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
8721 +-
8722 +- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
8723 +- !irq_remapping_cap(IRQ_POSTING_CAP) ||
8724 +- !kvm_vcpu_apicv_active(vcpu))
8725 +- return 0;
8726 +-
8727 +- WARN_ON(irqs_disabled());
8728 +- local_irq_disable();
8729 +- if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
8730 +- vcpu->pre_pcpu = vcpu->cpu;
8731 +- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8732 +- list_add_tail(&vcpu->blocked_vcpu_list,
8733 +- &per_cpu(blocked_vcpu_on_cpu,
8734 +- vcpu->pre_pcpu));
8735 +- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8736 +- }
8737 +-
8738 +- do {
8739 +- old.control = new.control = pi_desc->control;
8740 +-
8741 +- WARN((pi_desc->sn == 1),
8742 +- "Warning: SN field of posted-interrupts "
8743 +- "is set before blocking\n");
8744 +-
8745 +- /*
8746 +- * Since vCPU can be preempted during this process,
8747 +- * vcpu->cpu could be different with pre_pcpu, we
8748 +- * need to set pre_pcpu as the destination of wakeup
8749 +- * notification event, then we can find the right vCPU
8750 +- * to wakeup in wakeup handler if interrupts happen
8751 +- * when the vCPU is in blocked state.
8752 +- */
8753 +- dest = cpu_physical_id(vcpu->pre_pcpu);
8754 +-
8755 +- if (x2apic_enabled())
8756 +- new.ndst = dest;
8757 +- else
8758 +- new.ndst = (dest << 8) & 0xFF00;
8759 +-
8760 +- /* set 'NV' to 'wakeup vector' */
8761 +- new.nv = POSTED_INTR_WAKEUP_VECTOR;
8762 +- } while (cmpxchg64(&pi_desc->control, old.control,
8763 +- new.control) != old.control);
8764 +-
8765 +- /* We should not block the vCPU if an interrupt is posted for it. */
8766 +- if (pi_test_on(pi_desc) == 1)
8767 +- __pi_post_block(vcpu);
8768 +-
8769 +- local_irq_enable();
8770 +- return (vcpu->pre_pcpu == -1);
8771 +-}
8772 +-
8773 +-static int vmx_pre_block(struct kvm_vcpu *vcpu)
8774 +-{
8775 +- if (pi_pre_block(vcpu))
8776 +- return 1;
8777 +-
8778 +- if (kvm_lapic_hv_timer_in_use(vcpu))
8779 +- kvm_lapic_switch_to_sw_timer(vcpu);
8780 +-
8781 +- return 0;
8782 +-}
8783 +-
8784 +-static void pi_post_block(struct kvm_vcpu *vcpu)
8785 +-{
8786 +- if (vcpu->pre_pcpu == -1)
8787 +- return;
8788 +-
8789 +- WARN_ON(irqs_disabled());
8790 +- local_irq_disable();
8791 +- __pi_post_block(vcpu);
8792 +- local_irq_enable();
8793 +-}
8794 +-
8795 +-static void vmx_post_block(struct kvm_vcpu *vcpu)
8796 +-{
8797 +- if (kvm_x86_ops->set_hv_timer)
8798 +- kvm_lapic_switch_to_hv_timer(vcpu);
8799 +-
8800 +- pi_post_block(vcpu);
8801 +-}
8802 +-
8803 +-/*
8804 +- * vmx_update_pi_irte - set IRTE for Posted-Interrupts
8805 +- *
8806 +- * @kvm: kvm
8807 +- * @host_irq: host irq of the interrupt
8808 +- * @guest_irq: gsi of the interrupt
8809 +- * @set: set or unset PI
8810 +- * returns 0 on success, < 0 on failure
8811 +- */
8812 +-static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
8813 +- uint32_t guest_irq, bool set)
8814 +-{
8815 +- struct kvm_kernel_irq_routing_entry *e;
8816 +- struct kvm_irq_routing_table *irq_rt;
8817 +- struct kvm_lapic_irq irq;
8818 +- struct kvm_vcpu *vcpu;
8819 +- struct vcpu_data vcpu_info;
8820 +- int idx, ret = 0;
8821 +-
8822 +- if (!kvm_arch_has_assigned_device(kvm) ||
8823 +- !irq_remapping_cap(IRQ_POSTING_CAP) ||
8824 +- !kvm_vcpu_apicv_active(kvm->vcpus[0]))
8825 +- return 0;
8826 +-
8827 +- idx = srcu_read_lock(&kvm->irq_srcu);
8828 +- irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
8829 +- if (guest_irq >= irq_rt->nr_rt_entries ||
8830 +- hlist_empty(&irq_rt->map[guest_irq])) {
8831 +- pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
8832 +- guest_irq, irq_rt->nr_rt_entries);
8833 +- goto out;
8834 +- }
8835 +-
8836 +- hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
8837 +- if (e->type != KVM_IRQ_ROUTING_MSI)
8838 +- continue;
8839 +- /*
8840 +- * VT-d PI cannot support posting multicast/broadcast
8841 +- * interrupts to a vCPU, we still use interrupt remapping
8842 +- * for these kind of interrupts.
8843 +- *
8844 +- * For lowest-priority interrupts, we only support
8845 +- * those with single CPU as the destination, e.g. user
8846 +- * configures the interrupts via /proc/irq or uses
8847 +- * irqbalance to make the interrupts single-CPU.
8848 +- *
8849 +- * We will support full lowest-priority interrupt later.
8850 +- *
8851 +- * In addition, we can only inject generic interrupts using
8852 +- * the PI mechanism, refuse to route others through it.
8853 +- */
8854 +-
8855 +- kvm_set_msi_irq(kvm, e, &irq);
8856 +- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
8857 +- !kvm_irq_is_postable(&irq)) {
8858 +- /*
8859 +- * Make sure the IRTE is in remapped mode if
8860 +- * we don't handle it in posted mode.
8861 +- */
8862 +- ret = irq_set_vcpu_affinity(host_irq, NULL);
8863 +- if (ret < 0) {
8864 +- printk(KERN_INFO
8865 +- "failed to back to remapped mode, irq: %u\n",
8866 +- host_irq);
8867 +- goto out;
8868 +- }
8869 +-
8870 +- continue;
8871 +- }
8872 +-
8873 +- vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
8874 +- vcpu_info.vector = irq.vector;
8875 +-
8876 +- trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
8877 +- vcpu_info.vector, vcpu_info.pi_desc_addr, set);
8878 +-
8879 +- if (set)
8880 +- ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
8881 +- else
8882 +- ret = irq_set_vcpu_affinity(host_irq, NULL);
8883 +-
8884 +- if (ret < 0) {
8885 +- printk(KERN_INFO "%s: failed to update PI IRTE\n",
8886 +- __func__);
8887 +- goto out;
8888 +- }
8889 +- }
8890 +-
8891 +- ret = 0;
8892 +-out:
8893 +- srcu_read_unlock(&kvm->irq_srcu, idx);
8894 +- return ret;
8895 +-}
8896 +-
8897 +-static void vmx_setup_mce(struct kvm_vcpu *vcpu)
8898 +-{
8899 +- if (vcpu->arch.mcg_cap & MCG_LMCE_P)
8900 +- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
8901 +- FEATURE_CONTROL_LMCE;
8902 +- else
8903 +- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
8904 +- ~FEATURE_CONTROL_LMCE;
8905 +-}
8906 +-
8907 +-static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
8908 +-{
8909 +- /* we need a nested vmexit to enter SMM, postpone if run is pending */
8910 +- if (to_vmx(vcpu)->nested.nested_run_pending)
8911 +- return 0;
8912 +- return 1;
8913 +-}
8914 +-
8915 +-static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
8916 +-{
8917 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8918 +-
8919 +- vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
8920 +- if (vmx->nested.smm.guest_mode)
8921 +- nested_vmx_vmexit(vcpu, -1, 0, 0);
8922 +-
8923 +- vmx->nested.smm.vmxon = vmx->nested.vmxon;
8924 +- vmx->nested.vmxon = false;
8925 +- vmx_clear_hlt(vcpu);
8926 +- return 0;
8927 +-}
8928 +-
8929 +-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
8930 +-{
8931 +- struct vcpu_vmx *vmx = to_vmx(vcpu);
8932 +- int ret;
8933 +-
8934 +- if (vmx->nested.smm.vmxon) {
8935 +- vmx->nested.vmxon = true;
8936 +- vmx->nested.smm.vmxon = false;
8937 +- }
8938 +-
8939 +- if (vmx->nested.smm.guest_mode) {
8940 +- ret = nested_vmx_enter_non_root_mode(vcpu, false);
8941 +- if (ret)
8942 +- return ret;
8943 +-
8944 +- vmx->nested.smm.guest_mode = false;
8945 +- }
8946 +- return 0;
8947 +-}
8948 +-
8949 +-static int enable_smi_window(struct kvm_vcpu *vcpu)
8950 +-{
8951 +- return 0;
8952 +-}
8953 +-
8954 +-static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
8955 +-{
8956 +- return false;
8957 +-}
8958 +-
8959 +-static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
8960 +-{
8961 +- return to_vmx(vcpu)->nested.vmxon;
8962 +-}
8963 +-
8964 +-static __init int hardware_setup(void)
8965 +-{
8966 +- unsigned long host_bndcfgs;
8967 +- struct desc_ptr dt;
8968 +- int r, i;
8969 +-
8970 +- rdmsrl_safe(MSR_EFER, &host_efer);
8971 +-
8972 +- store_idt(&dt);
8973 +- host_idt_base = dt.address;
8974 +-
8975 +- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
8976 +- kvm_define_shared_msr(i, vmx_msr_index[i]);
8977 +-
8978 +- if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
8979 +- return -EIO;
8980 +-
8981 +- if (boot_cpu_has(X86_FEATURE_NX))
8982 +- kvm_enable_efer_bits(EFER_NX);
8983 +-
8984 +- if (boot_cpu_has(X86_FEATURE_MPX)) {
8985 +- rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
8986 +- WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
8987 +- }
8988 +-
8989 +- if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
8990 +- !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
8991 +- enable_vpid = 0;
8992 +-
8993 +- if (!cpu_has_vmx_ept() ||
8994 +- !cpu_has_vmx_ept_4levels() ||
8995 +- !cpu_has_vmx_ept_mt_wb() ||
8996 +- !cpu_has_vmx_invept_global())
8997 +- enable_ept = 0;
8998 +-
8999 +- if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
9000 +- enable_ept_ad_bits = 0;
9001 +-
9002 +- if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
9003 +- enable_unrestricted_guest = 0;
9004 +-
9005 +- if (!cpu_has_vmx_flexpriority())
9006 +- flexpriority_enabled = 0;
9007 +-
9008 +- if (!cpu_has_virtual_nmis())
9009 +- enable_vnmi = 0;
9010 +-
9011 +- /*
9012 +- * set_apic_access_page_addr() is used to reload apic access
9013 +- * page upon invalidation. No need to do anything if not
9014 +- * using the APIC_ACCESS_ADDR VMCS field.
9015 +- */
9016 +- if (!flexpriority_enabled)
9017 +- kvm_x86_ops->set_apic_access_page_addr = NULL;
9018 +-
9019 +- if (!cpu_has_vmx_tpr_shadow())
9020 +- kvm_x86_ops->update_cr8_intercept = NULL;
9021 +-
9022 +- if (enable_ept && !cpu_has_vmx_ept_2m_page())
9023 +- kvm_disable_largepages();
9024 +-
9025 +-#if IS_ENABLED(CONFIG_HYPERV)
9026 +- if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
9027 +- && enable_ept) {
9028 +- kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
9029 +- kvm_x86_ops->tlb_remote_flush_with_range =
9030 +- hv_remote_flush_tlb_with_range;
9031 +- }
9032 +-#endif
9033 +-
9034 +- if (!cpu_has_vmx_ple()) {
9035 +- ple_gap = 0;
9036 +- ple_window = 0;
9037 +- ple_window_grow = 0;
9038 +- ple_window_max = 0;
9039 +- ple_window_shrink = 0;
9040 +- }
9041 +-
9042 +- if (!cpu_has_vmx_apicv()) {
9043 +- enable_apicv = 0;
9044 +- kvm_x86_ops->sync_pir_to_irr = NULL;
9045 +- }
9046 +-
9047 +- if (cpu_has_vmx_tsc_scaling()) {
9048 +- kvm_has_tsc_control = true;
9049 +- kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
9050 +- kvm_tsc_scaling_ratio_frac_bits = 48;
9051 +- }
9052 +-
9053 +- set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
9054 +-
9055 +- if (enable_ept)
9056 +- vmx_enable_tdp();
9057 +- else
9058 +- kvm_disable_tdp();
9059 +-
9060 +- /*
9061 +- * Only enable PML when hardware supports PML feature, and both EPT
9062 +- * and EPT A/D bit features are enabled -- PML depends on them to work.
9063 +- */
9064 +- if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
9065 +- enable_pml = 0;
9066 +-
9067 +- if (!enable_pml) {
9068 +- kvm_x86_ops->slot_enable_log_dirty = NULL;
9069 +- kvm_x86_ops->slot_disable_log_dirty = NULL;
9070 +- kvm_x86_ops->flush_log_dirty = NULL;
9071 +- kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
9072 +- }
9073 +-
9074 +- if (!cpu_has_vmx_preemption_timer())
9075 +- enable_preemption_timer = false;
9076 +-
9077 +- if (enable_preemption_timer) {
9078 +- u64 use_timer_freq = 5000ULL * 1000 * 1000;
9079 +- u64 vmx_msr;
9080 +-
9081 +- rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
9082 +- cpu_preemption_timer_multi =
9083 +- vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
9084 +-
9085 +- if (tsc_khz)
9086 +- use_timer_freq = (u64)tsc_khz * 1000;
9087 +- use_timer_freq >>= cpu_preemption_timer_multi;
9088 +-
9089 +- /*
9090 +- * KVM "disables" the preemption timer by setting it to its max
9091 +- * value. Don't use the timer if it might cause spurious exits
9092 +- * at a rate faster than 0.1 Hz (of uninterrupted guest time).
9093 +- */
9094 +- if (use_timer_freq > 0xffffffffu / 10)
9095 +- enable_preemption_timer = false;
9096 +- }
9097 +-
9098 +- if (!enable_preemption_timer) {
9099 +- kvm_x86_ops->set_hv_timer = NULL;
9100 +- kvm_x86_ops->cancel_hv_timer = NULL;
9101 +- kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
9102 +- }
9103 +-
9104 +- kvm_set_posted_intr_wakeup_handler(wakeup_handler);
9105 +-
9106 +- kvm_mce_cap_supported |= MCG_LMCE_P;
9107 +-
9108 +- if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
9109 +- return -EINVAL;
9110 +- if (!enable_ept || !cpu_has_vmx_intel_pt())
9111 +- pt_mode = PT_MODE_SYSTEM;
9112 +-
9113 +- if (nested) {
9114 +- nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
9115 +- vmx_capability.ept, enable_apicv);
9116 +-
9117 +- r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
9118 +- if (r)
9119 +- return r;
9120 +- }
9121 +-
9122 +- r = alloc_kvm_area();
9123 +- if (r)
9124 +- nested_vmx_hardware_unsetup();
9125 +- return r;
9126 +-}
9127 +-
9128 +-static __exit void hardware_unsetup(void)
9129 +-{
9130 +- if (nested)
9131 +- nested_vmx_hardware_unsetup();
9132 +-
9133 +- free_kvm_area();
9134 +-}
9135 +-
9136 +-static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
9137 +- .cpu_has_kvm_support = cpu_has_kvm_support,
9138 +- .disabled_by_bios = vmx_disabled_by_bios,
9139 +- .hardware_setup = hardware_setup,
9140 +- .hardware_unsetup = hardware_unsetup,
9141 +- .check_processor_compatibility = vmx_check_processor_compat,
9142 +- .hardware_enable = hardware_enable,
9143 +- .hardware_disable = hardware_disable,
9144 +- .cpu_has_accelerated_tpr = report_flexpriority,
9145 +- .has_emulated_msr = vmx_has_emulated_msr,
9146 +-
9147 +- .vm_init = vmx_vm_init,
9148 +- .vm_alloc = vmx_vm_alloc,
9149 +- .vm_free = vmx_vm_free,
9150 +-
9151 +- .vcpu_create = vmx_create_vcpu,
9152 +- .vcpu_free = vmx_free_vcpu,
9153 +- .vcpu_reset = vmx_vcpu_reset,
9154 +-
9155 +- .prepare_guest_switch = vmx_prepare_switch_to_guest,
9156 +- .vcpu_load = vmx_vcpu_load,
9157 +- .vcpu_put = vmx_vcpu_put,
9158 +-
9159 +- .update_bp_intercept = update_exception_bitmap,
9160 +- .get_msr_feature = vmx_get_msr_feature,
9161 +- .get_msr = vmx_get_msr,
9162 +- .set_msr = vmx_set_msr,
9163 +- .get_segment_base = vmx_get_segment_base,
9164 +- .get_segment = vmx_get_segment,
9165 +- .set_segment = vmx_set_segment,
9166 +- .get_cpl = vmx_get_cpl,
9167 +- .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
9168 +- .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
9169 +- .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
9170 +- .set_cr0 = vmx_set_cr0,
9171 +- .set_cr3 = vmx_set_cr3,
9172 +- .set_cr4 = vmx_set_cr4,
9173 +- .set_efer = vmx_set_efer,
9174 +- .get_idt = vmx_get_idt,
9175 +- .set_idt = vmx_set_idt,
9176 +- .get_gdt = vmx_get_gdt,
9177 +- .set_gdt = vmx_set_gdt,
9178 +- .get_dr6 = vmx_get_dr6,
9179 +- .set_dr6 = vmx_set_dr6,
9180 +- .set_dr7 = vmx_set_dr7,
9181 +- .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
9182 +- .cache_reg = vmx_cache_reg,
9183 +- .get_rflags = vmx_get_rflags,
9184 +- .set_rflags = vmx_set_rflags,
9185 +-
9186 +- .tlb_flush = vmx_flush_tlb,
9187 +- .tlb_flush_gva = vmx_flush_tlb_gva,
9188 +-
9189 +- .run = vmx_vcpu_run,
9190 +- .handle_exit = vmx_handle_exit,
9191 +- .skip_emulated_instruction = skip_emulated_instruction,
9192 +- .set_interrupt_shadow = vmx_set_interrupt_shadow,
9193 +- .get_interrupt_shadow = vmx_get_interrupt_shadow,
9194 +- .patch_hypercall = vmx_patch_hypercall,
9195 +- .set_irq = vmx_inject_irq,
9196 +- .set_nmi = vmx_inject_nmi,
9197 +- .queue_exception = vmx_queue_exception,
9198 +- .cancel_injection = vmx_cancel_injection,
9199 +- .interrupt_allowed = vmx_interrupt_allowed,
9200 +- .nmi_allowed = vmx_nmi_allowed,
9201 +- .get_nmi_mask = vmx_get_nmi_mask,
9202 +- .set_nmi_mask = vmx_set_nmi_mask,
9203 +- .enable_nmi_window = enable_nmi_window,
9204 +- .enable_irq_window = enable_irq_window,
9205 +- .update_cr8_intercept = update_cr8_intercept,
9206 +- .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
9207 +- .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
9208 +- .get_enable_apicv = vmx_get_enable_apicv,
9209 +- .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
9210 +- .load_eoi_exitmap = vmx_load_eoi_exitmap,
9211 +- .apicv_post_state_restore = vmx_apicv_post_state_restore,
9212 +- .hwapic_irr_update = vmx_hwapic_irr_update,
9213 +- .hwapic_isr_update = vmx_hwapic_isr_update,
9214 +- .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
9215 +- .sync_pir_to_irr = vmx_sync_pir_to_irr,
9216 +- .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
9217 +- .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
9218 +-
9219 +- .set_tss_addr = vmx_set_tss_addr,
9220 +- .set_identity_map_addr = vmx_set_identity_map_addr,
9221 +- .get_tdp_level = get_ept_level,
9222 +- .get_mt_mask = vmx_get_mt_mask,
9223 +-
9224 +- .get_exit_info = vmx_get_exit_info,
9225 +-
9226 +- .get_lpage_level = vmx_get_lpage_level,
9227 +-
9228 +- .cpuid_update = vmx_cpuid_update,
9229 +-
9230 +- .rdtscp_supported = vmx_rdtscp_supported,
9231 +- .invpcid_supported = vmx_invpcid_supported,
9232 +-
9233 +- .set_supported_cpuid = vmx_set_supported_cpuid,
9234 +-
9235 +- .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
9236 +-
9237 +- .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
9238 +- .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
9239 +-
9240 +- .set_tdp_cr3 = vmx_set_cr3,
9241 +-
9242 +- .check_intercept = vmx_check_intercept,
9243 +- .handle_exit_irqoff = vmx_handle_exit_irqoff,
9244 +- .mpx_supported = vmx_mpx_supported,
9245 +- .xsaves_supported = vmx_xsaves_supported,
9246 +- .umip_emulated = vmx_umip_emulated,
9247 +- .pt_supported = vmx_pt_supported,
9248 +-
9249 +- .request_immediate_exit = vmx_request_immediate_exit,
9250 +-
9251 +- .sched_in = vmx_sched_in,
9252 +-
9253 +- .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
9254 +- .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
9255 +- .flush_log_dirty = vmx_flush_log_dirty,
9256 +- .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
9257 +- .write_log_dirty = vmx_write_pml_buffer,
9258 +-
9259 +- .pre_block = vmx_pre_block,
9260 +- .post_block = vmx_post_block,
9261 +-
9262 +- .pmu_ops = &intel_pmu_ops,
9263 +-
9264 +- .update_pi_irte = vmx_update_pi_irte,
9265 +-
9266 +-#ifdef CONFIG_X86_64
9267 +- .set_hv_timer = vmx_set_hv_timer,
9268 +- .cancel_hv_timer = vmx_cancel_hv_timer,
9269 +-#endif
9270 +-
9271 +- .setup_mce = vmx_setup_mce,
9272 +-
9273 +- .smi_allowed = vmx_smi_allowed,
9274 +- .pre_enter_smm = vmx_pre_enter_smm,
9275 +- .pre_leave_smm = vmx_pre_leave_smm,
9276 +- .enable_smi_window = enable_smi_window,
9277 +-
9278 +- .check_nested_events = NULL,
9279 +- .get_nested_state = NULL,
9280 +- .set_nested_state = NULL,
9281 +- .get_vmcs12_pages = NULL,
9282 +- .nested_enable_evmcs = NULL,
9283 +- .nested_get_evmcs_version = NULL,
9284 +- .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
9285 +- .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
9286 +-};
9287 +-
9288 +-static void vmx_cleanup_l1d_flush(void)
9289 +-{
9290 +- if (vmx_l1d_flush_pages) {
9291 +- free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
9292 +- vmx_l1d_flush_pages = NULL;
9293 +- }
9294 +- /* Restore state so sysfs ignores VMX */
9295 +- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
9296 +-}
9297 +-
9298 +-static void vmx_exit(void)
9299 +-{
9300 +-#ifdef CONFIG_KEXEC_CORE
9301 +- RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
9302 +- synchronize_rcu();
9303 +-#endif
9304 +-
9305 +- kvm_exit();
9306 +-
9307 +-#if IS_ENABLED(CONFIG_HYPERV)
9308 +- if (static_branch_unlikely(&enable_evmcs)) {
9309 +- int cpu;
9310 +- struct hv_vp_assist_page *vp_ap;
9311 +- /*
9312 +- * Reset everything to support using non-enlightened VMCS
9313 +- * access later (e.g. when we reload the module with
9314 +- * enlightened_vmcs=0)
9315 +- */
9316 +- for_each_online_cpu(cpu) {
9317 +- vp_ap = hv_get_vp_assist_page(cpu);
9318 +-
9319 +- if (!vp_ap)
9320 +- continue;
9321 +-
9322 +- vp_ap->nested_control.features.directhypercall = 0;
9323 +- vp_ap->current_nested_vmcs = 0;
9324 +- vp_ap->enlighten_vmentry = 0;
9325 +- }
9326 +-
9327 +- static_branch_disable(&enable_evmcs);
9328 +- }
9329 +-#endif
9330 +- vmx_cleanup_l1d_flush();
9331 +-}
9332 +-module_exit(vmx_exit);
9333 +-
9334 +-static int __init vmx_init(void)
9335 +-{
9336 +- int r;
9337 +-
9338 +-#if IS_ENABLED(CONFIG_HYPERV)
9339 +- /*
9340 +- * Enlightened VMCS usage should be recommended and the host needs
9341 +- * to support eVMCS v1 or above. We can also disable eVMCS support
9342 +- * with module parameter.
9343 +- */
9344 +- if (enlightened_vmcs &&
9345 +- ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
9346 +- (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
9347 +- KVM_EVMCS_VERSION) {
9348 +- int cpu;
9349 +-
9350 +- /* Check that we have assist pages on all online CPUs */
9351 +- for_each_online_cpu(cpu) {
9352 +- if (!hv_get_vp_assist_page(cpu)) {
9353 +- enlightened_vmcs = false;
9354 +- break;
9355 +- }
9356 +- }
9357 +-
9358 +- if (enlightened_vmcs) {
9359 +- pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
9360 +- static_branch_enable(&enable_evmcs);
9361 +- }
9362 +-
9363 +- if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
9364 +- vmx_x86_ops.enable_direct_tlbflush
9365 +- = hv_enable_direct_tlbflush;
9366 +-
9367 +- } else {
9368 +- enlightened_vmcs = false;
9369 +- }
9370 +-#endif
9371 +-
9372 +- r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
9373 +- __alignof__(struct vcpu_vmx), THIS_MODULE);
9374 +- if (r)
9375 +- return r;
9376 +-
9377 +- /*
9378 +- * Must be called after kvm_init() so enable_ept is properly set
9379 +- * up. Hand the parameter mitigation value in which was stored in
9380 +- * the pre module init parser. If no parameter was given, it will
9381 +- * contain 'auto' which will be turned into the default 'cond'
9382 +- * mitigation mode.
9383 +- */
9384 +- r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
9385 +- if (r) {
9386 +- vmx_exit();
9387 +- return r;
9388 +- }
9389 +-
9390 +-#ifdef CONFIG_KEXEC_CORE
9391 +- rcu_assign_pointer(crash_vmclear_loaded_vmcss,
9392 +- crash_vmclear_local_loaded_vmcss);
9393 +-#endif
9394 +- vmx_check_vmcs12_offsets();
9395 +-
9396 +- return 0;
9397 +-}
9398 +-module_init(vmx_init);
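The vmx.c block above includes u64_shl_div_u64(), used when converting a guest TSC delta for the VMX preemption timer: it computes (a << shift) / divisor and returns 1 when the quotient would not fit in 64 bits, since divq faults on quotient overflow. A minimal userspace sketch of the same guard, using the GCC/Clang __uint128_t extension instead of inline assembly (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 on overflow (quotient does not fit in 64 bits), 0 otherwise. */
    static int shl_div_u64(uint64_t a, unsigned int shift,
                           uint64_t divisor, uint64_t *result)
    {
        unsigned __int128 n = (unsigned __int128)a << shift;

        /* Same check as the kernel helper: if the high 64 bits of the
         * dividend are >= divisor, the quotient needs more than 64 bits. */
        if ((uint64_t)(n >> 64) >= divisor)
            return 1;

        *result = (uint64_t)(n / divisor);
        return 0;
    }

    int main(void)
    {
        uint64_t q;

        if (!shl_div_u64(1000, 48, 3, &q))
            printf("(1000 << 48) / 3 = %llu\n", (unsigned long long)q);
        return 0;
    }
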
9399 +diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
9400 +index 0a0e9112f284..5cb9f009f2be 100644
9401 +--- a/arch/x86/lib/x86-opcode-map.txt
9402 ++++ b/arch/x86/lib/x86-opcode-map.txt
9403 +@@ -909,7 +909,7 @@ EndTable
9404 +
9405 + GrpTable: Grp3_2
9406 + 0: TEST Ev,Iz
9407 +-1:
9408 ++1: TEST Ev,Iz
9409 + 2: NOT Ev
9410 + 3: NEG Ev
9411 + 4: MUL rAX,Ev
9412 +diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
9413 +index 835620ab435f..eaee1a7ed0b5 100644
9414 +--- a/arch/x86/mm/pageattr.c
9415 ++++ b/arch/x86/mm/pageattr.c
9416 +@@ -2077,19 +2077,13 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
9417 + .pgd = pgd,
9418 + .numpages = numpages,
9419 + .mask_set = __pgprot(0),
9420 +- .mask_clr = __pgprot(0),
9421 ++ .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
9422 + .flags = 0,
9423 + };
9424 +
9425 + if (!(__supported_pte_mask & _PAGE_NX))
9426 + goto out;
9427 +
9428 +- if (!(page_flags & _PAGE_NX))
9429 +- cpa.mask_clr = __pgprot(_PAGE_NX);
9430 +-
9431 +- if (!(page_flags & _PAGE_RW))
9432 +- cpa.mask_clr = __pgprot(_PAGE_RW);
9433 +-
9434 + if (!(page_flags & _PAGE_ENC))
9435 + cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
9436 +
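The pageattr.c hunk above fixes a masking bug: the two removed if-blocks each assigned cpa.mask_clr, so a mapping that needed both _PAGE_NX and _PAGE_RW cleared only had the second bit cleared. The replacement computes both in one expression, ~page_flags & (_PAGE_NX|_PAGE_RW). A standalone sketch of the difference, with stand-in flag values rather than the real _PAGE_* bits:

    #include <stdio.h>

    #define DEMO_PAGE_RW  (1UL << 1)   /* stand-ins for the real _PAGE_* bits */
    #define DEMO_PAGE_NX  (1UL << 2)

    int main(void)
    {
        unsigned long page_flags = 0;   /* caller wants the page non-writable and non-executable */
        unsigned long mask_clr;

        /* Old logic: the second assignment overwrote the first, losing the NX clear. */
        mask_clr = 0;
        if (!(page_flags & DEMO_PAGE_NX))
            mask_clr = DEMO_PAGE_NX;
        if (!(page_flags & DEMO_PAGE_RW))
            mask_clr = DEMO_PAGE_RW;
        printf("old mask_clr = %#lx\n", mask_clr);

        /* New logic: every bit the caller left out of page_flags is cleared at once. */
        mask_clr = ~page_flags & (DEMO_PAGE_NX | DEMO_PAGE_RW);
        printf("new mask_clr = %#lx\n", mask_clr);
        return 0;
    }
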
9437 +diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
9438 +index 335a62e74a2e..e7f19dec16b9 100644
9439 +--- a/arch/x86/platform/efi/efi.c
9440 ++++ b/arch/x86/platform/efi/efi.c
9441 +@@ -480,7 +480,6 @@ void __init efi_init(void)
9442 + efi_char16_t *c16;
9443 + char vendor[100] = "unknown";
9444 + int i = 0;
9445 +- void *tmp;
9446 +
9447 + #ifdef CONFIG_X86_32
9448 + if (boot_params.efi_info.efi_systab_hi ||
9449 +@@ -505,14 +504,16 @@ void __init efi_init(void)
9450 + /*
9451 + * Show what we know for posterity
9452 + */
9453 +- c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
9454 ++ c16 = early_memremap_ro(efi.systab->fw_vendor,
9455 ++ sizeof(vendor) * sizeof(efi_char16_t));
9456 + if (c16) {
9457 +- for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
9458 +- vendor[i] = *c16++;
9459 ++ for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
9460 ++ vendor[i] = c16[i];
9461 + vendor[i] = '\0';
9462 +- } else
9463 ++ early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
9464 ++ } else {
9465 + pr_err("Could not map the firmware vendor!\n");
9466 +- early_memunmap(tmp, 2);
9467 ++ }
9468 +
9469 + pr_info("EFI v%u.%.02u by %s\n",
9470 + efi.systab->hdr.revision >> 16,
9471 +@@ -929,16 +930,14 @@ static void __init __efi_enter_virtual_mode(void)
9472 +
9473 + if (efi_alloc_page_tables()) {
9474 + pr_err("Failed to allocate EFI page tables\n");
9475 +- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
9476 +- return;
9477 ++ goto err;
9478 + }
9479 +
9480 + efi_merge_regions();
9481 + new_memmap = efi_map_regions(&count, &pg_shift);
9482 + if (!new_memmap) {
9483 + pr_err("Error reallocating memory, EFI runtime non-functional!\n");
9484 +- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
9485 +- return;
9486 ++ goto err;
9487 + }
9488 +
9489 + pa = __pa(new_memmap);
9490 +@@ -952,8 +951,7 @@ static void __init __efi_enter_virtual_mode(void)
9491 +
9492 + if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
9493 + pr_err("Failed to remap late EFI memory map\n");
9494 +- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
9495 +- return;
9496 ++ goto err;
9497 + }
9498 +
9499 + if (efi_enabled(EFI_DBG)) {
9500 +@@ -961,12 +959,11 @@ static void __init __efi_enter_virtual_mode(void)
9501 + efi_print_memmap();
9502 + }
9503 +
9504 +- BUG_ON(!efi.systab);
9505 ++ if (WARN_ON(!efi.systab))
9506 ++ goto err;
9507 +
9508 +- if (efi_setup_page_tables(pa, 1 << pg_shift)) {
9509 +- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
9510 +- return;
9511 +- }
9512 ++ if (efi_setup_page_tables(pa, 1 << pg_shift))
9513 ++ goto err;
9514 +
9515 + efi_sync_low_kernel_mappings();
9516 +
9517 +@@ -986,9 +983,9 @@ static void __init __efi_enter_virtual_mode(void)
9518 + }
9519 +
9520 + if (status != EFI_SUCCESS) {
9521 +- pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
9522 +- status);
9523 +- panic("EFI call to SetVirtualAddressMap() failed!");
9524 ++ pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n",
9525 ++ status);
9526 ++ goto err;
9527 + }
9528 +
9529 + /*
9530 +@@ -1015,6 +1012,10 @@ static void __init __efi_enter_virtual_mode(void)
9531 +
9532 + /* clean DUMMY object */
9533 + efi_delete_dummy_variable();
9534 ++ return;
9535 ++
9536 ++err:
9537 ++ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
9538 + }
9539 +
9540 + void __init efi_enter_virtual_mode(void)
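In the efi.c hunk, the firmware vendor string is now mapped read-only with an explicit upper bound (sizeof(vendor) UCS-2 characters), copied by index so the original mapping pointer is still available for early_memunmap(), and unmapped only on the success path. A userspace sketch of the bounded UCS-2-to-char copy (the vendor string here is made up; non-ASCII characters are simply truncated, as in the original):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Stand-in for the mapped efi_char16_t firmware vendor string. */
        const uint16_t fw_vendor[] = { 'E', 'x', 'a', 'm', 'p', 'l', 'e', 0 };
        char vendor[100] = "unknown";
        size_t i;

        /* Copy by index; stop at the NUL or when the destination is full. */
        for (i = 0; i < sizeof(vendor) - 1 && fw_vendor[i]; i++)
            vendor[i] = (char)fw_vendor[i];
        vendor[i] = '\0';

        printf("EFI vendor: %s\n", vendor);
        return 0;
    }
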
9541 +diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
9542 +index ae369c2bbc3e..0ebb7f94fd51 100644
9543 +--- a/arch/x86/platform/efi/efi_64.c
9544 ++++ b/arch/x86/platform/efi/efi_64.c
9545 +@@ -390,11 +390,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
9546 + return 0;
9547 +
9548 + page = alloc_page(GFP_KERNEL|__GFP_DMA32);
9549 +- if (!page)
9550 +- panic("Unable to allocate EFI runtime stack < 4GB\n");
9551 ++ if (!page) {
9552 ++ pr_err("Unable to allocate EFI runtime stack < 4GB\n");
9553 ++ return 1;
9554 ++ }
9555 +
9556 +- efi_scratch.phys_stack = virt_to_phys(page_address(page));
9557 +- efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */
9558 ++ efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */
9559 +
9560 + npages = (_etext - _text) >> PAGE_SHIFT;
9561 + text = __pa(_text);
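The efi_64.c hunk stops panicking when the low EFI runtime stack page cannot be allocated and simplifies the stack-top computation: page_to_phys(page + 1) is the physical address one byte past the allocated page, exactly what virt_to_phys(page_address(page)) + PAGE_SIZE produced, and the right starting point for a stack that grows down. The same arithmetic with an ordinary buffer (names made up, purely illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_PAGE_SIZE 4096UL

    int main(void)
    {
        static unsigned char page[DEMO_PAGE_SIZE];
        /* Initial stack pointer = one past the end; pushes then move downward. */
        uintptr_t stack_top = (uintptr_t)page + DEMO_PAGE_SIZE;

        printf("page base %p, initial stack pointer %p\n",
               (void *)page, (void *)stack_top);
        return 0;
    }
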
9562 +diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
9563 +index 7bcf5f5ea029..8df4a49a99a6 100644
9564 +--- a/drivers/acpi/acpica/dsfield.c
9565 ++++ b/drivers/acpi/acpica/dsfield.c
9566 +@@ -273,7 +273,7 @@ cleanup:
9567 + * FUNCTION: acpi_ds_get_field_names
9568 + *
9569 + * PARAMETERS: info - create_field info structure
9570 +- * ` walk_state - Current method state
9571 ++ * walk_state - Current method state
9572 + * arg - First parser arg for the field name list
9573 + *
9574 + * RETURN: Status
9575 +diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
9576 +index eaa859a89702..1d82e1419397 100644
9577 +--- a/drivers/acpi/acpica/dswload.c
9578 ++++ b/drivers/acpi/acpica/dswload.c
9579 +@@ -444,6 +444,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state)
9580 + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op,
9581 + walk_state));
9582 +
9583 ++ /*
9584 ++ * Disassembler: handle create field operators here.
9585 ++ *
9586 ++ * create_buffer_field is a deferred op that is typically processed in load
9587 ++ * pass 2. However, disassembly of control method contents walk the parse
9588 ++ * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed
9589 ++ * in a later walk. This is a problem when there is a control method that
9590 ++ * has the same name as the AML_CREATE object. In this case, any use of the
9591 ++ * name segment will be detected as a method call rather than a reference
9592 ++ * to a buffer field.
9593 ++ *
9594 ++ * This earlier creation during disassembly solves this issue by inserting
9595 ++ * the named object in the ACPI namespace so that references to this name
9596 ++ * would be a name string rather than a method call.
9597 ++ */
9598 ++ if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) &&
9599 ++ (walk_state->op_info->flags & AML_CREATE)) {
9600 ++ status = acpi_ds_create_buffer_field(op, walk_state);
9601 ++ return_ACPI_STATUS(status);
9602 ++ }
9603 ++
9604 + /* We are only interested in opcodes that have an associated name */
9605 +
9606 + if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) {
9607 +diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
9608 +index f003e301723a..0905c07b8c7e 100644
9609 +--- a/drivers/ata/ahci.c
9610 ++++ b/drivers/ata/ahci.c
9611 +@@ -88,6 +88,7 @@ enum board_ids {
9612 +
9613 + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
9614 + static void ahci_remove_one(struct pci_dev *dev);
9615 ++static void ahci_shutdown_one(struct pci_dev *dev);
9616 + static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
9617 + unsigned long deadline);
9618 + static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
9619 +@@ -586,6 +587,7 @@ static struct pci_driver ahci_pci_driver = {
9620 + .id_table = ahci_pci_tbl,
9621 + .probe = ahci_init_one,
9622 + .remove = ahci_remove_one,
9623 ++ .shutdown = ahci_shutdown_one,
9624 + .driver = {
9625 + .pm = &ahci_pci_pm_ops,
9626 + },
9627 +@@ -1823,6 +1825,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
9628 + return 0;
9629 + }
9630 +
9631 ++static void ahci_shutdown_one(struct pci_dev *pdev)
9632 ++{
9633 ++ ata_pci_shutdown_one(pdev);
9634 ++}
9635 ++
9636 + static void ahci_remove_one(struct pci_dev *pdev)
9637 + {
9638 + pm_runtime_get_noresume(&pdev->dev);
9639 +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
9640 +index 08f67c109429..33eb5e342a7a 100644
9641 +--- a/drivers/ata/libata-core.c
9642 ++++ b/drivers/ata/libata-core.c
9643 +@@ -6706,6 +6706,26 @@ void ata_pci_remove_one(struct pci_dev *pdev)
9644 + ata_host_detach(host);
9645 + }
9646 +
9647 ++void ata_pci_shutdown_one(struct pci_dev *pdev)
9648 ++{
9649 ++ struct ata_host *host = pci_get_drvdata(pdev);
9650 ++ int i;
9651 ++
9652 ++ for (i = 0; i < host->n_ports; i++) {
9653 ++ struct ata_port *ap = host->ports[i];
9654 ++
9655 ++ ap->pflags |= ATA_PFLAG_FROZEN;
9656 ++
9657 ++ /* Disable port interrupts */
9658 ++ if (ap->ops->freeze)
9659 ++ ap->ops->freeze(ap);
9660 ++
9661 ++ /* Stop the port DMA engines */
9662 ++ if (ap->ops->port_stop)
9663 ++ ap->ops->port_stop(ap);
9664 ++ }
9665 ++}
9666 ++
9667 + /* move to PCI subsystem */
9668 + int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
9669 + {
9670 +@@ -7326,6 +7346,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode);
9671 +
9672 + #ifdef CONFIG_PCI
9673 + EXPORT_SYMBOL_GPL(pci_test_config_bits);
9674 ++EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
9675 + EXPORT_SYMBOL_GPL(ata_pci_remove_one);
9676 + #ifdef CONFIG_PM
9677 + EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
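The ahci.c and libata-core.c hunks add a PCI .shutdown hook: ata_pci_shutdown_one() walks every port of the host, marks it ATA_PFLAG_FROZEN, masks its interrupts via ->freeze(), and stops its DMA engines via ->port_stop() before the machine goes down. The shape of that quiesce loop in miniature, with made-up types standing in for struct ata_host and struct ata_port:

    #include <stdio.h>

    struct demo_port {
        int irq_enabled;
        int dma_running;
    };

    struct demo_host {
        int n_ports;
        struct demo_port ports[2];
    };

    static void demo_shutdown_one(struct demo_host *host)
    {
        for (int i = 0; i < host->n_ports; i++) {
            struct demo_port *p = &host->ports[i];

            p->irq_enabled = 0;   /* like ap->ops->freeze(): no more interrupts */
            p->dma_running = 0;   /* like ap->ops->port_stop(): halt the DMA engines */
            printf("port %d quiesced\n", i);
        }
    }

    int main(void)
    {
        struct demo_host host = { 2, { { 1, 1 }, { 1, 1 } } };

        demo_shutdown_one(&host);   /* what the new .shutdown callback does per host */
        return 0;
    }
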
9678 +diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
9679 +index f8b7e86907cc..0a1ad1a1d34f 100644
9680 +--- a/drivers/atm/fore200e.c
9681 ++++ b/drivers/atm/fore200e.c
9682 +@@ -1496,12 +1496,14 @@ fore200e_open(struct atm_vcc *vcc)
9683 + static void
9684 + fore200e_close(struct atm_vcc* vcc)
9685 + {
9686 +- struct fore200e* fore200e = FORE200E_DEV(vcc->dev);
9687 + struct fore200e_vcc* fore200e_vcc;
9688 ++ struct fore200e* fore200e;
9689 + struct fore200e_vc_map* vc_map;
9690 + unsigned long flags;
9691 +
9692 + ASSERT(vcc);
9693 ++ fore200e = FORE200E_DEV(vcc->dev);
9694 ++
9695 + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<<FORE200E_VPI_BITS));
9696 + ASSERT((vcc->vci >= 0) && (vcc->vci < 1<<FORE200E_VCI_BITS));
9697 +
9698 +@@ -1546,10 +1548,10 @@ fore200e_close(struct atm_vcc* vcc)
9699 + static int
9700 + fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
9701 + {
9702 +- struct fore200e* fore200e = FORE200E_DEV(vcc->dev);
9703 +- struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc);
9704 ++ struct fore200e* fore200e;
9705 ++ struct fore200e_vcc* fore200e_vcc;
9706 + struct fore200e_vc_map* vc_map;
9707 +- struct host_txq* txq = &fore200e->host_txq;
9708 ++ struct host_txq* txq;
9709 + struct host_txq_entry* entry;
9710 + struct tpd* tpd;
9711 + struct tpd_haddr tpd_haddr;
9712 +@@ -1562,9 +1564,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
9713 + unsigned char* data;
9714 + unsigned long flags;
9715 +
9716 +- ASSERT(vcc);
9717 +- ASSERT(fore200e);
9718 +- ASSERT(fore200e_vcc);
9719 ++ if (!vcc)
9720 ++ return -EINVAL;
9721 ++
9722 ++ fore200e = FORE200E_DEV(vcc->dev);
9723 ++ fore200e_vcc = FORE200E_VCC(vcc);
9724 ++
9725 ++ if (!fore200e)
9726 ++ return -EINVAL;
9727 ++
9728 ++ txq = &fore200e->host_txq;
9729 ++ if (!fore200e_vcc)
9730 ++ return -EINVAL;
9731 +
9732 + if (!test_bit(ATM_VF_READY, &vcc->flags)) {
9733 + DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi);
9734 +diff --git a/drivers/base/dd.c b/drivers/base/dd.c
9735 +index 536c9ac3b848..aa1a2d32360f 100644
9736 +--- a/drivers/base/dd.c
9737 ++++ b/drivers/base/dd.c
9738 +@@ -375,7 +375,10 @@ static int really_probe(struct device *dev, struct device_driver *drv)
9739 + atomic_inc(&probe_count);
9740 + pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
9741 + drv->bus->name, __func__, drv->name, dev_name(dev));
9742 +- WARN_ON(!list_empty(&dev->devres_head));
9743 ++ if (!list_empty(&dev->devres_head)) {
9744 ++ dev_crit(dev, "Resources present before probing\n");
9745 ++ return -EBUSY;
9746 ++ }
9747 +
9748 + re_probe:
9749 + dev->driver = drv;
9750 +diff --git a/drivers/base/platform.c b/drivers/base/platform.c
9751 +index f1105de0d9fe..bcb6519fe211 100644
9752 +--- a/drivers/base/platform.c
9753 ++++ b/drivers/base/platform.c
9754 +@@ -28,6 +28,7 @@
9755 + #include <linux/limits.h>
9756 + #include <linux/property.h>
9757 + #include <linux/kmemleak.h>
9758 ++#include <linux/types.h>
9759 +
9760 + #include "base.h"
9761 + #include "power/power.h"
9762 +@@ -68,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev)
9763 + struct resource *platform_get_resource(struct platform_device *dev,
9764 + unsigned int type, unsigned int num)
9765 + {
9766 +- int i;
9767 ++ u32 i;
9768 +
9769 + for (i = 0; i < dev->num_resources; i++) {
9770 + struct resource *r = &dev->resource[i];
9771 +@@ -163,7 +164,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
9772 + unsigned int type,
9773 + const char *name)
9774 + {
9775 +- int i;
9776 ++ u32 i;
9777 +
9778 + for (i = 0; i < dev->num_resources; i++) {
9779 + struct resource *r = &dev->resource[i];
9780 +@@ -360,7 +361,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties);
9781 + */
9782 + int platform_device_add(struct platform_device *pdev)
9783 + {
9784 +- int i, ret;
9785 ++ u32 i;
9786 ++ int ret;
9787 +
9788 + if (!pdev)
9789 + return -EINVAL;
9790 +@@ -426,7 +428,7 @@ int platform_device_add(struct platform_device *pdev)
9791 + pdev->id = PLATFORM_DEVID_AUTO;
9792 + }
9793 +
9794 +- while (--i >= 0) {
9795 ++ while (i--) {
9796 + struct resource *r = &pdev->resource[i];
9797 + if (r->parent)
9798 + release_resource(r);
9799 +@@ -447,7 +449,7 @@ EXPORT_SYMBOL_GPL(platform_device_add);
9800 + */
9801 + void platform_device_del(struct platform_device *pdev)
9802 + {
9803 +- int i;
9804 ++ u32 i;
9805 +
9806 + if (pdev) {
9807 + device_remove_properties(&pdev->dev);
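The platform.c hunk changes the resource index from int to u32, and that is why the failure-unwind loop is rewritten from while (--i >= 0) to while (i--): with an unsigned counter the old test is always true, so the loop would never terminate and would index the array with a wrapped-around value. A small sketch of the unsigned reverse-iteration idiom:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const char *res[] = { "mem0", "irq0", "mem1" };
        uint32_t i = 2;   /* registration failed at index 2, so entries 0..1 were claimed */

        /*
         * "while (i--)" tests the old value, then decrements, so the body sees
         * i-1 down to 0 and the loop stops cleanly even though i is unsigned.
         * "while (--i >= 0)" is never false for an unsigned i.
         */
        while (i--)
            printf("releasing %s\n", res[i]);
        return 0;
    }
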
9808 +diff --git a/drivers/block/brd.c b/drivers/block/brd.c
9809 +index 2d7178f7754e..0129b1921cb3 100644
9810 +--- a/drivers/block/brd.c
9811 ++++ b/drivers/block/brd.c
9812 +@@ -529,6 +529,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
9813 + return kobj;
9814 + }
9815 +
9816 ++static inline void brd_check_and_reset_par(void)
9817 ++{
9818 ++ if (unlikely(!max_part))
9819 ++ max_part = 1;
9820 ++
9821 ++ /*
9822 ++ * make sure (1U << MINORBITS) can be divided exactly by 'max_part',
9823 ++ * otherwise, it is possible to get the same dev_t when adding partitions.
9824 ++ */
9825 ++ if ((1U << MINORBITS) % max_part != 0)
9826 ++ max_part = 1UL << fls(max_part);
9827 ++
9828 ++ if (max_part > DISK_MAX_PARTS) {
9829 ++ pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
9830 ++ DISK_MAX_PARTS, DISK_MAX_PARTS);
9831 ++ max_part = DISK_MAX_PARTS;
9832 ++ }
9833 ++}
9834 ++
9835 + static int __init brd_init(void)
9836 + {
9837 + struct brd_device *brd, *next;
9838 +@@ -552,8 +571,7 @@ static int __init brd_init(void)
9839 + if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
9840 + return -EIO;
9841 +
9842 +- if (unlikely(!max_part))
9843 +- max_part = 1;
9844 ++ brd_check_and_reset_par();
9845 +
9846 + for (i = 0; i < rd_nr; i++) {
9847 + brd = brd_alloc(i);
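brd_check_and_reset_par() forces max_part to a power of two, so that it divides 1U << MINORBITS evenly and two partitions can never map to the same dev_t, and then caps it at DISK_MAX_PARTS. A quick userspace sketch of the power-of-two rounding step, using __builtin_clz in place of the kernel's fls():

    #include <stdio.h>

    /* fls(x): position of the most significant set bit, 1-based; 0 for x == 0. */
    static int fls_demo(unsigned int x)
    {
        return x ? 32 - __builtin_clz(x) : 0;
    }

    int main(void)
    {
        const unsigned int minorbits = 20;   /* MINORBITS on Linux */
        unsigned int values[] = { 1, 3, 5, 16, 300 };

        for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) {
            unsigned int max_part = values[i];

            if ((1U << minorbits) % max_part != 0)
                max_part = 1U << fls_demo(max_part);   /* round up to a power of two */
            printf("max_part %u -> %u\n", values[i], max_part);
        }
        return 0;
    }
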
9848 +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
9849 +index 5f1aa3197244..cbf74731cfce 100644
9850 +--- a/drivers/block/floppy.c
9851 ++++ b/drivers/block/floppy.c
9852 +@@ -848,14 +848,17 @@ static void reset_fdc_info(int mode)
9853 + /* selects the fdc and drive, and enables the fdc's input/dma. */
9854 + static void set_fdc(int drive)
9855 + {
9856 ++ unsigned int new_fdc = fdc;
9857 ++
9858 + if (drive >= 0 && drive < N_DRIVE) {
9859 +- fdc = FDC(drive);
9860 ++ new_fdc = FDC(drive);
9861 + current_drive = drive;
9862 + }
9863 +- if (fdc != 1 && fdc != 0) {
9864 ++ if (new_fdc >= N_FDC) {
9865 + pr_info("bad fdc value\n");
9866 + return;
9867 + }
9868 ++ fdc = new_fdc;
9869 + set_dor(fdc, ~0, 8);
9870 + #if N_FDC > 1
9871 + set_dor(1 - fdc, ~8, 0);
9872 +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
9873 +index 4c661ad91e7d..8f56e6b2f114 100644
9874 +--- a/drivers/block/nbd.c
9875 ++++ b/drivers/block/nbd.c
9876 +@@ -1203,6 +1203,16 @@ static int nbd_start_device(struct nbd_device *nbd)
9877 + args = kzalloc(sizeof(*args), GFP_KERNEL);
9878 + if (!args) {
9879 + sock_shutdown(nbd);
9880 ++ /*
9881 ++ * If num_connections is m (2 < m), and the first n
9882 ++ * (1 < n < m) kzallocs succeed but allocation n + 1 fails,
9883 ++ * we still have n recv threads. So add flush_workqueue here
9884 ++ * to prevent the recv threads from dropping the last
9885 ++ * config_refs and trying to destroy the workqueue from
9886 ++ * inside the workqueue.
9887 ++ */
9888 ++ if (i)
9889 ++ flush_workqueue(nbd->recv_workq);
9890 + return -ENOMEM;
9891 + }
9892 + sk_set_memalloc(config->socks[i]->sock->sk);
9893 +diff --git a/drivers/char/random.c b/drivers/char/random.c
9894 +index e6efa07e9f9e..50d5846acf48 100644
9895 +--- a/drivers/char/random.c
9896 ++++ b/drivers/char/random.c
9897 +@@ -1598,8 +1598,9 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
9898 + print_once = true;
9899 + #endif
9900 + if (__ratelimit(&unseeded_warning))
9901 +- pr_notice("random: %s called from %pS with crng_init=%d\n",
9902 +- func_name, caller, crng_init);
9903 ++ printk_deferred(KERN_NOTICE "random: %s called from %pS "
9904 ++ "with crng_init=%d\n", func_name, caller,
9905 ++ crng_init);
9906 + }
9907 +
9908 + /*
9909 +diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
9910 +index a93439242565..d3953ea69fda 100644
9911 +--- a/drivers/clk/qcom/clk-rcg2.c
9912 ++++ b/drivers/clk/qcom/clk-rcg2.c
9913 +@@ -210,6 +210,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f,
9914 +
9915 + clk_flags = clk_hw_get_flags(hw);
9916 + p = clk_hw_get_parent_by_index(hw, index);
9917 ++ if (!p)
9918 ++ return -EINVAL;
9919 ++
9920 + if (clk_flags & CLK_SET_RATE_PARENT) {
9921 + if (f->pre_div) {
9922 + if (!rate)
9923 +diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
9924 +index eaafc038368f..183985c8c9ba 100644
9925 +--- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
9926 ++++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c
9927 +@@ -884,11 +884,26 @@ static const struct sunxi_ccu_desc sun50i_a64_ccu_desc = {
9928 + .num_resets = ARRAY_SIZE(sun50i_a64_ccu_resets),
9929 + };
9930 +
9931 ++static struct ccu_pll_nb sun50i_a64_pll_cpu_nb = {
9932 ++ .common = &pll_cpux_clk.common,
9933 ++ /* copy from pll_cpux_clk */
9934 ++ .enable = BIT(31),
9935 ++ .lock = BIT(28),
9936 ++};
9937 ++
9938 ++static struct ccu_mux_nb sun50i_a64_cpu_nb = {
9939 ++ .common = &cpux_clk.common,
9940 ++ .cm = &cpux_clk.mux,
9941 ++ .delay_us = 1, /* > 8 clock cycles at 24 MHz */
9942 ++ .bypass_index = 1, /* index of 24 MHz oscillator */
9943 ++};
9944 ++
9945 + static int sun50i_a64_ccu_probe(struct platform_device *pdev)
9946 + {
9947 + struct resource *res;
9948 + void __iomem *reg;
9949 + u32 val;
9950 ++ int ret;
9951 +
9952 + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
9953 + reg = devm_ioremap_resource(&pdev->dev, res);
9954 +@@ -902,7 +917,18 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev)
9955 +
9956 + writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG);
9957 +
9958 +- return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc);
9959 ++ ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc);
9960 ++ if (ret)
9961 ++ return ret;
9962 ++
9963 ++ /* Gate then ungate PLL CPU after any rate changes */
9964 ++ ccu_pll_notifier_register(&sun50i_a64_pll_cpu_nb);
9965 ++
9966 ++ /* Reparent CPU during PLL CPU rate changes */
9967 ++ ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
9968 ++ &sun50i_a64_cpu_nb);
9969 ++
9970 ++ return 0;
9971 + }
9972 +
9973 + static const struct of_device_id sun50i_a64_ccu_ids[] = {
9974 +diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c
9975 +index 39e489a96ad7..8894cfc32be0 100644
9976 +--- a/drivers/clocksource/bcm2835_timer.c
9977 ++++ b/drivers/clocksource/bcm2835_timer.c
9978 +@@ -134,7 +134,7 @@ static int __init bcm2835_timer_init(struct device_node *node)
9979 + ret = setup_irq(irq, &timer->act);
9980 + if (ret) {
9981 + pr_err("Can't set up timer IRQ\n");
9982 +- goto err_iounmap;
9983 ++ goto err_timer_free;
9984 + }
9985 +
9986 + clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff);
9987 +@@ -143,6 +143,9 @@ static int __init bcm2835_timer_init(struct device_node *node)
9988 +
9989 + return 0;
9990 +
9991 ++err_timer_free:
9992 ++ kfree(timer);
9993 ++
9994 + err_iounmap:
9995 + iounmap(base);
9996 + return ret;
9997 +diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
9998 +index 6a172d338f6d..4c4ec68b0566 100644
9999 +--- a/drivers/devfreq/Kconfig
10000 ++++ b/drivers/devfreq/Kconfig
10001 +@@ -103,7 +103,8 @@ config ARM_TEGRA_DEVFREQ
10002 +
10003 + config ARM_RK3399_DMC_DEVFREQ
10004 + tristate "ARM RK3399 DMC DEVFREQ Driver"
10005 +- depends on ARCH_ROCKCHIP
10006 ++ depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \
10007 ++ (COMPILE_TEST && HAVE_ARM_SMCCC)
10008 + select DEVFREQ_EVENT_ROCKCHIP_DFI
10009 + select DEVFREQ_GOV_SIMPLE_ONDEMAND
10010 + select PM_DEVFREQ_EVENT
10011 +diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
10012 +index cd949800eed9..8851bc4e8e3e 100644
10013 +--- a/drivers/devfreq/event/Kconfig
10014 ++++ b/drivers/devfreq/event/Kconfig
10015 +@@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU
10016 +
10017 + config DEVFREQ_EVENT_ROCKCHIP_DFI
10018 + tristate "ROCKCHIP DFI DEVFREQ event Driver"
10019 +- depends on ARCH_ROCKCHIP
10020 ++ depends on ARCH_ROCKCHIP || COMPILE_TEST
10021 + help
10022 + This add the devfreq-event driver for Rockchip SoC. It provides DFI
10023 + (DDR Monitor Module) driver to count ddr load.
10024 +diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
10025 +index b451354735d3..faaaf10311ec 100644
10026 +--- a/drivers/dma/dmaengine.c
10027 ++++ b/drivers/dma/dmaengine.c
10028 +@@ -192,7 +192,7 @@ __dma_device_satisfies_mask(struct dma_device *device,
10029 +
10030 + static struct module *dma_chan_to_owner(struct dma_chan *chan)
10031 + {
10032 +- return chan->device->dev->driver->owner;
10033 ++ return chan->device->owner;
10034 + }
10035 +
10036 + /**
10037 +@@ -928,6 +928,8 @@ int dma_async_device_register(struct dma_device *device)
10038 + return -EIO;
10039 + }
10040 +
10041 ++ device->owner = device->dev->driver->owner;
10042 ++
10043 + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) {
10044 + dev_err(device->dev,
10045 + "Device claims capability %s, but op is not defined\n",
10046 +diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
10047 +index 6544a16ab02e..7541bd327e6c 100644
10048 +--- a/drivers/gpio/gpio-grgpio.c
10049 ++++ b/drivers/gpio/gpio-grgpio.c
10050 +@@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
10051 + lirq->irq = irq;
10052 + uirq = &priv->uirqs[lirq->index];
10053 + if (uirq->refcnt == 0) {
10054 ++ spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
10055 + ret = request_irq(uirq->uirq, grgpio_irq_handler, 0,
10056 + dev_name(priv->dev), priv);
10057 + if (ret) {
10058 + dev_err(priv->dev,
10059 + "Could not request underlying irq %d\n",
10060 + uirq->uirq);
10061 +-
10062 +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
10063 +-
10064 + return ret;
10065 + }
10066 ++ spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
10067 + }
10068 + uirq->refcnt++;
10069 +
10070 +@@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
10071 + if (index >= 0) {
10072 + uirq = &priv->uirqs[lirq->index];
10073 + uirq->refcnt--;
10074 +- if (uirq->refcnt == 0)
10075 ++ if (uirq->refcnt == 0) {
10076 ++ spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
10077 + free_irq(uirq->uirq, priv);
10078 ++ return;
10079 ++ }
10080 + }
10081 +
10082 + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
10083 +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
10084 +index cc4e18dcd8b6..4779740421a8 100644
10085 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
10086 ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
10087 +@@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
10088 + path_size += le16_to_cpu(path->usSize);
10089 +
10090 + if (device_support & le16_to_cpu(path->usDeviceTag)) {
10091 +- uint8_t con_obj_id, con_obj_num, con_obj_type;
10092 +-
10093 +- con_obj_id =
10094 ++ uint8_t con_obj_id =
10095 + (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
10096 + >> OBJECT_ID_SHIFT;
10097 +- con_obj_num =
10098 +- (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
10099 +- >> ENUM_ID_SHIFT;
10100 +- con_obj_type =
10101 +- (le16_to_cpu(path->usConnObjectId) &
10102 +- OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
10103 +
10104 + /* Skip TV/CV support */
10105 + if ((le16_to_cpu(path->usDeviceTag) ==
10106 +@@ -371,14 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
10107 + router.ddc_valid = false;
10108 + router.cd_valid = false;
10109 + for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
10110 +- uint8_t grph_obj_id, grph_obj_num, grph_obj_type;
10111 +-
10112 +- grph_obj_id =
10113 +- (le16_to_cpu(path->usGraphicObjIds[j]) &
10114 +- OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
10115 +- grph_obj_num =
10116 +- (le16_to_cpu(path->usGraphicObjIds[j]) &
10117 +- ENUM_ID_MASK) >> ENUM_ID_SHIFT;
10118 ++ uint8_t grph_obj_type=
10119 + grph_obj_type =
10120 + (le16_to_cpu(path->usGraphicObjIds[j]) &
10121 + OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
10122 +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
10123 +index ff7d4827385e..7a2366bd1fba 100644
10124 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
10125 ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
10126 +@@ -279,7 +279,12 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev)
10127 + }
10128 + static u32 soc15_get_xclk(struct amdgpu_device *adev)
10129 + {
10130 +- return adev->clock.spll.reference_freq;
10131 ++ u32 reference_clock = adev->clock.spll.reference_freq;
10132 ++
10133 ++ if (adev->asic_type == CHIP_RAVEN)
10134 ++ return reference_clock / 4;
10135 ++
10136 ++ return reference_clock;
10137 + }
10138 +
10139 +
10140 +diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c
10141 +index 2901b7944068..6858c80d2eb5 100644
10142 +--- a/drivers/gpu/drm/drm_debugfs_crc.c
10143 ++++ b/drivers/gpu/drm/drm_debugfs_crc.c
10144 +@@ -101,8 +101,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf,
10145 + if (IS_ERR(source))
10146 + return PTR_ERR(source);
10147 +
10148 +- if (source[len] == '\n')
10149 +- source[len] = '\0';
10150 ++ if (source[len - 1] == '\n')
10151 ++ source[len - 1] = '\0';
10152 +
10153 + spin_lock_irq(&crc->lock);
10154 +
10155 +diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
10156 +index 2570c7f647a6..883fc45870dd 100644
10157 +--- a/drivers/gpu/drm/gma500/framebuffer.c
10158 ++++ b/drivers/gpu/drm/gma500/framebuffer.c
10159 +@@ -486,6 +486,7 @@ static int psbfb_probe(struct drm_fb_helper *helper,
10160 + container_of(helper, struct psb_fbdev, psb_fb_helper);
10161 + struct drm_device *dev = psb_fbdev->psb_fb_helper.dev;
10162 + struct drm_psb_private *dev_priv = dev->dev_private;
10163 ++ unsigned int fb_size;
10164 + int bytespp;
10165 +
10166 + bytespp = sizes->surface_bpp / 8;
10167 +@@ -495,8 +496,11 @@ static int psbfb_probe(struct drm_fb_helper *helper,
10168 + /* If the mode will not fit in 32bit then switch to 16bit to get
10169 + a console on full resolution. The X mode setting server will
10170 + allocate its own 32bit GEM framebuffer */
10171 +- if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height >
10172 +- dev_priv->vram_stolen_size) {
10173 ++ fb_size = ALIGN(sizes->surface_width * bytespp, 64) *
10174 ++ sizes->surface_height;
10175 ++ fb_size = ALIGN(fb_size, PAGE_SIZE);
10176 ++
10177 ++ if (fb_size > dev_priv->vram_stolen_size) {
10178 + sizes->surface_bpp = 16;
10179 + sizes->surface_depth = 16;
10180 + }
10181 +diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
10182 +index 658b8dd45b83..3ea311d32fa9 100644
10183 +--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
10184 ++++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
10185 +@@ -307,6 +307,7 @@ err_pm_runtime_put:
10186 + static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
10187 + {
10188 + struct drm_device *drm = mtk_crtc->base.dev;
10189 ++ struct drm_crtc *crtc = &mtk_crtc->base;
10190 + int i;
10191 +
10192 + DRM_DEBUG_DRIVER("%s\n", __func__);
10193 +@@ -328,6 +329,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
10194 + mtk_disp_mutex_unprepare(mtk_crtc->mutex);
10195 +
10196 + pm_runtime_put(drm->dev);
10197 ++
10198 ++ if (crtc->state->event && !crtc->state->active) {
10199 ++ spin_lock_irq(&crtc->dev->event_lock);
10200 ++ drm_crtc_send_vblank_event(crtc, crtc->state->event);
10201 ++ crtc->state->event = NULL;
10202 ++ spin_unlock_irq(&crtc->dev->event_lock);
10203 ++ }
10204 + }
10205 +
10206 + static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
10207 +diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
10208 +index 99e14e3e0fe4..72532539369f 100644
10209 +--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
10210 ++++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
10211 +@@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
10212 +
10213 + fence = list_entry(fctx->pending.next, typeof(*fence), head);
10214 + chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
10215 +- if (nouveau_fence_update(fence->channel, fctx))
10216 ++ if (nouveau_fence_update(chan, fctx))
10217 + ret = NVIF_NOTIFY_DROP;
10218 + }
10219 + spin_unlock_irqrestore(&fctx->lock, flags);
10220 +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
10221 +index 0c0310498afd..cd9666583d4b 100644
10222 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
10223 ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
10224 +@@ -73,6 +73,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug)
10225 +
10226 + if (debug > subdev->debug)
10227 + return;
10228 ++ if (!mthd)
10229 ++ return;
10230 +
10231 + for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) {
10232 + u32 base = chan->head * mthd->addr;
10233 +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
10234 +index de8b806b88fd..7618b2eb4fdf 100644
10235 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
10236 ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
10237 +@@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
10238 +
10239 + nent = (fuc.size / sizeof(struct gk20a_fw_av));
10240 +
10241 +- pack = vzalloc((sizeof(*pack) * max_classes) +
10242 +- (sizeof(*init) * (nent + 1)));
10243 ++ pack = vzalloc((sizeof(*pack) * (max_classes + 1)) +
10244 ++ (sizeof(*init) * (nent + max_classes + 1)));
10245 + if (!pack) {
10246 + ret = -ENOMEM;
10247 + goto end;
10248 + }
10249 +
10250 +- init = (void *)(pack + max_classes);
10251 ++ init = (void *)(pack + max_classes + 1);
10252 +
10253 +- for (i = 0; i < nent; i++) {
10254 +- struct gf100_gr_init *ent = &init[i];
10255 ++ for (i = 0; i < nent; i++, init++) {
10256 + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i];
10257 + u32 class = av->addr & 0xffff;
10258 + u32 addr = (av->addr & 0xffff0000) >> 14;
10259 +
10260 + if (prevclass != class) {
10261 +- pack[classidx].init = ent;
10262 ++ if (prevclass) /* Add terminator to the method list. */
10263 ++ init++;
10264 ++ pack[classidx].init = init;
10265 + pack[classidx].type = class;
10266 + prevclass = class;
10267 + if (++classidx >= max_classes) {
10268 +@@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
10269 + }
10270 + }
10271 +
10272 +- ent->addr = addr;
10273 +- ent->data = av->data;
10274 +- ent->count = 1;
10275 +- ent->pitch = 1;
10276 ++ init->addr = addr;
10277 ++ init->data = av->data;
10278 ++ init->count = 1;
10279 ++ init->pitch = 1;
10280 + }
10281 +
10282 + *ppack = pack;
10283 +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
10284 +index 30491d132d59..fbd10a67c6c6 100644
10285 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
10286 ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
10287 +@@ -108,6 +108,7 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
10288 + struct gm200_secboot *gsb;
10289 + struct nvkm_acr *acr;
10290 +
10291 ++ *psb = NULL;
10292 + acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) |
10293 + BIT(NVKM_SECBOOT_FALCON_PMU));
10294 + if (IS_ERR(acr))
10295 +@@ -116,10 +117,8 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
10296 + acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU);
10297 +
10298 + gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
10299 +- if (!gsb) {
10300 +- psb = NULL;
10301 ++ if (!gsb)
10302 + return -ENOMEM;
10303 +- }
10304 + *psb = &gsb->base;
10305 +
10306 + ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base);
10307 +diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
10308 +index 4f94b78cb464..d86110cdf085 100644
10309 +--- a/drivers/gpu/drm/radeon/radeon_display.c
10310 ++++ b/drivers/gpu/drm/radeon/radeon_display.c
10311 +@@ -119,6 +119,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc)
10312 +
10313 + DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id);
10314 +
10315 ++ msleep(10);
10316 ++
10317 + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset,
10318 + (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) |
10319 + NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS)));
10320 +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
10321 +index 36c7b6c839c0..738ad2fc79a2 100644
10322 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
10323 ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
10324 +@@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man,
10325 +
10326 + cres->hash.key = user_key | (res_type << 24);
10327 + ret = drm_ht_insert_item(&man->resources, &cres->hash);
10328 +- if (unlikely(ret != 0))
10329 ++ if (unlikely(ret != 0)) {
10330 ++ kfree(cres);
10331 + goto out_invalid_key;
10332 ++ }
10333 +
10334 + cres->state = VMW_CMDBUF_RES_ADD;
10335 + cres->res = vmw_resource_reference(res);
10336 +diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c
10337 +index 58b789c28b48..94eea2ac6251 100644
10338 +--- a/drivers/hwmon/pmbus/ltc2978.c
10339 ++++ b/drivers/hwmon/pmbus/ltc2978.c
10340 +@@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
10341 +
10342 + #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */
10343 +
10344 +-#define LTC_NOT_BUSY BIT(5)
10345 +-#define LTC_NOT_PENDING BIT(4)
10346 ++#define LTC_NOT_BUSY BIT(6)
10347 ++#define LTC_NOT_PENDING BIT(5)
10348 +
10349 + /*
10350 + * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which
10351 +diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
10352 +index b127ed60c733..9dde8390da09 100644
10353 +--- a/drivers/ide/cmd64x.c
10354 ++++ b/drivers/ide/cmd64x.c
10355 +@@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode)
10356 + struct ide_timing t;
10357 + u8 arttim = 0;
10358 +
10359 ++ if (drive->dn >= ARRAY_SIZE(drwtim_regs))
10360 ++ return;
10361 ++
10362 + ide_timing_compute(drive, mode, &t, T, 0);
10363 +
10364 + /*
10365 +diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
10366 +index a97affca18ab..0f57d45484d1 100644
10367 +--- a/drivers/ide/serverworks.c
10368 ++++ b/drivers/ide/serverworks.c
10369 +@@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
10370 + struct pci_dev *dev = to_pci_dev(hwif->dev);
10371 + const u8 pio = drive->pio_mode - XFER_PIO_0;
10372 +
10373 ++ if (drive->dn >= ARRAY_SIZE(drive_pci))
10374 ++ return;
10375 ++
10376 + pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]);
10377 +
10378 + if (svwks_csb_check(dev)) {
10379 +@@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
10380 +
10381 + u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0;
10382 +
10383 ++ if (drive->dn >= ARRAY_SIZE(drive_pci2))
10384 ++ return;
10385 ++
10386 + pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
10387 + pci_read_config_byte(dev, 0x54, &ultra_enable);
10388 +
10389 +diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
10390 +index a3dd88c57be7..9b8276691329 100644
10391 +--- a/drivers/infiniband/core/security.c
10392 ++++ b/drivers/infiniband/core/security.c
10393 +@@ -338,22 +338,16 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
10394 + if (!new_pps)
10395 + return NULL;
10396 +
10397 +- if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
10398 +- if (!qp_pps) {
10399 +- new_pps->main.port_num = qp_attr->port_num;
10400 +- new_pps->main.pkey_index = qp_attr->pkey_index;
10401 +- } else {
10402 +- new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
10403 +- qp_attr->port_num :
10404 +- qp_pps->main.port_num;
10405 +-
10406 +- new_pps->main.pkey_index =
10407 +- (qp_attr_mask & IB_QP_PKEY_INDEX) ?
10408 +- qp_attr->pkey_index :
10409 +- qp_pps->main.pkey_index;
10410 +- }
10411 ++ if (qp_attr_mask & IB_QP_PORT)
10412 ++ new_pps->main.port_num =
10413 ++ (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num;
10414 ++ if (qp_attr_mask & IB_QP_PKEY_INDEX)
10415 ++ new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index :
10416 ++ qp_attr->pkey_index;
10417 ++ if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT))
10418 + new_pps->main.state = IB_PORT_PKEY_VALID;
10419 +- } else if (qp_pps) {
10420 ++
10421 ++ if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) {
10422 + new_pps->main.port_num = qp_pps->main.port_num;
10423 + new_pps->main.pkey_index = qp_pps->main.pkey_index;
10424 + if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
10425 +diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
10426 +index 4a0b7c003477..cb5785dda524 100644
10427 +--- a/drivers/infiniband/hw/hfi1/chip.c
10428 ++++ b/drivers/infiniband/hw/hfi1/chip.c
10429 +@@ -1686,6 +1686,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
10430 + return dd->verbs_dev.n_piodrain;
10431 + }
10432 +
10433 ++static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry,
10434 ++ void *context, int vl, int mode, u64 data)
10435 ++{
10436 ++ struct hfi1_devdata *dd = context;
10437 ++
10438 ++ return dd->ctx0_seq_drop;
10439 ++}
10440 ++
10441 + static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
10442 + void *context, int vl, int mode, u64 data)
10443 + {
10444 +@@ -4246,6 +4254,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
10445 + access_sw_cpu_intr),
10446 + [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
10447 + access_sw_cpu_rcv_limit),
10448 ++[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL,
10449 ++ access_sw_ctx0_seq_drop),
10450 + [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
10451 + access_sw_vtx_wait),
10452 + [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
10453 +diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
10454 +index 50b8645d0b87..a88ef2433cea 100644
10455 +--- a/drivers/infiniband/hw/hfi1/chip.h
10456 ++++ b/drivers/infiniband/hw/hfi1/chip.h
10457 +@@ -864,6 +864,7 @@ enum {
10458 + C_DC_PG_STS_TX_MBE_CNT,
10459 + C_SW_CPU_INTR,
10460 + C_SW_CPU_RCV_LIM,
10461 ++ C_SW_CTX0_SEQ_DROP,
10462 + C_SW_VTX_WAIT,
10463 + C_SW_PIO_WAIT,
10464 + C_SW_PIO_DRAIN,
10465 +diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
10466 +index 72c836b826ca..7aa1aabb7a43 100644
10467 +--- a/drivers/infiniband/hw/hfi1/driver.c
10468 ++++ b/drivers/infiniband/hw/hfi1/driver.c
10469 +@@ -710,6 +710,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
10470 + {
10471 + int ret;
10472 +
10473 ++ packet->rcd->dd->ctx0_seq_drop++;
10474 + /* Set up for the next packet */
10475 + packet->rhqoff += packet->rsize;
10476 + if (packet->rhqoff >= packet->maxcnt)
10477 +diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
10478 +index 76861a8b5c1e..b3ab803bf8b1 100644
10479 +--- a/drivers/infiniband/hw/hfi1/file_ops.c
10480 ++++ b/drivers/infiniband/hw/hfi1/file_ops.c
10481 +@@ -195,23 +195,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
10482 +
10483 + fd = kzalloc(sizeof(*fd), GFP_KERNEL);
10484 +
10485 +- if (fd) {
10486 +- fd->rec_cpu_num = -1; /* no cpu affinity by default */
10487 +- fd->mm = current->mm;
10488 +- mmgrab(fd->mm);
10489 +- fd->dd = dd;
10490 +- kobject_get(&fd->dd->kobj);
10491 +- fp->private_data = fd;
10492 +- } else {
10493 +- fp->private_data = NULL;
10494 +-
10495 +- if (atomic_dec_and_test(&dd->user_refcount))
10496 +- complete(&dd->user_comp);
10497 +-
10498 +- return -ENOMEM;
10499 +- }
10500 +-
10501 ++ if (!fd || init_srcu_struct(&fd->pq_srcu))
10502 ++ goto nomem;
10503 ++ spin_lock_init(&fd->pq_rcu_lock);
10504 ++ spin_lock_init(&fd->tid_lock);
10505 ++ spin_lock_init(&fd->invalid_lock);
10506 ++ fd->rec_cpu_num = -1; /* no cpu affinity by default */
10507 ++ fd->mm = current->mm;
10508 ++ mmgrab(fd->mm);
10509 ++ fd->dd = dd;
10510 ++ kobject_get(&fd->dd->kobj);
10511 ++ fp->private_data = fd;
10512 + return 0;
10513 ++nomem:
10514 ++ kfree(fd);
10515 ++ fp->private_data = NULL;
10516 ++ if (atomic_dec_and_test(&dd->user_refcount))
10517 ++ complete(&dd->user_comp);
10518 ++ return -ENOMEM;
10519 + }
10520 +
10521 + static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
10522 +@@ -417,21 +418,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
10523 + static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
10524 + {
10525 + struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
10526 +- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
10527 ++ struct hfi1_user_sdma_pkt_q *pq;
10528 + struct hfi1_user_sdma_comp_q *cq = fd->cq;
10529 + int done = 0, reqs = 0;
10530 + unsigned long dim = from->nr_segs;
10531 ++ int idx;
10532 +
10533 +- if (!cq || !pq)
10534 ++ idx = srcu_read_lock(&fd->pq_srcu);
10535 ++ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
10536 ++ if (!cq || !pq) {
10537 ++ srcu_read_unlock(&fd->pq_srcu, idx);
10538 + return -EIO;
10539 ++ }
10540 +
10541 +- if (!iter_is_iovec(from) || !dim)
10542 ++ if (!iter_is_iovec(from) || !dim) {
10543 ++ srcu_read_unlock(&fd->pq_srcu, idx);
10544 + return -EINVAL;
10545 ++ }
10546 +
10547 + trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
10548 +
10549 +- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
10550 ++ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
10551 ++ srcu_read_unlock(&fd->pq_srcu, idx);
10552 + return -ENOSPC;
10553 ++ }
10554 +
10555 + while (dim) {
10556 + int ret;
10557 +@@ -449,6 +459,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
10558 + reqs++;
10559 + }
10560 +
10561 ++ srcu_read_unlock(&fd->pq_srcu, idx);
10562 + return reqs;
10563 + }
10564 +
10565 +@@ -824,6 +835,7 @@ done:
10566 + if (atomic_dec_and_test(&dd->user_refcount))
10567 + complete(&dd->user_comp);
10568 +
10569 ++ cleanup_srcu_struct(&fdata->pq_srcu);
10570 + kfree(fdata);
10571 + return 0;
10572 + }
10573 +diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
10574 +index af550c1767e3..cf9bc95d8039 100644
10575 +--- a/drivers/infiniband/hw/hfi1/hfi.h
10576 ++++ b/drivers/infiniband/hw/hfi1/hfi.h
10577 +@@ -1043,6 +1043,8 @@ struct hfi1_devdata {
10578 +
10579 + char *boardname; /* human readable board info */
10580 +
10581 ++ u64 ctx0_seq_drop;
10582 ++
10583 + /* reset value */
10584 + u64 z_int_counter;
10585 + u64 z_rcv_limit;
10586 +@@ -1353,10 +1355,13 @@ struct mmu_rb_handler;
10587 +
10588 + /* Private data for file operations */
10589 + struct hfi1_filedata {
10590 ++ struct srcu_struct pq_srcu;
10591 + struct hfi1_devdata *dd;
10592 + struct hfi1_ctxtdata *uctxt;
10593 + struct hfi1_user_sdma_comp_q *cq;
10594 +- struct hfi1_user_sdma_pkt_q *pq;
10595 ++ /* update side lock for SRCU */
10596 ++ spinlock_t pq_rcu_lock;
10597 ++ struct hfi1_user_sdma_pkt_q __rcu *pq;
10598 + u16 subctxt;
10599 + /* for cpu affinity; -1 if none */
10600 + int rec_cpu_num;
10601 +diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
10602 +index b38e3808836c..c6d085e1c10d 100644
10603 +--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
10604 ++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
10605 +@@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
10606 + struct hfi1_devdata *dd = uctxt->dd;
10607 + int ret = 0;
10608 +
10609 +- spin_lock_init(&fd->tid_lock);
10610 +- spin_lock_init(&fd->invalid_lock);
10611 +-
10612 + fd->entry_to_rb = kcalloc(uctxt->expected_count,
10613 + sizeof(struct rb_node *),
10614 + GFP_KERNEL);
10615 +diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
10616 +index 4854a4a453b5..f23d47194c12 100644
10617 +--- a/drivers/infiniband/hw/hfi1/user_sdma.c
10618 ++++ b/drivers/infiniband/hw/hfi1/user_sdma.c
10619 +@@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
10620 + pq = kzalloc(sizeof(*pq), GFP_KERNEL);
10621 + if (!pq)
10622 + return -ENOMEM;
10623 +-
10624 + pq->dd = dd;
10625 + pq->ctxt = uctxt->ctxt;
10626 + pq->subctxt = fd->subctxt;
10627 +@@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
10628 + goto pq_mmu_fail;
10629 + }
10630 +
10631 +- fd->pq = pq;
10632 ++ rcu_assign_pointer(fd->pq, pq);
10633 + fd->cq = cq;
10634 +
10635 + return 0;
10636 +@@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
10637 +
10638 + trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
10639 +
10640 +- pq = fd->pq;
10641 ++ spin_lock(&fd->pq_rcu_lock);
10642 ++ pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
10643 ++ lockdep_is_held(&fd->pq_rcu_lock));
10644 + if (pq) {
10645 ++ rcu_assign_pointer(fd->pq, NULL);
10646 ++ spin_unlock(&fd->pq_rcu_lock);
10647 ++ synchronize_srcu(&fd->pq_srcu);
10648 ++ /* at this point there can be no more new requests */
10649 + if (pq->handler)
10650 + hfi1_mmu_rb_unregister(pq->handler);
10651 + iowait_sdma_drain(&pq->busy);
10652 +@@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
10653 + kfree(pq->req_in_use);
10654 + kmem_cache_destroy(pq->txreq_cache);
10655 + kfree(pq);
10656 +- fd->pq = NULL;
10657 ++ } else {
10658 ++ spin_unlock(&fd->pq_rcu_lock);
10659 + }
10660 + if (fd->cq) {
10661 + vfree(fd->cq->comps);
10662 +@@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
10663 + {
10664 + int ret = 0, i;
10665 + struct hfi1_ctxtdata *uctxt = fd->uctxt;
10666 +- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
10667 ++ struct hfi1_user_sdma_pkt_q *pq =
10668 ++ srcu_dereference(fd->pq, &fd->pq_srcu);
10669 + struct hfi1_user_sdma_comp_q *cq = fd->cq;
10670 + struct hfi1_devdata *dd = pq->dd;
10671 + unsigned long idx = 0;
10672 +diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
10673 +index d1cc89f6f2e3..46c8a66731e6 100644
10674 +--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
10675 ++++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
10676 +@@ -408,7 +408,7 @@ struct rxe_dev {
10677 + struct list_head pending_mmaps;
10678 +
10679 + spinlock_t mmap_offset_lock; /* guard mmap_offset */
10680 +- int mmap_offset;
10681 ++ u64 mmap_offset;
10682 +
10683 + atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
10684 +
10685 +diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
10686 +index 9b5691f306a2..ee3f630c9217 100644
10687 +--- a/drivers/infiniband/ulp/isert/ib_isert.c
10688 ++++ b/drivers/infiniband/ulp/isert/ib_isert.c
10689 +@@ -2582,6 +2582,17 @@ isert_wait4logout(struct isert_conn *isert_conn)
10690 + }
10691 + }
10692 +
10693 ++static void
10694 ++isert_wait4cmds(struct iscsi_conn *conn)
10695 ++{
10696 ++ isert_info("iscsi_conn %p\n", conn);
10697 ++
10698 ++ if (conn->sess) {
10699 ++ target_sess_cmd_list_set_waiting(conn->sess->se_sess);
10700 ++ target_wait_for_sess_cmds(conn->sess->se_sess);
10701 ++ }
10702 ++}
10703 ++
10704 + /**
10705 + * isert_put_unsol_pending_cmds() - Drop commands waiting for
10706 + * unsolicitate dataout
10707 +@@ -2629,6 +2640,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
10708 +
10709 + ib_drain_qp(isert_conn->qp);
10710 + isert_put_unsol_pending_cmds(conn);
10711 ++ isert_wait4cmds(conn);
10712 + isert_wait4logout(isert_conn);
10713 +
10714 + queue_work(isert_release_wq, &isert_conn->release_work);
10715 +diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
10716 +index 111a71190547..5f764e0993a4 100644
10717 +--- a/drivers/input/mouse/synaptics.c
10718 ++++ b/drivers/input/mouse/synaptics.c
10719 +@@ -149,7 +149,6 @@ static const char * const topbuttonpad_pnp_ids[] = {
10720 + "LEN0042", /* Yoga */
10721 + "LEN0045",
10722 + "LEN0047",
10723 +- "LEN0049",
10724 + "LEN2000", /* S540 */
10725 + "LEN2001", /* Edge E431 */
10726 + "LEN2002", /* Edge E531 */
10727 +@@ -169,9 +168,11 @@ static const char * const smbus_pnp_ids[] = {
10728 + /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */
10729 + "LEN0048", /* X1 Carbon 3 */
10730 + "LEN0046", /* X250 */
10731 ++ "LEN0049", /* Yoga 11e */
10732 + "LEN004a", /* W541 */
10733 + "LEN005b", /* P50 */
10734 + "LEN005e", /* T560 */
10735 ++ "LEN006c", /* T470s */
10736 + "LEN0071", /* T480 */
10737 + "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */
10738 + "LEN0073", /* X1 Carbon G5 (Elantech) */
10739 +@@ -182,6 +183,7 @@ static const char * const smbus_pnp_ids[] = {
10740 + "LEN0097", /* X280 -> ALPS trackpoint */
10741 + "LEN009b", /* T580 */
10742 + "LEN200f", /* T450s */
10743 ++ "LEN2044", /* L470 */
10744 + "LEN2054", /* E480 */
10745 + "LEN2055", /* E580 */
10746 + "SYN3052", /* HP EliteBook 840 G4 */
10747 +diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
10748 +index 5bf63f76ddda..4eff5b44640c 100644
10749 +--- a/drivers/input/touchscreen/edt-ft5x06.c
10750 ++++ b/drivers/input/touchscreen/edt-ft5x06.c
10751 +@@ -888,6 +888,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client,
10752 + {
10753 + const struct edt_i2c_chip_data *chip_data;
10754 + struct edt_ft5x06_ts_data *tsdata;
10755 ++ u8 buf[2] = { 0xfc, 0x00 };
10756 + struct input_dev *input;
10757 + unsigned long irq_flags;
10758 + int error;
10759 +@@ -957,6 +958,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client,
10760 + return error;
10761 + }
10762 +
10763 ++ /*
10764 ++ * Dummy read access. EP0700MLP1 returns bogus data on the first
10765 ++ * register read access and ignores writes.
10766 ++ */
10767 ++ edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf);
10768 ++
10769 + edt_ft5x06_ts_set_regs(tsdata);
10770 + edt_ft5x06_ts_get_defaults(&client->dev, tsdata);
10771 + edt_ft5x06_ts_get_parameters(tsdata);
10772 +diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
10773 +index 09eb258a9a7d..29feafa8007f 100644
10774 +--- a/drivers/iommu/arm-smmu-v3.c
10775 ++++ b/drivers/iommu/arm-smmu-v3.c
10776 +@@ -1145,7 +1145,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
10777 + }
10778 +
10779 + arm_smmu_sync_ste_for_sid(smmu, sid);
10780 +- dst[0] = cpu_to_le64(val);
10781 ++ /* See comment in arm_smmu_write_ctx_desc() */
10782 ++ WRITE_ONCE(dst[0], cpu_to_le64(val));
10783 + arm_smmu_sync_ste_for_sid(smmu, sid);
10784 +
10785 + /* It's likely that we'll want to use the new STE soon */
10786 +diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
10787 +index c8a587d034b0..b08002851e06 100644
10788 +--- a/drivers/iommu/qcom_iommu.c
10789 ++++ b/drivers/iommu/qcom_iommu.c
10790 +@@ -327,21 +327,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
10791 + {
10792 + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
10793 +
10794 +- if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */
10795 +- return;
10796 +-
10797 + iommu_put_dma_cookie(domain);
10798 +
10799 +- /* NOTE: unmap can be called after client device is powered off,
10800 +- * for example, with GPUs or anything involving dma-buf. So we
10801 +- * cannot rely on the device_link. Make sure the IOMMU is on to
10802 +- * avoid unclocked accesses in the TLB inv path:
10803 +- */
10804 +- pm_runtime_get_sync(qcom_domain->iommu->dev);
10805 +-
10806 +- free_io_pgtable_ops(qcom_domain->pgtbl_ops);
10807 +-
10808 +- pm_runtime_put_sync(qcom_domain->iommu->dev);
10809 ++ if (qcom_domain->iommu) {
10810 ++ /*
10811 ++ * NOTE: unmap can be called after client device is powered
10812 ++ * off, for example, with GPUs or anything involving dma-buf.
10813 ++ * So we cannot rely on the device_link. Make sure the IOMMU
10814 ++ * is on to avoid unclocked accesses in the TLB inv path:
10815 ++ */
10816 ++ pm_runtime_get_sync(qcom_domain->iommu->dev);
10817 ++ free_io_pgtable_ops(qcom_domain->pgtbl_ops);
10818 ++ pm_runtime_put_sync(qcom_domain->iommu->dev);
10819 ++ }
10820 +
10821 + kfree(qcom_domain);
10822 + }
10823 +@@ -386,7 +384,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
10824 + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
10825 + unsigned i;
10826 +
10827 +- if (!qcom_domain->iommu)
10828 ++ if (WARN_ON(!qcom_domain->iommu))
10829 + return;
10830 +
10831 + pm_runtime_get_sync(qcom_iommu->dev);
10832 +@@ -397,8 +395,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
10833 + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
10834 + }
10835 + pm_runtime_put_sync(qcom_iommu->dev);
10836 +-
10837 +- qcom_domain->iommu = NULL;
10838 + }
10839 +
10840 + static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
10841 +diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
10842 +index 52238e6bed39..799df1e598db 100644
10843 +--- a/drivers/irqchip/irq-gic-v3-its.c
10844 ++++ b/drivers/irqchip/irq-gic-v3-its.c
10845 +@@ -527,7 +527,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd,
10846 + struct its_cmd_desc *desc)
10847 + {
10848 + its_encode_cmd(cmd, GITS_CMD_INVALL);
10849 +- its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id);
10850 ++ its_encode_collection(cmd, desc->its_invall_cmd.col->col_id);
10851 +
10852 + its_fixup_cmd(cmd);
10853 +
10854 +diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
10855 +index 3d7374655587..730b3c1cf7f6 100644
10856 +--- a/drivers/irqchip/irq-gic-v3.c
10857 ++++ b/drivers/irqchip/irq-gic-v3.c
10858 +@@ -1253,6 +1253,7 @@ static struct
10859 + struct redist_region *redist_regs;
10860 + u32 nr_redist_regions;
10861 + bool single_redist;
10862 ++ int enabled_rdists;
10863 + u32 maint_irq;
10864 + int maint_irq_mode;
10865 + phys_addr_t vcpu_base;
10866 +@@ -1347,8 +1348,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
10867 + * If GICC is enabled and has valid gicr base address, then it means
10868 + * GICR base is presented via GICC
10869 + */
10870 +- if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address)
10871 ++ if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
10872 ++ acpi_data.enabled_rdists++;
10873 + return 0;
10874 ++ }
10875 +
10876 + /*
10877 + * It's perfectly valid firmware can pass disabled GICC entry, driver
10878 +@@ -1378,8 +1381,10 @@ static int __init gic_acpi_count_gicr_regions(void)
10879 +
10880 + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
10881 + gic_acpi_match_gicc, 0);
10882 +- if (count > 0)
10883 ++ if (count > 0) {
10884 + acpi_data.single_redist = true;
10885 ++ count = acpi_data.enabled_rdists;
10886 ++ }
10887 +
10888 + return count;
10889 + }
10890 +diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
10891 +index 98b6e1d4b1a6..f7fdbf5d183b 100644
10892 +--- a/drivers/irqchip/irq-mbigen.c
10893 ++++ b/drivers/irqchip/irq-mbigen.c
10894 +@@ -381,6 +381,7 @@ static struct platform_driver mbigen_platform_driver = {
10895 + .name = "Hisilicon MBIGEN-V2",
10896 + .of_match_table = mbigen_of_match,
10897 + .acpi_match_table = ACPI_PTR(mbigen_acpi_match),
10898 ++ .suppress_bind_attrs = true,
10899 + },
10900 + .probe = mbigen_device_probe,
10901 + };
10902 +diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c
10903 +index 3bf9a1271819..88c7313cf869 100644
10904 +--- a/drivers/leds/leds-pca963x.c
10905 ++++ b/drivers/leds/leds-pca963x.c
10906 +@@ -43,6 +43,8 @@
10907 + #define PCA963X_LED_PWM 0x2 /* Controlled through PWM */
10908 + #define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */
10909 +
10910 ++#define PCA963X_MODE2_OUTDRV 0x04 /* Open-drain or totem pole */
10911 ++#define PCA963X_MODE2_INVRT 0x10 /* Normal or inverted direction */
10912 + #define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */
10913 +
10914 + #define PCA963X_MODE1 0x00
10915 +@@ -462,12 +464,12 @@ static int pca963x_probe(struct i2c_client *client,
10916 + PCA963X_MODE2);
10917 + /* Configure output: open-drain or totem pole (push-pull) */
10918 + if (pdata->outdrv == PCA963X_OPEN_DRAIN)
10919 +- mode2 |= 0x01;
10920 ++ mode2 &= ~PCA963X_MODE2_OUTDRV;
10921 + else
10922 +- mode2 |= 0x05;
10923 ++ mode2 |= PCA963X_MODE2_OUTDRV;
10924 + /* Configure direction: normal or inverted */
10925 + if (pdata->dir == PCA963X_INVERTED)
10926 +- mode2 |= 0x10;
10927 ++ mode2 |= PCA963X_MODE2_INVRT;
10928 + i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2,
10929 + mode2);
10930 + }
10931 +diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
10932 +index 8d1964b472e7..0bfde500af19 100644
10933 +--- a/drivers/md/bcache/bset.h
10934 ++++ b/drivers/md/bcache/bset.h
10935 +@@ -381,7 +381,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
10936 +
10937 + /* Bkey utility code */
10938 +
10939 +-#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
10940 ++#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \
10941 ++ (unsigned int)(i)->keys)
10942 +
10943 + static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
10944 + {
10945 +diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c
10946 +index 8a430640c85d..1a20d0d558d3 100644
10947 +--- a/drivers/media/i2c/mt9v032.c
10948 ++++ b/drivers/media/i2c/mt9v032.c
10949 +@@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev,
10950 + struct v4l2_subdev_pad_config *cfg,
10951 + struct v4l2_subdev_mbus_code_enum *code)
10952 + {
10953 ++ struct mt9v032 *mt9v032 = to_mt9v032(subdev);
10954 ++
10955 + if (code->index > 0)
10956 + return -EINVAL;
10957 +
10958 +- code->code = MEDIA_BUS_FMT_SGRBG10_1X10;
10959 ++ code->code = mt9v032->format.code;
10960 + return 0;
10961 + }
10962 +
10963 +@@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev,
10964 + struct v4l2_subdev_pad_config *cfg,
10965 + struct v4l2_subdev_frame_size_enum *fse)
10966 + {
10967 +- if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10)
10968 ++ struct mt9v032 *mt9v032 = to_mt9v032(subdev);
10969 ++
10970 ++ if (fse->index >= 3)
10971 ++ return -EINVAL;
10972 ++ if (mt9v032->format.code != fse->code)
10973 + return -EINVAL;
10974 +
10975 + fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index);
10976 +diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c
10977 +index b7892f3efd98..5c4c3f0c57be 100644
10978 +--- a/drivers/media/platform/sti/bdisp/bdisp-hw.c
10979 ++++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c
10980 +@@ -14,8 +14,8 @@
10981 + #define MAX_SRC_WIDTH 2048
10982 +
10983 + /* Reset & boot poll config */
10984 +-#define POLL_RST_MAX 50
10985 +-#define POLL_RST_DELAY_MS 20
10986 ++#define POLL_RST_MAX 500
10987 ++#define POLL_RST_DELAY_MS 2
10988 +
10989 + enum bdisp_target_plan {
10990 + BDISP_RGB,
10991 +@@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp)
10992 + for (i = 0; i < POLL_RST_MAX; i++) {
10993 + if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE)
10994 + break;
10995 +- msleep(POLL_RST_DELAY_MS);
10996 ++ udelay(POLL_RST_DELAY_MS * 1000);
10997 + }
10998 + if (i == POLL_RST_MAX)
10999 + dev_err(bdisp->dev, "Reset timeout\n");
11000 +diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
11001 +index 19f374b180fc..52a3b32390a9 100644
11002 +--- a/drivers/net/ethernet/cisco/enic/enic_main.c
11003 ++++ b/drivers/net/ethernet/cisco/enic/enic_main.c
11004 +@@ -1972,10 +1972,10 @@ static int enic_stop(struct net_device *netdev)
11005 + napi_disable(&enic->napi[i]);
11006 +
11007 + netif_carrier_off(netdev);
11008 +- netif_tx_disable(netdev);
11009 + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
11010 + for (i = 0; i < enic->wq_count; i++)
11011 + napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
11012 ++ netif_tx_disable(netdev);
11013 +
11014 + if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
11015 + enic_dev_del_station_addr(enic);
11016 +diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
11017 +index 27d0e3b9833c..e4a2c74a9b47 100644
11018 +--- a/drivers/net/ethernet/freescale/gianfar.c
11019 ++++ b/drivers/net/ethernet/freescale/gianfar.c
11020 +@@ -2685,13 +2685,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
11021 + skb_dirtytx = tx_queue->skb_dirtytx;
11022 +
11023 + while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
11024 ++ bool do_tstamp;
11025 ++
11026 ++ do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
11027 ++ priv->hwts_tx_en;
11028 +
11029 + frags = skb_shinfo(skb)->nr_frags;
11030 +
11031 + /* When time stamping, one additional TxBD must be freed.
11032 + * Also, we need to dma_unmap_single() the TxPAL.
11033 + */
11034 +- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
11035 ++ if (unlikely(do_tstamp))
11036 + nr_txbds = frags + 2;
11037 + else
11038 + nr_txbds = frags + 1;
11039 +@@ -2705,7 +2709,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
11040 + (lstatus & BD_LENGTH_MASK))
11041 + break;
11042 +
11043 +- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
11044 ++ if (unlikely(do_tstamp)) {
11045 + next = next_txbd(bdp, base, tx_ring_size);
11046 + buflen = be16_to_cpu(next->length) +
11047 + GMAC_FCB_LEN + GMAC_TXPAL_LEN;
11048 +@@ -2715,7 +2719,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
11049 + dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
11050 + buflen, DMA_TO_DEVICE);
11051 +
11052 +- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
11053 ++ if (unlikely(do_tstamp)) {
11054 + struct skb_shared_hwtstamps shhwtstamps;
11055 + u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
11056 + ~0x7UL);
11057 +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
11058 +index 51e6846da72b..3c04f3d5de2d 100644
11059 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
11060 ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
11061 +@@ -225,7 +225,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
11062 + start_again:
11063 + err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
11064 + if (err)
11065 +- return err;
11066 ++ goto err_ctx_prepare;
11067 + j = 0;
11068 + for (; i < rif_count; i++) {
11069 + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
11070 +@@ -257,6 +257,7 @@ start_again:
11071 + return 0;
11072 + err_entry_append:
11073 + err_entry_get:
11074 ++err_ctx_prepare:
11075 + rtnl_unlock();
11076 + devlink_dpipe_entry_clear(&entry);
11077 + return err;
11078 +diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
11079 +index 571a1ff8f81f..6a26cef62193 100644
11080 +--- a/drivers/net/wan/fsl_ucc_hdlc.c
11081 ++++ b/drivers/net/wan/fsl_ucc_hdlc.c
11082 +@@ -240,6 +240,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
11083 + ret = -ENOMEM;
11084 + goto free_riptr;
11085 + }
11086 ++ if (riptr != (u16)riptr || tiptr != (u16)tiptr) {
11087 ++ dev_err(priv->dev, "MURAM allocation out of addressable range\n");
11088 ++ ret = -ENOMEM;
11089 ++ goto free_tiptr;
11090 ++ }
11091 +
11092 + /* Set RIPTR, TIPTR */
11093 + iowrite16be(riptr, &priv->ucc_pram->riptr);
11094 +diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
11095 +index 6a505c26a3e7..a269ed63d90f 100644
11096 +--- a/drivers/net/wan/ixp4xx_hss.c
11097 ++++ b/drivers/net/wan/ixp4xx_hss.c
11098 +@@ -261,7 +261,7 @@ struct port {
11099 + struct hss_plat_info *plat;
11100 + buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
11101 + struct desc *desc_tab; /* coherent */
11102 +- u32 desc_tab_phys;
11103 ++ dma_addr_t desc_tab_phys;
11104 + unsigned int id;
11105 + unsigned int clock_type, clock_rate, loopback;
11106 + unsigned int initialized, carrier;
11107 +@@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
11108 + dev->stats.tx_dropped++;
11109 + return NETDEV_TX_OK;
11110 + }
11111 +- memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
11112 ++ memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4);
11113 + dev_kfree_skb(skb);
11114 + #endif
11115 +
11116 +diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
11117 +index f1e3dad57629..f435bd0f8b5b 100644
11118 +--- a/drivers/net/wireless/broadcom/b43legacy/main.c
11119 ++++ b/drivers/net/wireless/broadcom/b43legacy/main.c
11120 +@@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev)
11121 + }
11122 +
11123 + /* Interrupt handler bottom-half */
11124 +-static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev)
11125 ++static void b43legacy_interrupt_tasklet(unsigned long data)
11126 + {
11127 ++ struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data;
11128 + u32 reason;
11129 + u32 dma_reason[ARRAY_SIZE(dev->dma_reason)];
11130 + u32 merged_dma_reason = 0;
11131 +@@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev,
11132 + b43legacy_set_status(wldev, B43legacy_STAT_UNINIT);
11133 + wldev->bad_frames_preempt = modparam_bad_frames_preempt;
11134 + tasklet_init(&wldev->isr_tasklet,
11135 +- (void (*)(unsigned long))b43legacy_interrupt_tasklet,
11136 ++ b43legacy_interrupt_tasklet,
11137 + (unsigned long)wldev);
11138 + if (modparam_pio)
11139 + wldev->__using_pio = true;
11140 +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
11141 +index 4c28b04ea605..d198a8780b96 100644
11142 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
11143 ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
11144 +@@ -1932,6 +1932,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes)
11145 + BRCMF_SDIO_FT_NORMAL)) {
11146 + rd->len = 0;
11147 + brcmu_pkt_buf_free_skb(pkt);
11148 ++ continue;
11149 + }
11150 + bus->sdcnt.rx_readahead_cnt++;
11151 + if (rd->len != roundup(rd_new.len, 16)) {
11152 +diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
11153 +index 19c442cb93e4..8fbdd7d4fd0c 100644
11154 +--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
11155 ++++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
11156 +@@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv)
11157 + }
11158 + }
11159 +
11160 +-static void ipw2100_irq_tasklet(struct ipw2100_priv *priv)
11161 ++static void ipw2100_irq_tasklet(unsigned long data)
11162 + {
11163 ++ struct ipw2100_priv *priv = (struct ipw2100_priv *)data;
11164 + struct net_device *dev = priv->net_dev;
11165 + unsigned long flags;
11166 + u32 inta, tmp;
11167 +@@ -6027,7 +6028,7 @@ static void ipw2100_rf_kill(struct work_struct *work)
11168 + spin_unlock_irqrestore(&priv->low_lock, flags);
11169 + }
11170 +
11171 +-static void ipw2100_irq_tasklet(struct ipw2100_priv *priv);
11172 ++static void ipw2100_irq_tasklet(unsigned long data);
11173 +
11174 + static const struct net_device_ops ipw2100_netdev_ops = {
11175 + .ndo_open = ipw2100_open,
11176 +@@ -6157,7 +6158,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev,
11177 + INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill);
11178 + INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event);
11179 +
11180 +- tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
11181 ++ tasklet_init(&priv->irq_tasklet,
11182 + ipw2100_irq_tasklet, (unsigned long)priv);
11183 +
11184 + /* NOTE: We do not start the deferred work for status checks yet */
11185 +diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
11186 +index 8da87496cb58..2d0734ab3f74 100644
11187 +--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
11188 ++++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
11189 +@@ -1966,8 +1966,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv)
11190 + wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL);
11191 + }
11192 +
11193 +-static void ipw_irq_tasklet(struct ipw_priv *priv)
11194 ++static void ipw_irq_tasklet(unsigned long data)
11195 + {
11196 ++ struct ipw_priv *priv = (struct ipw_priv *)data;
11197 + u32 inta, inta_mask, handled = 0;
11198 + unsigned long flags;
11199 + int rc = 0;
11200 +@@ -10702,7 +10703,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv)
11201 + INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate);
11202 + #endif /* CONFIG_IPW2200_QOS */
11203 +
11204 +- tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
11205 ++ tasklet_init(&priv->irq_tasklet,
11206 + ipw_irq_tasklet, (unsigned long)priv);
11207 +
11208 + return ret;
11209 +diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
11210 +index 329f3a63dadd..0fb81151a132 100644
11211 +--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
11212 ++++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
11213 +@@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il)
11214 + }
11215 +
11216 + static void
11217 +-il3945_irq_tasklet(struct il_priv *il)
11218 ++il3945_irq_tasklet(unsigned long data)
11219 + {
11220 ++ struct il_priv *il = (struct il_priv *)data;
11221 + u32 inta, handled = 0;
11222 + u32 inta_fh;
11223 + unsigned long flags;
11224 +@@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il)
11225 + setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il);
11226 +
11227 + tasklet_init(&il->irq_tasklet,
11228 +- (void (*)(unsigned long))il3945_irq_tasklet,
11229 ++ il3945_irq_tasklet,
11230 + (unsigned long)il);
11231 + }
11232 +
11233 +diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
11234 +index de9b6522c43f..665e82effb03 100644
11235 +--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
11236 ++++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
11237 +@@ -4363,8 +4363,9 @@ il4965_synchronize_irq(struct il_priv *il)
11238 + }
11239 +
11240 + static void
11241 +-il4965_irq_tasklet(struct il_priv *il)
11242 ++il4965_irq_tasklet(unsigned long data)
11243 + {
11244 ++ struct il_priv *il = (struct il_priv *)data;
11245 + u32 inta, handled = 0;
11246 + u32 inta_fh;
11247 + unsigned long flags;
11248 +@@ -6264,7 +6265,7 @@ il4965_setup_deferred_work(struct il_priv *il)
11249 + setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il);
11250 +
11251 + tasklet_init(&il->irq_tasklet,
11252 +- (void (*)(unsigned long))il4965_irq_tasklet,
11253 ++ il4965_irq_tasklet,
11254 + (unsigned long)il);
11255 + }
11256 +
11257 +diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
11258 +index 8d5acda92a9b..6e6b124f0d5e 100644
11259 +--- a/drivers/net/wireless/intel/iwlegacy/common.c
11260 ++++ b/drivers/net/wireless/intel/iwlegacy/common.c
11261 +@@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il)
11262 + u32 gp = _il_rd(il, CSR_EEPROM_GP);
11263 + int sz;
11264 + int ret;
11265 +- u16 addr;
11266 ++ int addr;
11267 +
11268 + /* allocate eeprom */
11269 + sz = il->cfg->eeprom_size;
11270 +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
11271 +index 1232f63278eb..319103f4b432 100644
11272 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
11273 ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
11274 +@@ -739,7 +739,8 @@ static struct thermal_zone_device_ops tzone_ops = {
11275 + static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
11276 + {
11277 + int i;
11278 +- char name[] = "iwlwifi";
11279 ++ char name[16];
11280 ++ static atomic_t counter = ATOMIC_INIT(0);
11281 +
11282 + if (!iwl_mvm_is_tt_in_fw(mvm)) {
11283 + mvm->tz_device.tzone = NULL;
11284 +@@ -749,6 +750,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
11285 +
11286 + BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH);
11287 +
11288 ++ sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF);
11289 + mvm->tz_device.tzone = thermal_zone_device_register(name,
11290 + IWL_MAX_DTS_TRIPS,
11291 + IWL_WRITABLE_TRIPS_MSK,
11292 +diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
11293 +index 1a8d8db80b05..486ca1ee306e 100644
11294 +--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
11295 ++++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
11296 +@@ -2568,7 +2568,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap,
11297 + sta->supported_rates[0] = 2;
11298 + if (sta->tx_supp_rates & WLAN_RATE_2M)
11299 + sta->supported_rates[1] = 4;
11300 +- if (sta->tx_supp_rates & WLAN_RATE_5M5)
11301 ++ if (sta->tx_supp_rates & WLAN_RATE_5M5)
11302 + sta->supported_rates[2] = 11;
11303 + if (sta->tx_supp_rates & WLAN_RATE_11M)
11304 + sta->supported_rates[3] = 22;
11305 +diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
11306 +index 95015d74b1c0..5a64674a5c8d 100644
11307 +--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
11308 ++++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
11309 +@@ -1364,7 +1364,8 @@ static int ezusb_init(struct hermes *hw)
11310 + int retval;
11311 +
11312 + BUG_ON(in_interrupt());
11313 +- BUG_ON(!upriv);
11314 ++ if (!upriv)
11315 ++ return -EINVAL;
11316 +
11317 + upriv->reply_count = 0;
11318 + /* Write the MAGIC number on the simulated registers to keep
11319 +diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
11320 +index 457a0f725c8a..ab74f3155854 100644
11321 +--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
11322 ++++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
11323 +@@ -1091,13 +1091,15 @@ done:
11324 + return ret;
11325 + }
11326 +
11327 +-static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw)
11328 ++static void _rtl_pci_irq_tasklet(unsigned long data)
11329 + {
11330 ++ struct ieee80211_hw *hw = (struct ieee80211_hw *)data;
11331 + _rtl_pci_tx_chk_waitq(hw);
11332 + }
11333 +
11334 +-static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw)
11335 ++static void _rtl_pci_prepare_bcn_tasklet(unsigned long data)
11336 + {
11337 ++ struct ieee80211_hw *hw = (struct ieee80211_hw *)data;
11338 + struct rtl_priv *rtlpriv = rtl_priv(hw);
11339 + struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
11340 + struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
11341 +@@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw,
11342 +
11343 + /*task */
11344 + tasklet_init(&rtlpriv->works.irq_tasklet,
11345 +- (void (*)(unsigned long))_rtl_pci_irq_tasklet,
11346 ++ _rtl_pci_irq_tasklet,
11347 + (unsigned long)hw);
11348 + tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet,
11349 +- (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet,
11350 ++ _rtl_pci_prepare_bcn_tasklet,
11351 + (unsigned long)hw);
11352 + INIT_WORK(&rtlpriv->works.lps_change_work,
11353 + rtl_lps_change_work_callback);
11354 +diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
11355 +index 60ae382f50da..06bb226c62ef 100644
11356 +--- a/drivers/nfc/port100.c
11357 ++++ b/drivers/nfc/port100.c
11358 +@@ -574,7 +574,7 @@ static void port100_tx_update_payload_len(void *_frame, int len)
11359 + {
11360 + struct port100_frame *frame = _frame;
11361 +
11362 +- frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len);
11363 ++ le16_add_cpu(&frame->datalen, len);
11364 + }
11365 +
11366 + static bool port100_rx_frame_is_valid(void *_frame)
11367 +diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
11368 +index 0fd8e164339c..0dc646c1bc3d 100644
11369 +--- a/drivers/pci/iov.c
11370 ++++ b/drivers/pci/iov.c
11371 +@@ -179,6 +179,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
11372 + failed2:
11373 + sysfs_remove_link(&dev->dev.kobj, buf);
11374 + failed1:
11375 ++ pci_stop_and_remove_bus_device(virtfn);
11376 + pci_dev_put(dev);
11377 + pci_stop_and_remove_bus_device(virtfn);
11378 + failed0:
11379 +diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
11380 +index 9df5d29d708d..4fb3e44f9133 100644
11381 +--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
11382 ++++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
11383 +@@ -958,7 +958,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset)
11384 +
11385 + raw_spin_lock_irqsave(&byt_lock, flags);
11386 + value = readl(reg);
11387 +- value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
11388 ++
11389 ++ /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */
11390 ++ if (value & BYT_DIRECT_IRQ_EN)
11391 ++ /* nothing to do */ ;
11392 ++ else
11393 ++ value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
11394 ++
11395 + writel(value, reg);
11396 + raw_spin_unlock_irqrestore(&byt_lock, flags);
11397 + }
11398 +diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
11399 +index e1c34e19222e..3ddb9565ed80 100644
11400 +--- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c
11401 ++++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
11402 +@@ -500,17 +500,15 @@ enum {
11403 + SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK,
11404 + CRX0_MARK, CRX1_MARK,
11405 + CTX0_MARK, CTX1_MARK,
11406 ++ CRX0_CRX1_MARK, CTX0_CTX1_MARK,
11407 +
11408 + PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK,
11409 + PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK,
11410 + PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK,
11411 + PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK,
11412 + IERXD_MARK, IETXD_MARK,
11413 +- CRX0_CRX1_MARK,
11414 + WDTOVF_MARK,
11415 +
11416 +- CRX0X1_MARK,
11417 +-
11418 + /* DMAC */
11419 + TEND0_MARK, DACK0_MARK, DREQ0_MARK,
11420 + TEND1_MARK, DACK1_MARK, DREQ1_MARK,
11421 +@@ -998,12 +996,12 @@ static const u16 pinmux_data[] = {
11422 +
11423 + PINMUX_DATA(PJ3_DATA, PJ3MD_00),
11424 + PINMUX_DATA(CRX1_MARK, PJ3MD_01),
11425 +- PINMUX_DATA(CRX0X1_MARK, PJ3MD_10),
11426 ++ PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10),
11427 + PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11),
11428 +
11429 + PINMUX_DATA(PJ2_DATA, PJ2MD_000),
11430 + PINMUX_DATA(CTX1_MARK, PJ2MD_001),
11431 +- PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010),
11432 ++ PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010),
11433 + PINMUX_DATA(CS2_MARK, PJ2MD_011),
11434 + PINMUX_DATA(SCK0_MARK, PJ2MD_100),
11435 + PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101),
11436 +@@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = {
11437 + GPIO_FN(CTX1),
11438 + GPIO_FN(CRX1),
11439 + GPIO_FN(CTX0),
11440 ++ GPIO_FN(CTX0_CTX1),
11441 + GPIO_FN(CRX0),
11442 + GPIO_FN(CRX0_CRX1),
11443 +
11444 +diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
11445 +index cfdb4fc177c3..3df0c0d139d0 100644
11446 +--- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c
11447 ++++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
11448 +@@ -740,13 +740,12 @@ enum {
11449 + CRX0_MARK, CTX0_MARK,
11450 + CRX1_MARK, CTX1_MARK,
11451 + CRX2_MARK, CTX2_MARK,
11452 +- CRX0_CRX1_MARK,
11453 +- CRX0_CRX1_CRX2_MARK,
11454 +- CTX0CTX1CTX2_MARK,
11455 ++ CRX0_CRX1_MARK, CTX0_CTX1_MARK,
11456 ++ CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK,
11457 + CRX1_PJ22_MARK, CTX1_PJ23_MARK,
11458 + CRX2_PJ20_MARK, CTX2_PJ21_MARK,
11459 +- CRX0CRX1_PJ22_MARK,
11460 +- CRX0CRX1CRX2_PJ20_MARK,
11461 ++ CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK,
11462 ++ CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK,
11463 +
11464 + /* VDC */
11465 + DV_CLK_MARK,
11466 +@@ -824,6 +823,7 @@ static const u16 pinmux_data[] = {
11467 + PINMUX_DATA(CS3_MARK, PC8MD_001),
11468 + PINMUX_DATA(TXD7_MARK, PC8MD_010),
11469 + PINMUX_DATA(CTX1_MARK, PC8MD_011),
11470 ++ PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100),
11471 +
11472 + PINMUX_DATA(PC7_DATA, PC7MD_000),
11473 + PINMUX_DATA(CKE_MARK, PC7MD_001),
11474 +@@ -836,11 +836,12 @@ static const u16 pinmux_data[] = {
11475 + PINMUX_DATA(CAS_MARK, PC6MD_001),
11476 + PINMUX_DATA(SCK7_MARK, PC6MD_010),
11477 + PINMUX_DATA(CTX0_MARK, PC6MD_011),
11478 ++ PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100),
11479 +
11480 + PINMUX_DATA(PC5_DATA, PC5MD_000),
11481 + PINMUX_DATA(RAS_MARK, PC5MD_001),
11482 + PINMUX_DATA(CRX0_MARK, PC5MD_011),
11483 +- PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100),
11484 ++ PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100),
11485 + PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101),
11486 +
11487 + PINMUX_DATA(PC4_DATA, PC4MD_00),
11488 +@@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = {
11489 + PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010),
11490 + PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011),
11491 + PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100),
11492 +- PINMUX_DATA(CTX1_MARK, PJ23MD_101),
11493 ++ PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101),
11494 ++ PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110),
11495 +
11496 + PINMUX_DATA(PJ22_DATA, PJ22MD_000),
11497 + PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001),
11498 + PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010),
11499 + PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011),
11500 + PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100),
11501 +- PINMUX_DATA(CRX1_MARK, PJ22MD_101),
11502 +- PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110),
11503 ++ PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101),
11504 ++ PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110),
11505 +
11506 + PINMUX_DATA(PJ21_DATA, PJ21MD_000),
11507 + PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001),
11508 + PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010),
11509 + PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011),
11510 + PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100),
11511 +- PINMUX_DATA(CTX2_MARK, PJ21MD_101),
11512 ++ PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101),
11513 ++ PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110),
11514 +
11515 + PINMUX_DATA(PJ20_DATA, PJ20MD_000),
11516 + PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001),
11517 + PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010),
11518 + PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011),
11519 + PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100),
11520 +- PINMUX_DATA(CRX2_MARK, PJ20MD_101),
11521 +- PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110),
11522 ++ PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101),
11523 ++ PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110),
11524 +
11525 + PINMUX_DATA(PJ19_DATA, PJ19MD_000),
11526 + PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001),
11527 +@@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = {
11528 + GPIO_FN(WDTOVF),
11529 +
11530 + /* CAN */
11531 ++ GPIO_FN(CTX2),
11532 ++ GPIO_FN(CRX2),
11533 + GPIO_FN(CTX1),
11534 + GPIO_FN(CRX1),
11535 + GPIO_FN(CTX0),
11536 + GPIO_FN(CRX0),
11537 ++ GPIO_FN(CTX0_CTX1),
11538 + GPIO_FN(CRX0_CRX1),
11539 ++ GPIO_FN(CTX0_CTX1_CTX2),
11540 + GPIO_FN(CRX0_CRX1_CRX2),
11541 ++ GPIO_FN(CTX2_PJ21),
11542 ++ GPIO_FN(CRX2_PJ20),
11543 ++ GPIO_FN(CTX1_PJ23),
11544 ++ GPIO_FN(CRX1_PJ22),
11545 ++ GPIO_FN(CTX0_CTX1_PJ23),
11546 ++ GPIO_FN(CRX0_CRX1_PJ22),
11547 ++ GPIO_FN(CTX0_CTX1_CTX2_PJ21),
11548 ++ GPIO_FN(CRX0_CRX1_CRX2_PJ20),
11549 +
11550 + /* DMAC */
11551 + GPIO_FN(TEND0),
11552 +diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
11553 +index 5ad42f33e70c..2e15acf13893 100644
11554 +--- a/drivers/pwm/pwm-omap-dmtimer.c
11555 ++++ b/drivers/pwm/pwm-omap-dmtimer.c
11556 +@@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev)
11557 + static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
11558 + {
11559 + struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev);
11560 ++ int ret;
11561 ++
11562 ++ ret = pwmchip_remove(&omap->chip);
11563 ++ if (ret)
11564 ++ return ret;
11565 +
11566 + if (pm_runtime_active(&omap->dm_timer_pdev->dev))
11567 + omap->pdata->stop(omap->dm_timer);
11568 +@@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
11569 +
11570 + mutex_destroy(&omap->mutex);
11571 +
11572 +- return pwmchip_remove(&omap->chip);
11573 ++ return 0;
11574 + }
11575 +
11576 + static const struct of_device_id pwm_omap_dmtimer_of_match[] = {
11577 +diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c
11578 +index 567f5e2771c4..e1e5dfcb16f3 100644
11579 +--- a/drivers/pwm/pwm-pca9685.c
11580 ++++ b/drivers/pwm/pwm-pca9685.c
11581 +@@ -170,13 +170,9 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset,
11582 + static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset)
11583 + {
11584 + struct pca9685 *pca = gpiochip_get_data(gpio);
11585 +- struct pwm_device *pwm;
11586 +
11587 + pca9685_pwm_gpio_set(gpio, offset, 0);
11588 + pm_runtime_put(pca->chip.dev);
11589 +- mutex_lock(&pca->lock);
11590 +- pwm = &pca->chip.pwms[offset];
11591 +- mutex_unlock(&pca->lock);
11592 + }
11593 +
11594 + static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip,
11595 +diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
11596 +index 213b68743cc8..92498ac50303 100644
11597 +--- a/drivers/regulator/rk808-regulator.c
11598 ++++ b/drivers/regulator/rk808-regulator.c
11599 +@@ -714,7 +714,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev,
11600 + }
11601 +
11602 + if (!pdata->dvs_gpio[i]) {
11603 +- dev_warn(dev, "there is no dvs%d gpio\n", i);
11604 ++ dev_info(dev, "there is no dvs%d gpio\n", i);
11605 + continue;
11606 + }
11607 +
11608 +diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
11609 +index eab14b414bf0..cc733b89560a 100644
11610 +--- a/drivers/remoteproc/remoteproc_core.c
11611 ++++ b/drivers/remoteproc/remoteproc_core.c
11612 +@@ -1620,7 +1620,7 @@ static int __init remoteproc_init(void)
11613 +
11614 + return 0;
11615 + }
11616 +-module_init(remoteproc_init);
11617 ++subsys_initcall(remoteproc_init);
11618 +
11619 + static void __exit remoteproc_exit(void)
11620 + {
11621 +diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c
11622 +index 381846164003..fdbb0a3dc9b4 100644
11623 +--- a/drivers/scsi/aic7xxx/aic7xxx_core.c
11624 ++++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
11625 +@@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period,
11626 + * At some speeds, we only support
11627 + * ST transfers.
11628 + */
11629 +- if ((syncrate->sxfr_u2 & ST_SXFR) != 0)
11630 ++ if ((syncrate->sxfr_u2 & ST_SXFR) != 0)
11631 + *ppr_options &= ~MSG_EXT_PPR_DT_REQ;
11632 + break;
11633 + }
11634 +diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
11635 +index 7e3a77d3c6f0..e3ca16043f9a 100644
11636 +--- a/drivers/scsi/iscsi_tcp.c
11637 ++++ b/drivers/scsi/iscsi_tcp.c
11638 +@@ -890,6 +890,10 @@ free_host:
11639 + static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
11640 + {
11641 + struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
11642 ++ struct iscsi_session *session = cls_session->dd_data;
11643 ++
11644 ++ if (WARN_ON_ONCE(session->leadconn))
11645 ++ return;
11646 +
11647 + iscsi_tcp_r2tpool_free(cls_session->dd_data);
11648 + iscsi_session_teardown(cls_session);
11649 +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
11650 +index 5f9d4dbc4a98..d4024015f859 100644
11651 +--- a/drivers/scsi/qla2xxx/qla_os.c
11652 ++++ b/drivers/scsi/qla2xxx/qla_os.c
11653 +@@ -3178,6 +3178,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
11654 + base_vha->mgmt_svr_loop_id, host->sg_tablesize);
11655 +
11656 + ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0);
11657 ++ if (unlikely(!ha->wq)) {
11658 ++ ret = -ENOMEM;
11659 ++ goto probe_failed;
11660 ++ }
11661 +
11662 + if (ha->mqenable) {
11663 + bool mq = false;
11664 +diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
11665 +index 95d71e301a53..aecb563a2b4e 100644
11666 +--- a/drivers/scsi/scsi_transport_iscsi.c
11667 ++++ b/drivers/scsi/scsi_transport_iscsi.c
11668 +@@ -2945,6 +2945,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
11669 + return err;
11670 + }
11671 +
11672 ++static int iscsi_session_has_conns(int sid)
11673 ++{
11674 ++ struct iscsi_cls_conn *conn;
11675 ++ unsigned long flags;
11676 ++ int found = 0;
11677 ++
11678 ++ spin_lock_irqsave(&connlock, flags);
11679 ++ list_for_each_entry(conn, &connlist, conn_list) {
11680 ++ if (iscsi_conn_get_sid(conn) == sid) {
11681 ++ found = 1;
11682 ++ break;
11683 ++ }
11684 ++ }
11685 ++ spin_unlock_irqrestore(&connlock, flags);
11686 ++
11687 ++ return found;
11688 ++}
11689 ++
11690 + static int
11691 + iscsi_set_iface_params(struct iscsi_transport *transport,
11692 + struct iscsi_uevent *ev, uint32_t len)
11693 +@@ -3522,10 +3540,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
11694 + break;
11695 + case ISCSI_UEVENT_DESTROY_SESSION:
11696 + session = iscsi_session_lookup(ev->u.d_session.sid);
11697 +- if (session)
11698 +- transport->destroy_session(session);
11699 +- else
11700 ++ if (!session)
11701 + err = -EINVAL;
11702 ++ else if (iscsi_session_has_conns(ev->u.d_session.sid))
11703 ++ err = -EBUSY;
11704 ++ else
11705 ++ transport->destroy_session(session);
11706 + break;
11707 + case ISCSI_UEVENT_UNBIND_SESSION:
11708 + session = iscsi_session_lookup(ev->u.d_session.sid);
11709 +diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
11710 +index ce40de334f11..c35045324695 100644
11711 +--- a/drivers/scsi/ufs/ufshcd.c
11712 ++++ b/drivers/scsi/ufs/ufshcd.c
11713 +@@ -4580,7 +4580,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
11714 + break;
11715 + } /* end of switch */
11716 +
11717 +- if (host_byte(result) != DID_OK)
11718 ++ if ((host_byte(result) != DID_OK) && !hba->silence_err_logs)
11719 + ufshcd_print_trs(hba, 1 << lrbp->task_tag, true);
11720 + return result;
11721 + }
11722 +@@ -5109,8 +5109,8 @@ static void ufshcd_err_handler(struct work_struct *work)
11723 +
11724 + /*
11725 + * if host reset is required then skip clearing the pending
11726 +- * transfers forcefully because they will automatically get
11727 +- * cleared after link startup.
11728 ++ * transfers forcefully because they will get cleared during
11729 ++ * host reset and restore
11730 + */
11731 + if (needs_reset)
11732 + goto skip_pending_xfer_clear;
11733 +@@ -5749,9 +5749,15 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
11734 + int err;
11735 + unsigned long flags;
11736 +
11737 +- /* Reset the host controller */
11738 ++ /*
11739 ++ * Stop the host controller and complete the requests
11740 ++ * cleared by h/w
11741 ++ */
11742 + spin_lock_irqsave(hba->host->host_lock, flags);
11743 + ufshcd_hba_stop(hba, false);
11744 ++ hba->silence_err_logs = true;
11745 ++ ufshcd_complete_requests(hba);
11746 ++ hba->silence_err_logs = false;
11747 + spin_unlock_irqrestore(hba->host->host_lock, flags);
11748 +
11749 + /* scale up clocks to max frequency before full reinitialization */
11750 +@@ -5785,22 +5791,12 @@ out:
11751 + static int ufshcd_reset_and_restore(struct ufs_hba *hba)
11752 + {
11753 + int err = 0;
11754 +- unsigned long flags;
11755 + int retries = MAX_HOST_RESET_RETRIES;
11756 +
11757 + do {
11758 + err = ufshcd_host_reset_and_restore(hba);
11759 + } while (err && --retries);
11760 +
11761 +- /*
11762 +- * After reset the door-bell might be cleared, complete
11763 +- * outstanding requests in s/w here.
11764 +- */
11765 +- spin_lock_irqsave(hba->host->host_lock, flags);
11766 +- ufshcd_transfer_req_compl(hba);
11767 +- ufshcd_tmc_handler(hba);
11768 +- spin_unlock_irqrestore(hba->host->host_lock, flags);
11769 +-
11770 + return err;
11771 + }
11772 +
11773 +diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
11774 +index cdc8bd05f7df..4aac4d86f57b 100644
11775 +--- a/drivers/scsi/ufs/ufshcd.h
11776 ++++ b/drivers/scsi/ufs/ufshcd.h
11777 +@@ -485,6 +485,7 @@ struct ufs_stats {
11778 + * @uic_error: UFS interconnect layer error status
11779 + * @saved_err: sticky error mask
11780 + * @saved_uic_err: sticky UIC error mask
11781 ++ * @silence_err_logs: flag to silence error logs
11782 + * @dev_cmd: ufs device management command information
11783 + * @last_dme_cmd_tstamp: time stamp of the last completed DME command
11784 + * @auto_bkops_enabled: to track whether bkops is enabled in device
11785 +@@ -621,6 +622,7 @@ struct ufs_hba {
11786 + u32 saved_err;
11787 + u32 saved_uic_err;
11788 + struct ufs_stats ufs_stats;
11789 ++ bool silence_err_logs;
11790 +
11791 + /* Device management request data */
11792 + struct ufs_dev_cmd dev_cmd;
11793 +diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c
11794 +index 5b18f6ffa45c..cd61c883c19f 100644
11795 +--- a/drivers/soc/tegra/fuse/tegra-apbmisc.c
11796 ++++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c
11797 +@@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void)
11798 + apbmisc.flags = IORESOURCE_MEM;
11799 +
11800 + /* strapping options */
11801 +- if (tegra_get_chip_id() == TEGRA124) {
11802 ++ if (of_machine_is_compatible("nvidia,tegra124")) {
11803 + straps.start = 0x7000e864;
11804 + straps.end = 0x7000e867;
11805 + } else {
11806 +diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
11807 +index 4151bb44a410..9481c0b23386 100644
11808 +--- a/drivers/staging/android/ashmem.c
11809 ++++ b/drivers/staging/android/ashmem.c
11810 +@@ -361,8 +361,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot)
11811 + _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC);
11812 + }
11813 +
11814 ++static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma)
11815 ++{
11816 ++ /* do not allow to mmap ashmem backing shmem file directly */
11817 ++ return -EPERM;
11818 ++}
11819 ++
11820 ++static unsigned long
11821 ++ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr,
11822 ++ unsigned long len, unsigned long pgoff,
11823 ++ unsigned long flags)
11824 ++{
11825 ++ return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
11826 ++}
11827 ++
11828 + static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
11829 + {
11830 ++ static struct file_operations vmfile_fops;
11831 + struct ashmem_area *asma = file->private_data;
11832 + int ret = 0;
11833 +
11834 +@@ -403,6 +418,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
11835 + }
11836 + vmfile->f_mode |= FMODE_LSEEK;
11837 + asma->file = vmfile;
11838 ++ /*
11839 ++ * override mmap operation of the vmfile so that it can't be
11840 ++ * remapped which would lead to creation of a new vma with no
11841 ++ * asma permission checks. Have to override get_unmapped_area
11842 ++ * as well to prevent VM_BUG_ON check for f_ops modification.
11843 ++ */
11844 ++ if (!vmfile_fops.mmap) {
11845 ++ vmfile_fops = *vmfile->f_op;
11846 ++ vmfile_fops.mmap = ashmem_vmfile_mmap;
11847 ++ vmfile_fops.get_unmapped_area =
11848 ++ ashmem_vmfile_get_unmapped_area;
11849 ++ }
11850 ++ vmfile->f_op = &vmfile_fops;
11851 + }
11852 + get_file(asma->file);
11853 +
11854 +diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c
11855 +index aa6508b44fab..ed7c32542cb3 100644
11856 +--- a/drivers/staging/greybus/audio_manager.c
11857 ++++ b/drivers/staging/greybus/audio_manager.c
11858 +@@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void)
11859 +
11860 + list_for_each_entry_safe(module, next, &modules_list, list) {
11861 + list_del(&module->list);
11862 +- kobject_put(&module->kobj);
11863 + ida_simple_remove(&module_id, module->id);
11864 ++ kobject_put(&module->kobj);
11865 + }
11866 +
11867 + is_empty = list_empty(&modules_list);
11868 +diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
11869 +index 446310775e90..184fc05a0f8b 100644
11870 +--- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
11871 ++++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
11872 +@@ -2051,7 +2051,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
11873 + struct ieee_param *param;
11874 + uint ret = 0;
11875 +
11876 +- if (p->length < sizeof(struct ieee_param) || !p->pointer) {
11877 ++ if (!p->pointer || p->length != sizeof(struct ieee_param)) {
11878 + ret = -EINVAL;
11879 + goto out;
11880 + }
11881 +@@ -2856,7 +2856,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
11882 + goto out;
11883 + }
11884 +
11885 +- if (!p->pointer) {
11886 ++ if (!p->pointer || p->length != sizeof(struct ieee_param)) {
11887 + ret = -EINVAL;
11888 + goto out;
11889 + }
11890 +diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
11891 +index d0b317077511..f92f9073c507 100644
11892 +--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
11893 ++++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c
11894 +@@ -486,14 +486,13 @@ int rtl8723bs_xmit_thread(void *context)
11895 + s32 ret;
11896 + struct adapter *padapter;
11897 + struct xmit_priv *pxmitpriv;
11898 +- u8 thread_name[20] = "RTWHALXT";
11899 +-
11900 ++ u8 thread_name[20];
11901 +
11902 + ret = _SUCCESS;
11903 + padapter = context;
11904 + pxmitpriv = &padapter->xmitpriv;
11905 +
11906 +- rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter));
11907 ++ rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter));
11908 + thread_enter(thread_name);
11909 +
11910 + DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter));
11911 +diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
11912 +index 1b61da61690b..d51f6c452972 100644
11913 +--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
11914 ++++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
11915 +@@ -3495,7 +3495,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
11916 +
11917 + /* down(&ieee->wx_sem); */
11918 +
11919 +- if (p->length < sizeof(struct ieee_param) || !p->pointer) {
11920 ++ if (!p->pointer || p->length != sizeof(struct ieee_param)) {
11921 + ret = -EINVAL;
11922 + goto out;
11923 + }
11924 +@@ -4340,7 +4340,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
11925 +
11926 +
11927 + /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */
11928 +- if (!p->pointer) {
11929 ++ if (!p->pointer || p->length != sizeof(*param)) {
11930 + ret = -EINVAL;
11931 + goto out;
11932 + }
11933 +diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c
11934 +index 655f0002f880..7b73fa2f8834 100644
11935 +--- a/drivers/staging/vt6656/dpc.c
11936 ++++ b/drivers/staging/vt6656/dpc.c
11937 +@@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb,
11938 +
11939 + vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm);
11940 +
11941 +- priv->bb_pre_ed_rssi = (u8)rx_dbm + 1;
11942 ++ priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1;
11943 + priv->current_rssi = priv->bb_pre_ed_rssi;
11944 +
11945 + frame = skb_data + 8;
11946 +diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
11947 +index 37d64acea5e1..fb7bd422e2e1 100644
11948 +--- a/drivers/target/iscsi/iscsi_target.c
11949 ++++ b/drivers/target/iscsi/iscsi_target.c
11950 +@@ -1158,9 +1158,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
11951 + hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length,
11952 + conn->cid);
11953 +
11954 +- if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
11955 +- return iscsit_add_reject_cmd(cmd,
11956 +- ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
11957 ++ target_get_sess_cmd(&cmd->se_cmd, true);
11958 +
11959 + cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd,
11960 + scsilun_to_int(&hdr->lun));
11961 +@@ -2006,9 +2004,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
11962 + conn->sess->se_sess, 0, DMA_NONE,
11963 + TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
11964 +
11965 +- if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
11966 +- return iscsit_add_reject_cmd(cmd,
11967 +- ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
11968 ++ target_get_sess_cmd(&cmd->se_cmd, true);
11969 +
11970 + /*
11971 + * TASK_REASSIGN for ERL=2 / connection stays inside of
11972 +@@ -4155,6 +4151,9 @@ int iscsit_close_connection(
11973 + iscsit_stop_nopin_response_timer(conn);
11974 + iscsit_stop_nopin_timer(conn);
11975 +
11976 ++ if (conn->conn_transport->iscsit_wait_conn)
11977 ++ conn->conn_transport->iscsit_wait_conn(conn);
11978 ++
11979 + /*
11980 + * During Connection recovery drop unacknowledged out of order
11981 + * commands for this connection, and prepare the other commands
11982 +@@ -4237,11 +4236,6 @@ int iscsit_close_connection(
11983 + * must wait until they have completed.
11984 + */
11985 + iscsit_check_conn_usage_count(conn);
11986 +- target_sess_cmd_list_set_waiting(sess->se_sess);
11987 +- target_wait_for_sess_cmds(sess->se_sess);
11988 +-
11989 +- if (conn->conn_transport->iscsit_wait_conn)
11990 +- conn->conn_transport->iscsit_wait_conn(conn);
11991 +
11992 + ahash_request_free(conn->conn_tx_hash);
11993 + if (conn->conn_rx_hash) {
11994 +diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
11995 +index fe2384b019ec..9cfc65ca173d 100644
11996 +--- a/drivers/thunderbolt/switch.c
11997 ++++ b/drivers/thunderbolt/switch.c
11998 +@@ -240,6 +240,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val,
11999 + return dma_port_flash_read(sw->dma_port, offset, val, bytes);
12000 + }
12001 +
12002 ++static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val,
12003 ++ size_t bytes)
12004 ++{
12005 ++ return -EPERM;
12006 ++}
12007 ++
12008 + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val,
12009 + size_t bytes)
12010 + {
12011 +@@ -285,6 +291,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id,
12012 + config.read_only = true;
12013 + } else {
12014 + config.name = "nvm_non_active";
12015 ++ config.reg_read = tb_switch_nvm_no_read;
12016 + config.reg_write = tb_switch_nvm_write;
12017 + config.root_only = true;
12018 + }
12019 +diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
12020 +index 69fc6d9ab490..88cf520da739 100644
12021 +--- a/drivers/tty/serdev/serdev-ttyport.c
12022 ++++ b/drivers/tty/serdev/serdev-ttyport.c
12023 +@@ -238,7 +238,6 @@ struct device *serdev_tty_port_register(struct tty_port *port,
12024 + struct device *parent,
12025 + struct tty_driver *drv, int idx)
12026 + {
12027 +- const struct tty_port_client_operations *old_ops;
12028 + struct serdev_controller *ctrl;
12029 + struct serport *serport;
12030 + int ret;
12031 +@@ -257,7 +256,6 @@ struct device *serdev_tty_port_register(struct tty_port *port,
12032 +
12033 + ctrl->ops = &ctrl_ops;
12034 +
12035 +- old_ops = port->client_ops;
12036 + port->client_ops = &client_ops;
12037 + port->client_data = ctrl;
12038 +
12039 +@@ -270,7 +268,7 @@ struct device *serdev_tty_port_register(struct tty_port *port,
12040 +
12041 + err_reset_data:
12042 + port->client_data = NULL;
12043 +- port->client_ops = old_ops;
12044 ++ port->client_ops = &tty_port_default_client_ops;
12045 + serdev_controller_put(ctrl);
12046 +
12047 + return ERR_PTR(ret);
12048 +@@ -285,8 +283,8 @@ int serdev_tty_port_unregister(struct tty_port *port)
12049 + return -ENODEV;
12050 +
12051 + serdev_controller_remove(ctrl);
12052 +- port->client_ops = NULL;
12053 + port->client_data = NULL;
12054 ++ port->client_ops = &tty_port_default_client_ops;
12055 + serdev_controller_put(ctrl);
12056 +
12057 + return 0;
12058 +diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
12059 +index 33a801353114..0a89df390f24 100644
12060 +--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
12061 ++++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
12062 +@@ -256,7 +256,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev)
12063 + port.port.line = rc;
12064 +
12065 + port.port.irq = irq_of_parse_and_map(np, 0);
12066 +- port.port.irqflags = IRQF_SHARED;
12067 + port.port.iotype = UPIO_MEM;
12068 + port.port.type = PORT_16550A;
12069 + port.port.uartclk = clk;
12070 +diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
12071 +index c698ebab6d3b..5017a0f46b82 100644
12072 +--- a/drivers/tty/serial/8250/8250_core.c
12073 ++++ b/drivers/tty/serial/8250/8250_core.c
12074 +@@ -181,7 +181,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
12075 + struct hlist_head *h;
12076 + struct hlist_node *n;
12077 + struct irq_info *i;
12078 +- int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
12079 ++ int ret;
12080 +
12081 + mutex_lock(&hash_mutex);
12082 +
12083 +@@ -216,9 +216,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
12084 + INIT_LIST_HEAD(&up->list);
12085 + i->head = &up->list;
12086 + spin_unlock_irq(&i->lock);
12087 +- irq_flags |= up->port.irqflags;
12088 + ret = request_irq(up->port.irq, serial8250_interrupt,
12089 +- irq_flags, up->port.name, i);
12090 ++ up->port.irqflags, up->port.name, i);
12091 + if (ret < 0)
12092 + serial_do_unlink(i, up);
12093 + }
12094 +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
12095 +index a73d2bc4b685..90a93c001e16 100644
12096 +--- a/drivers/tty/serial/8250/8250_port.c
12097 ++++ b/drivers/tty/serial/8250/8250_port.c
12098 +@@ -2258,6 +2258,10 @@ int serial8250_do_startup(struct uart_port *port)
12099 + }
12100 + }
12101 +
12102 ++ /* Check if we need to have shared IRQs */
12103 ++ if (port->irq && (up->port.flags & UPF_SHARE_IRQ))
12104 ++ up->port.irqflags |= IRQF_SHARED;
12105 ++
12106 + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) {
12107 + unsigned char iir1;
12108 + /*
12109 +diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
12110 +index 367ce812743e..a00227d312d3 100644
12111 +--- a/drivers/tty/serial/atmel_serial.c
12112 ++++ b/drivers/tty/serial/atmel_serial.c
12113 +@@ -498,7 +498,8 @@ static void atmel_stop_tx(struct uart_port *port)
12114 + atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
12115 +
12116 + if (atmel_uart_is_half_duplex(port))
12117 +- atmel_start_rx(port);
12118 ++ if (!atomic_read(&atmel_port->tasklet_shutdown))
12119 ++ atmel_start_rx(port);
12120 +
12121 + }
12122 +
12123 +diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
12124 +index a81a5be0cf7a..630065b551f5 100644
12125 +--- a/drivers/tty/serial/imx.c
12126 ++++ b/drivers/tty/serial/imx.c
12127 +@@ -80,7 +80,7 @@
12128 + #define UCR1_IDEN (1<<12) /* Idle condition interrupt */
12129 + #define UCR1_ICD_REG(x) (((x) & 3) << 10) /* idle condition detect */
12130 + #define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */
12131 +-#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */
12132 ++#define UCR1_RXDMAEN (1<<8) /* Recv ready DMA enable */
12133 + #define UCR1_IREN (1<<7) /* Infrared interface enable */
12134 + #define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */
12135 + #define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */
12136 +@@ -352,6 +352,30 @@ static void imx_port_rts_auto(struct imx_port *sport, unsigned long *ucr2)
12137 + *ucr2 |= UCR2_CTSC;
12138 + }
12139 +
12140 ++/*
12141 ++ * interrupts disabled on entry
12142 ++ */
12143 ++static void imx_start_rx(struct uart_port *port)
12144 ++{
12145 ++ struct imx_port *sport = (struct imx_port *)port;
12146 ++ unsigned int ucr1, ucr2;
12147 ++
12148 ++ ucr1 = readl(port->membase + UCR1);
12149 ++ ucr2 = readl(port->membase + UCR2);
12150 ++
12151 ++ ucr2 |= UCR2_RXEN;
12152 ++
12153 ++ if (sport->dma_is_enabled) {
12154 ++ ucr1 |= UCR1_RXDMAEN | UCR1_ATDMAEN;
12155 ++ } else {
12156 ++ ucr1 |= UCR1_RRDYEN;
12157 ++ }
12158 ++
12159 ++ /* Write UCR2 first as it includes RXEN */
12160 ++ writel(ucr2, port->membase + UCR2);
12161 ++ writel(ucr1, port->membase + UCR1);
12162 ++}
12163 ++
12164 + /*
12165 + * interrupts disabled on entry
12166 + */
12167 +@@ -378,9 +402,10 @@ static void imx_stop_tx(struct uart_port *port)
12168 + imx_port_rts_active(sport, &temp);
12169 + else
12170 + imx_port_rts_inactive(sport, &temp);
12171 +- temp |= UCR2_RXEN;
12172 + writel(temp, port->membase + UCR2);
12173 +
12174 ++ imx_start_rx(port);
12175 ++
12176 + temp = readl(port->membase + UCR4);
12177 + temp &= ~UCR4_TCEN;
12178 + writel(temp, port->membase + UCR4);
12179 +@@ -393,7 +418,7 @@ static void imx_stop_tx(struct uart_port *port)
12180 + static void imx_stop_rx(struct uart_port *port)
12181 + {
12182 + struct imx_port *sport = (struct imx_port *)port;
12183 +- unsigned long temp;
12184 ++ unsigned long ucr1, ucr2;
12185 +
12186 + if (sport->dma_is_enabled && sport->dma_is_rxing) {
12187 + if (sport->port.suspended) {
12188 +@@ -404,12 +429,18 @@ static void imx_stop_rx(struct uart_port *port)
12189 + }
12190 + }
12191 +
12192 +- temp = readl(sport->port.membase + UCR2);
12193 +- writel(temp & ~UCR2_RXEN, sport->port.membase + UCR2);
12194 ++ ucr1 = readl(sport->port.membase + UCR1);
12195 ++ ucr2 = readl(sport->port.membase + UCR2);
12196 +
12197 +- /* disable the `Receiver Ready Interrrupt` */
12198 +- temp = readl(sport->port.membase + UCR1);
12199 +- writel(temp & ~UCR1_RRDYEN, sport->port.membase + UCR1);
12200 ++ if (sport->dma_is_enabled) {
12201 ++ ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN);
12202 ++ } else {
12203 ++ ucr1 &= ~UCR1_RRDYEN;
12204 ++ }
12205 ++ writel(ucr1, port->membase + UCR1);
12206 ++
12207 ++ ucr2 &= ~UCR2_RXEN;
12208 ++ writel(ucr2, port->membase + UCR2);
12209 + }
12210 +
12211 + /*
12212 +@@ -526,7 +557,7 @@ static void imx_dma_tx(struct imx_port *sport)
12213 +
12214 + sport->tx_bytes = uart_circ_chars_pending(xmit);
12215 +
12216 +- if (xmit->tail < xmit->head) {
12217 ++ if (xmit->tail < xmit->head || xmit->head == 0) {
12218 + sport->dma_tx_nents = 1;
12219 + sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes);
12220 + } else {
12221 +@@ -581,10 +612,11 @@ static void imx_start_tx(struct uart_port *port)
12222 + imx_port_rts_active(sport, &temp);
12223 + else
12224 + imx_port_rts_inactive(sport, &temp);
12225 +- if (!(port->rs485.flags & SER_RS485_RX_DURING_TX))
12226 +- temp &= ~UCR2_RXEN;
12227 + writel(temp, port->membase + UCR2);
12228 +
12229 ++ if (!(port->rs485.flags & SER_RS485_RX_DURING_TX))
12230 ++ imx_stop_rx(port);
12231 ++
12232 + /* enable transmitter and shifter empty irq */
12233 + temp = readl(port->membase + UCR4);
12234 + temp |= UCR4_TCEN;
12235 +@@ -811,14 +843,42 @@ static void imx_mctrl_check(struct imx_port *sport)
12236 + static irqreturn_t imx_int(int irq, void *dev_id)
12237 + {
12238 + struct imx_port *sport = dev_id;
12239 +- unsigned int sts;
12240 +- unsigned int sts2;
12241 ++ unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4;
12242 + irqreturn_t ret = IRQ_NONE;
12243 +
12244 +- sts = readl(sport->port.membase + USR1);
12245 +- sts2 = readl(sport->port.membase + USR2);
12246 ++ usr1 = readl(sport->port.membase + USR1);
12247 ++ usr2 = readl(sport->port.membase + USR2);
12248 ++ ucr1 = readl(sport->port.membase + UCR1);
12249 ++ ucr2 = readl(sport->port.membase + UCR2);
12250 ++ ucr3 = readl(sport->port.membase + UCR3);
12251 ++ ucr4 = readl(sport->port.membase + UCR4);
12252 +
12253 +- if (sts & (USR1_RRDY | USR1_AGTIM)) {
12254 ++ /*
12255 ++ * Even if a condition is true that can trigger an irq only handle it if
12256 ++ * the respective irq source is enabled. This prevents some undesired
12257 ++ * actions, for example if a character that sits in the RX FIFO and that
12258 ++ * should be fetched via DMA is tried to be fetched using PIO. Or the
12259 ++ * receiver is currently off and so reading from URXD0 results in an
12260 ++ * exception. So just mask the (raw) status bits for disabled irqs.
12261 ++ */
12262 ++ if ((ucr1 & UCR1_RRDYEN) == 0)
12263 ++ usr1 &= ~USR1_RRDY;
12264 ++ if ((ucr2 & UCR2_ATEN) == 0)
12265 ++ usr1 &= ~USR1_AGTIM;
12266 ++ if ((ucr1 & UCR1_TXMPTYEN) == 0)
12267 ++ usr1 &= ~USR1_TRDY;
12268 ++ if ((ucr4 & UCR4_TCEN) == 0)
12269 ++ usr2 &= ~USR2_TXDC;
12270 ++ if ((ucr3 & UCR3_DTRDEN) == 0)
12271 ++ usr1 &= ~USR1_DTRD;
12272 ++ if ((ucr1 & UCR1_RTSDEN) == 0)
12273 ++ usr1 &= ~USR1_RTSD;
12274 ++ if ((ucr3 & UCR3_AWAKEN) == 0)
12275 ++ usr1 &= ~USR1_AWAKE;
12276 ++ if ((ucr4 & UCR4_OREN) == 0)
12277 ++ usr2 &= ~USR2_ORE;
12278 ++
12279 ++ if (usr1 & (USR1_RRDY | USR1_AGTIM)) {
12280 + if (sport->dma_is_enabled)
12281 + imx_dma_rxint(sport);
12282 + else
12283 +@@ -826,18 +886,15 @@ static irqreturn_t imx_int(int irq, void *dev_id)
12284 + ret = IRQ_HANDLED;
12285 + }
12286 +
12287 +- if ((sts & USR1_TRDY &&
12288 +- readl(sport->port.membase + UCR1) & UCR1_TXMPTYEN) ||
12289 +- (sts2 & USR2_TXDC &&
12290 +- readl(sport->port.membase + UCR4) & UCR4_TCEN)) {
12291 ++ if ((usr1 & USR1_TRDY) || (usr2 & USR2_TXDC)) {
12292 + imx_txint(irq, dev_id);
12293 + ret = IRQ_HANDLED;
12294 + }
12295 +
12296 +- if (sts & USR1_DTRD) {
12297 ++ if (usr1 & USR1_DTRD) {
12298 + unsigned long flags;
12299 +
12300 +- if (sts & USR1_DTRD)
12301 ++ if (usr1 & USR1_DTRD)
12302 + writel(USR1_DTRD, sport->port.membase + USR1);
12303 +
12304 + spin_lock_irqsave(&sport->port.lock, flags);
12305 +@@ -847,17 +904,17 @@ static irqreturn_t imx_int(int irq, void *dev_id)
12306 + ret = IRQ_HANDLED;
12307 + }
12308 +
12309 +- if (sts & USR1_RTSD) {
12310 ++ if (usr1 & USR1_RTSD) {
12311 + imx_rtsint(irq, dev_id);
12312 + ret = IRQ_HANDLED;
12313 + }
12314 +
12315 +- if (sts & USR1_AWAKE) {
12316 ++ if (usr1 & USR1_AWAKE) {
12317 + writel(USR1_AWAKE, sport->port.membase + USR1);
12318 + ret = IRQ_HANDLED;
12319 + }
12320 +
12321 +- if (sts2 & USR2_ORE) {
12322 ++ if (usr2 & USR2_ORE) {
12323 + sport->port.icount.overrun++;
12324 + writel(USR2_ORE, sport->port.membase + USR2);
12325 + ret = IRQ_HANDLED;
12326 +@@ -1206,7 +1263,7 @@ static void imx_enable_dma(struct imx_port *sport)
12327 +
12328 + /* set UCR1 */
12329 + temp = readl(sport->port.membase + UCR1);
12330 +- temp |= UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN;
12331 ++ temp |= UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN;
12332 + writel(temp, sport->port.membase + UCR1);
12333 +
12334 + temp = readl(sport->port.membase + UCR2);
12335 +@@ -1224,7 +1281,7 @@ static void imx_disable_dma(struct imx_port *sport)
12336 +
12337 + /* clear UCR1 */
12338 + temp = readl(sport->port.membase + UCR1);
12339 +- temp &= ~(UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN);
12340 ++ temp &= ~(UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN);
12341 + writel(temp, sport->port.membase + UCR1);
12342 +
12343 + /* clear UCR2 */
12344 +@@ -1289,11 +1346,9 @@ static int imx_startup(struct uart_port *port)
12345 + writel(USR1_RTSD | USR1_DTRD, sport->port.membase + USR1);
12346 + writel(USR2_ORE, sport->port.membase + USR2);
12347 +
12348 +- if (sport->dma_is_inited && !sport->dma_is_enabled)
12349 +- imx_enable_dma(sport);
12350 +-
12351 + temp = readl(sport->port.membase + UCR1);
12352 +- temp |= UCR1_RRDYEN | UCR1_UARTEN;
12353 ++ temp &= ~UCR1_RRDYEN;
12354 ++ temp |= UCR1_UARTEN;
12355 + if (sport->have_rtscts)
12356 + temp |= UCR1_RTSDEN;
12357 +
12358 +@@ -1332,14 +1387,13 @@ static int imx_startup(struct uart_port *port)
12359 + */
12360 + imx_enable_ms(&sport->port);
12361 +
12362 +- /*
12363 +- * Start RX DMA immediately instead of waiting for RX FIFO interrupts.
12364 +- * In our iMX53 the average delay for the first reception dropped from
12365 +- * approximately 35000 microseconds to 1000 microseconds.
12366 +- */
12367 +- if (sport->dma_is_enabled) {
12368 +- imx_disable_rx_int(sport);
12369 ++ if (sport->dma_is_inited) {
12370 ++ imx_enable_dma(sport);
12371 + start_rx_dma(sport);
12372 ++ } else {
12373 ++ temp = readl(sport->port.membase + UCR1);
12374 ++ temp |= UCR1_RRDYEN;
12375 ++ writel(temp, sport->port.membase + UCR1);
12376 + }
12377 +
12378 + spin_unlock_irqrestore(&sport->port.lock, flags);
12379 +@@ -1386,7 +1440,8 @@ static void imx_shutdown(struct uart_port *port)
12380 +
12381 + spin_lock_irqsave(&sport->port.lock, flags);
12382 + temp = readl(sport->port.membase + UCR1);
12383 +- temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN);
12384 ++ temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN |
12385 ++ UCR1_RXDMAEN | UCR1_ATDMAEN);
12386 +
12387 + writel(temp, sport->port.membase + UCR1);
12388 + spin_unlock_irqrestore(&sport->port.lock, flags);
12389 +@@ -1659,7 +1714,7 @@ static int imx_poll_init(struct uart_port *port)
12390 + {
12391 + struct imx_port *sport = (struct imx_port *)port;
12392 + unsigned long flags;
12393 +- unsigned long temp;
12394 ++ unsigned long ucr1, ucr2;
12395 + int retval;
12396 +
12397 + retval = clk_prepare_enable(sport->clk_ipg);
12398 +@@ -1673,16 +1728,29 @@ static int imx_poll_init(struct uart_port *port)
12399 +
12400 + spin_lock_irqsave(&sport->port.lock, flags);
12401 +
12402 +- temp = readl(sport->port.membase + UCR1);
12403 ++ /*
12404 ++ * Be careful about the order of enabling bits here. First enable the
12405 ++ * receiver (UARTEN + RXEN) and only then the corresponding irqs.
12406 ++ * This prevents that a character that already sits in the RX fifo is
12407 ++ * triggering an irq but the try to fetch it from there results in an
12408 ++ * exception because UARTEN or RXEN is still off.
12409 ++ */
12410 ++ ucr1 = readl(port->membase + UCR1);
12411 ++ ucr2 = readl(port->membase + UCR2);
12412 ++
12413 + if (is_imx1_uart(sport))
12414 +- temp |= IMX1_UCR1_UARTCLKEN;
12415 +- temp |= UCR1_UARTEN | UCR1_RRDYEN;
12416 +- temp &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN);
12417 +- writel(temp, sport->port.membase + UCR1);
12418 ++ ucr1 |= IMX1_UCR1_UARTCLKEN;
12419 +
12420 +- temp = readl(sport->port.membase + UCR2);
12421 +- temp |= UCR2_RXEN;
12422 +- writel(temp, sport->port.membase + UCR2);
12423 ++ ucr1 |= UCR1_UARTEN;
12424 ++ ucr1 &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN | UCR1_RRDYEN);
12425 ++
12426 ++ ucr2 |= UCR2_RXEN;
12427 ++
12428 ++ writel(ucr1, sport->port.membase + UCR1);
12429 ++ writel(ucr2, sport->port.membase + UCR2);
12430 ++
12431 ++ /* now enable irqs */
12432 ++ writel(ucr1 | UCR1_RRDYEN, sport->port.membase + UCR1);
12433 +
12434 + spin_unlock_irqrestore(&sport->port.lock, flags);
12435 +
12436 +@@ -1742,11 +1810,8 @@ static int imx_rs485_config(struct uart_port *port,
12437 +
12438 + /* Make sure Rx is enabled in case Tx is active with Rx disabled */
12439 + if (!(rs485conf->flags & SER_RS485_ENABLED) ||
12440 +- rs485conf->flags & SER_RS485_RX_DURING_TX) {
12441 +- temp = readl(sport->port.membase + UCR2);
12442 +- temp |= UCR2_RXEN;
12443 +- writel(temp, sport->port.membase + UCR2);
12444 +- }
12445 ++ rs485conf->flags & SER_RS485_RX_DURING_TX)
12446 ++ imx_start_rx(port);
12447 +
12448 + port->rs485 = *rs485conf;
12449 +
12450 +diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
12451 +index 344e8c427c7e..9d68f89a2bf8 100644
12452 +--- a/drivers/tty/synclink_gt.c
12453 ++++ b/drivers/tty/synclink_gt.c
12454 +@@ -1349,10 +1349,10 @@ static void throttle(struct tty_struct * tty)
12455 + DBGINFO(("%s throttle\n", info->device_name));
12456 + if (I_IXOFF(tty))
12457 + send_xchar(tty, STOP_CHAR(tty));
12458 +- if (C_CRTSCTS(tty)) {
12459 ++ if (C_CRTSCTS(tty)) {
12460 + spin_lock_irqsave(&info->lock,flags);
12461 + info->signals &= ~SerialSignal_RTS;
12462 +- set_signals(info);
12463 ++ set_signals(info);
12464 + spin_unlock_irqrestore(&info->lock,flags);
12465 + }
12466 + }
12467 +@@ -1374,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty)
12468 + else
12469 + send_xchar(tty, START_CHAR(tty));
12470 + }
12471 +- if (C_CRTSCTS(tty)) {
12472 ++ if (C_CRTSCTS(tty)) {
12473 + spin_lock_irqsave(&info->lock,flags);
12474 + info->signals |= SerialSignal_RTS;
12475 +- set_signals(info);
12476 ++ set_signals(info);
12477 + spin_unlock_irqrestore(&info->lock,flags);
12478 + }
12479 + }
12480 +@@ -2575,8 +2575,8 @@ static void change_params(struct slgt_info *info)
12481 + info->read_status_mask = IRQ_RXOVER;
12482 + if (I_INPCK(info->port.tty))
12483 + info->read_status_mask |= MASK_PARITY | MASK_FRAMING;
12484 +- if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
12485 +- info->read_status_mask |= MASK_BREAK;
12486 ++ if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
12487 ++ info->read_status_mask |= MASK_BREAK;
12488 + if (I_IGNPAR(info->port.tty))
12489 + info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING;
12490 + if (I_IGNBRK(info->port.tty)) {
12491 +@@ -3207,7 +3207,7 @@ static int tiocmset(struct tty_struct *tty,
12492 + info->signals &= ~SerialSignal_DTR;
12493 +
12494 + spin_lock_irqsave(&info->lock,flags);
12495 +- set_signals(info);
12496 ++ set_signals(info);
12497 + spin_unlock_irqrestore(&info->lock,flags);
12498 + return 0;
12499 + }
12500 +@@ -3218,7 +3218,7 @@ static int carrier_raised(struct tty_port *port)
12501 + struct slgt_info *info = container_of(port, struct slgt_info, port);
12502 +
12503 + spin_lock_irqsave(&info->lock,flags);
12504 +- get_signals(info);
12505 ++ get_signals(info);
12506 + spin_unlock_irqrestore(&info->lock,flags);
12507 + return (info->signals & SerialSignal_DCD) ? 1 : 0;
12508 + }
12509 +@@ -3233,7 +3233,7 @@ static void dtr_rts(struct tty_port *port, int on)
12510 + info->signals |= SerialSignal_RTS | SerialSignal_DTR;
12511 + else
12512 + info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
12513 +- set_signals(info);
12514 ++ set_signals(info);
12515 + spin_unlock_irqrestore(&info->lock,flags);
12516 + }
12517 +
12518 +diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
12519 +index 4fed9e7b281f..3c9e314406b4 100644
12520 +--- a/drivers/tty/synclinkmp.c
12521 ++++ b/drivers/tty/synclinkmp.c
12522 +@@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty)
12523 + if (I_IXOFF(tty))
12524 + send_xchar(tty, STOP_CHAR(tty));
12525 +
12526 +- if (C_CRTSCTS(tty)) {
12527 ++ if (C_CRTSCTS(tty)) {
12528 + spin_lock_irqsave(&info->lock,flags);
12529 + info->serial_signals &= ~SerialSignal_RTS;
12530 +- set_signals(info);
12531 ++ set_signals(info);
12532 + spin_unlock_irqrestore(&info->lock,flags);
12533 + }
12534 + }
12535 +@@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty)
12536 + send_xchar(tty, START_CHAR(tty));
12537 + }
12538 +
12539 +- if (C_CRTSCTS(tty)) {
12540 ++ if (C_CRTSCTS(tty)) {
12541 + spin_lock_irqsave(&info->lock,flags);
12542 + info->serial_signals |= SerialSignal_RTS;
12543 +- set_signals(info);
12544 ++ set_signals(info);
12545 + spin_unlock_irqrestore(&info->lock,flags);
12546 + }
12547 + }
12548 +@@ -2484,7 +2484,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status )
12549 + if (status & SerialSignal_CTS) {
12550 + if ( debug_level >= DEBUG_LEVEL_ISR )
12551 + printk("CTS tx start...");
12552 +- info->port.tty->hw_stopped = 0;
12553 ++ info->port.tty->hw_stopped = 0;
12554 + tx_start(info);
12555 + info->pending_bh |= BH_TRANSMIT;
12556 + return;
12557 +@@ -2493,7 +2493,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status )
12558 + if (!(status & SerialSignal_CTS)) {
12559 + if ( debug_level >= DEBUG_LEVEL_ISR )
12560 + printk("CTS tx stop...");
12561 +- info->port.tty->hw_stopped = 1;
12562 ++ info->port.tty->hw_stopped = 1;
12563 + tx_stop(info);
12564 + }
12565 + }
12566 +@@ -2820,8 +2820,8 @@ static void change_params(SLMP_INFO *info)
12567 + info->read_status_mask2 = OVRN;
12568 + if (I_INPCK(info->port.tty))
12569 + info->read_status_mask2 |= PE | FRME;
12570 +- if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
12571 +- info->read_status_mask1 |= BRKD;
12572 ++ if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
12573 ++ info->read_status_mask1 |= BRKD;
12574 + if (I_IGNPAR(info->port.tty))
12575 + info->ignore_status_mask2 |= PE | FRME;
12576 + if (I_IGNBRK(info->port.tty)) {
12577 +@@ -3191,7 +3191,7 @@ static int tiocmget(struct tty_struct *tty)
12578 + unsigned long flags;
12579 +
12580 + spin_lock_irqsave(&info->lock,flags);
12581 +- get_signals(info);
12582 ++ get_signals(info);
12583 + spin_unlock_irqrestore(&info->lock,flags);
12584 +
12585 + result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) |
12586 +@@ -3229,7 +3229,7 @@ static int tiocmset(struct tty_struct *tty,
12587 + info->serial_signals &= ~SerialSignal_DTR;
12588 +
12589 + spin_lock_irqsave(&info->lock,flags);
12590 +- set_signals(info);
12591 ++ set_signals(info);
12592 + spin_unlock_irqrestore(&info->lock,flags);
12593 +
12594 + return 0;
12595 +@@ -3241,7 +3241,7 @@ static int carrier_raised(struct tty_port *port)
12596 + unsigned long flags;
12597 +
12598 + spin_lock_irqsave(&info->lock,flags);
12599 +- get_signals(info);
12600 ++ get_signals(info);
12601 + spin_unlock_irqrestore(&info->lock,flags);
12602 +
12603 + return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
12604 +@@ -3257,7 +3257,7 @@ static void dtr_rts(struct tty_port *port, int on)
12605 + info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR;
12606 + else
12607 + info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
12608 +- set_signals(info);
12609 ++ set_signals(info);
12610 + spin_unlock_irqrestore(&info->lock,flags);
12611 + }
12612 +
12613 +diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
12614 +index c93a33701d32..dd12c3b86eb4 100644
12615 +--- a/drivers/tty/tty_port.c
12616 ++++ b/drivers/tty/tty_port.c
12617 +@@ -51,10 +51,11 @@ static void tty_port_default_wakeup(struct tty_port *port)
12618 + }
12619 + }
12620 +
12621 +-static const struct tty_port_client_operations default_client_ops = {
12622 ++const struct tty_port_client_operations tty_port_default_client_ops = {
12623 + .receive_buf = tty_port_default_receive_buf,
12624 + .write_wakeup = tty_port_default_wakeup,
12625 + };
12626 ++EXPORT_SYMBOL_GPL(tty_port_default_client_ops);
12627 +
12628 + void tty_port_init(struct tty_port *port)
12629 + {
12630 +@@ -67,7 +68,7 @@ void tty_port_init(struct tty_port *port)
12631 + spin_lock_init(&port->lock);
12632 + port->close_delay = (50 * HZ) / 100;
12633 + port->closing_wait = (3000 * HZ) / 100;
12634 +- port->client_ops = &default_client_ops;
12635 ++ port->client_ops = &tty_port_default_client_ops;
12636 + kref_init(&port->kref);
12637 + }
12638 + EXPORT_SYMBOL(tty_port_init);
12639 +diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
12640 +index 7a4c8022c023..b157f17d2be2 100644
12641 +--- a/drivers/tty/vt/selection.c
12642 ++++ b/drivers/tty/vt/selection.c
12643 +@@ -27,6 +27,8 @@
12644 + #include <linux/console.h>
12645 + #include <linux/tty_flip.h>
12646 +
12647 ++#include <linux/sched/signal.h>
12648 ++
12649 + /* Don't take this from <ctype.h>: 011-015 on the screen aren't spaces */
12650 + #define isspace(c) ((c) == ' ')
12651 +
12652 +@@ -338,6 +340,7 @@ int paste_selection(struct tty_struct *tty)
12653 + unsigned int count;
12654 + struct tty_ldisc *ld;
12655 + DECLARE_WAITQUEUE(wait, current);
12656 ++ int ret = 0;
12657 +
12658 + console_lock();
12659 + poke_blanked_console();
12660 +@@ -351,6 +354,10 @@ int paste_selection(struct tty_struct *tty)
12661 + add_wait_queue(&vc->paste_wait, &wait);
12662 + while (sel_buffer && sel_buffer_lth > pasted) {
12663 + set_current_state(TASK_INTERRUPTIBLE);
12664 ++ if (signal_pending(current)) {
12665 ++ ret = -EINTR;
12666 ++ break;
12667 ++ }
12668 + if (tty_throttled(tty)) {
12669 + schedule();
12670 + continue;
12671 +@@ -366,5 +373,5 @@ int paste_selection(struct tty_struct *tty)
12672 +
12673 + tty_buffer_unlock_exclusive(&vc->port);
12674 + tty_ldisc_deref(ld);
12675 +- return 0;
12676 ++ return ret;
12677 + }
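
The paste loop above now bails out with -EINTR as soon as a signal is pending, instead of looping until the whole selection has been pushed into the tty. A minimal standalone sketch of that pattern (userspace analog only; the signal flag and drain() below are illustrative, not the kernel code):

    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static volatile sig_atomic_t interrupted;

    static void on_sigint(int sig) { (void)sig; interrupted = 1; }

    /* Drain pending work one chunk at a time, but give up with -EINTR if a
     * signal arrives, instead of insisting on finishing the whole buffer. */
    static int drain(int chunks)
    {
            int ret = 0;

            while (chunks > 0) {
                    if (interrupted) {      /* analog of signal_pending(current) */
                            ret = -EINTR;
                            break;
                    }
                    chunks--;               /* "paste" one chunk */
                    usleep(1000);
            }
            return ret;
    }

    int main(void)
    {
            signal(SIGINT, on_sigint);
            printf("drain() returned %d\n", drain(5000));
            return 0;
    }
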
12678 +diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
12679 +index 7b34b0ddbf0e..c320fefab360 100644
12680 +--- a/drivers/tty/vt/vt_ioctl.c
12681 ++++ b/drivers/tty/vt/vt_ioctl.c
12682 +@@ -847,58 +847,49 @@ int vt_ioctl(struct tty_struct *tty,
12683 +
12684 + case VT_RESIZEX:
12685 + {
12686 +- struct vt_consize __user *vtconsize = up;
12687 +- ushort ll,cc,vlin,clin,vcol,ccol;
12688 ++ struct vt_consize v;
12689 + if (!perm)
12690 + return -EPERM;
12691 +- if (!access_ok(VERIFY_READ, vtconsize,
12692 +- sizeof(struct vt_consize))) {
12693 +- ret = -EFAULT;
12694 +- break;
12695 +- }
12696 ++ if (copy_from_user(&v, up, sizeof(struct vt_consize)))
12697 ++ return -EFAULT;
12698 + /* FIXME: Should check the copies properly */
12699 +- __get_user(ll, &vtconsize->v_rows);
12700 +- __get_user(cc, &vtconsize->v_cols);
12701 +- __get_user(vlin, &vtconsize->v_vlin);
12702 +- __get_user(clin, &vtconsize->v_clin);
12703 +- __get_user(vcol, &vtconsize->v_vcol);
12704 +- __get_user(ccol, &vtconsize->v_ccol);
12705 +- vlin = vlin ? vlin : vc->vc_scan_lines;
12706 +- if (clin) {
12707 +- if (ll) {
12708 +- if (ll != vlin/clin) {
12709 +- /* Parameters don't add up */
12710 +- ret = -EINVAL;
12711 +- break;
12712 +- }
12713 +- } else
12714 +- ll = vlin/clin;
12715 ++ if (!v.v_vlin)
12716 ++ v.v_vlin = vc->vc_scan_lines;
12717 ++ if (v.v_clin) {
12718 ++ int rows = v.v_vlin/v.v_clin;
12719 ++ if (v.v_rows != rows) {
12720 ++ if (v.v_rows) /* Parameters don't add up */
12721 ++ return -EINVAL;
12722 ++ v.v_rows = rows;
12723 ++ }
12724 + }
12725 +- if (vcol && ccol) {
12726 +- if (cc) {
12727 +- if (cc != vcol/ccol) {
12728 +- ret = -EINVAL;
12729 +- break;
12730 +- }
12731 +- } else
12732 +- cc = vcol/ccol;
12733 ++ if (v.v_vcol && v.v_ccol) {
12734 ++ int cols = v.v_vcol/v.v_ccol;
12735 ++ if (v.v_cols != cols) {
12736 ++ if (v.v_cols)
12737 ++ return -EINVAL;
12738 ++ v.v_cols = cols;
12739 ++ }
12740 + }
12741 +
12742 +- if (clin > 32) {
12743 +- ret = -EINVAL;
12744 +- break;
12745 +- }
12746 +-
12747 ++ if (v.v_clin > 32)
12748 ++ return -EINVAL;
12749 ++
12750 + for (i = 0; i < MAX_NR_CONSOLES; i++) {
12751 ++ struct vc_data *vcp;
12752 ++
12753 + if (!vc_cons[i].d)
12754 + continue;
12755 + console_lock();
12756 +- if (vlin)
12757 +- vc_cons[i].d->vc_scan_lines = vlin;
12758 +- if (clin)
12759 +- vc_cons[i].d->vc_font.height = clin;
12760 +- vc_cons[i].d->vc_resize_user = 1;
12761 +- vc_resize(vc_cons[i].d, cc, ll);
12762 ++ vcp = vc_cons[i].d;
12763 ++ if (vcp) {
12764 ++ if (v.v_vlin)
12765 ++ vcp->vc_scan_lines = v.v_vlin;
12766 ++ if (v.v_clin)
12767 ++ vcp->vc_font.height = v.v_clin;
12768 ++ vcp->vc_resize_user = 1;
12769 ++ vc_resize(vcp, v.v_cols, v.v_rows);
12770 ++ }
12771 + console_unlock();
12772 + }
12773 + break;
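
The VT_RESIZEX rework above replaces the access_ok()/__get_user() sequence with a single copy_from_user() into a local struct vt_consize and then checks that the caller-supplied rows/cols agree with the values implied by the pixel and character-cell sizes. A rough standalone sketch of just that consistency check (field names mirror vt_consize; everything else is illustrative):

    #include <stdio.h>

    struct consize { unsigned short rows, cols, vlin, clin, vcol, ccol; };

    /* Return 0 and fill in rows/cols, or -1 if the supplied values clash. */
    static int validate(struct consize *v, unsigned short default_vlin)
    {
            if (!v->vlin)
                    v->vlin = default_vlin;          /* fall back to current scan lines */
            if (v->clin) {
                    int rows = v->vlin / v->clin;    /* rows implied by heights */
                    if (v->rows && v->rows != rows)
                            return -1;               /* parameters don't add up */
                    v->rows = rows;
            }
            if (v->vcol && v->ccol) {
                    int cols = v->vcol / v->ccol;
                    if (v->cols && v->cols != cols)
                            return -1;
                    v->cols = cols;
            }
            if (v->clin > 32)
                    return -1;                       /* font height limit */
            return 0;
    }

    int main(void)
    {
            struct consize v = { .rows = 0, .cols = 80, .vlin = 400, .clin = 16,
                                 .vcol = 640, .ccol = 8 };
            printf("valid=%d rows=%d cols=%d\n",
                   validate(&v, 400), v.rows, v.cols);
            return 0;
    }
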
12774 +diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
12775 +index e1134a4d97f3..a00b4aee6c79 100644
12776 +--- a/drivers/uio/uio_dmem_genirq.c
12777 ++++ b/drivers/uio/uio_dmem_genirq.c
12778 +@@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
12779 + if (irq_on) {
12780 + if (test_and_clear_bit(0, &priv->flags))
12781 + enable_irq(dev_info->irq);
12782 ++ spin_unlock_irqrestore(&priv->lock, flags);
12783 + } else {
12784 +- if (!test_and_set_bit(0, &priv->flags))
12785 ++ if (!test_and_set_bit(0, &priv->flags)) {
12786 ++ spin_unlock_irqrestore(&priv->lock, flags);
12787 + disable_irq(dev_info->irq);
12788 ++ }
12789 + }
12790 +- spin_unlock_irqrestore(&priv->lock, flags);
12791 +
12792 + return 0;
12793 + }
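
In the uio_dmem_genirq hunk the spin_unlock_irqrestore() is moved so that disable_irq(), which can wait for a running handler to complete, is called only after the spinlock has been dropped. The general shape, drop the lock before calling anything that may block, looks roughly like this (pthread mutex and the slow_disable() stub are stand-ins, not the driver's code):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;  /* stands in for priv->lock */
    static int masked;                                        /* stands in for bit 0 of priv->flags */

    /* Stand-in for disable_irq(): may block waiting for a handler to finish,
     * so it must never run with the lock held. */
    static void slow_disable(void) { usleep(10000); puts("irq disabled"); }
    static void fast_enable(void)  { puts("irq enabled"); }

    static void irqcontrol(int irq_on)
    {
            pthread_mutex_lock(&lock);
            if (irq_on) {
                    int was_masked = masked;
                    masked = 0;
                    pthread_mutex_unlock(&lock);
                    if (was_masked)
                            fast_enable();
            } else {
                    int was_masked = masked;
                    masked = 1;
                    pthread_mutex_unlock(&lock);      /* drop the lock first ... */
                    if (!was_masked)
                            slow_disable();           /* ... then do the blocking call */
            }
    }

    int main(void) { irqcontrol(0); irqcontrol(1); return 0; }
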
12794 +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
12795 +index 7d5ecf36a33c..ff1be6a6841b 100644
12796 +--- a/drivers/usb/core/hub.c
12797 ++++ b/drivers/usb/core/hub.c
12798 +@@ -36,7 +36,9 @@
12799 + #include "otg_whitelist.h"
12800 +
12801 + #define USB_VENDOR_GENESYS_LOGIC 0x05e3
12802 ++#define USB_VENDOR_SMSC 0x0424
12803 + #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01
12804 ++#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02
12805 +
12806 + /* Protect struct usb_device->state and ->children members
12807 + * Note: Both are also protected by ->dev.sem, except that ->state can
12808 +@@ -1189,11 +1191,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
12809 + #ifdef CONFIG_PM
12810 + udev->reset_resume = 1;
12811 + #endif
12812 +- /* Don't set the change_bits when the device
12813 +- * was powered off.
12814 +- */
12815 +- if (test_bit(port1, hub->power_bits))
12816 +- set_bit(port1, hub->change_bits);
12817 +
12818 + } else {
12819 + /* The power session is gone; tell hub_wq */
12820 +@@ -1685,6 +1682,10 @@ static void hub_disconnect(struct usb_interface *intf)
12821 + kfree(hub->buffer);
12822 +
12823 + pm_suspend_ignore_children(&intf->dev, false);
12824 ++
12825 ++ if (hub->quirk_disable_autosuspend)
12826 ++ usb_autopm_put_interface(intf);
12827 ++
12828 + kref_put(&hub->kref, hub_release);
12829 + }
12830 +
12831 +@@ -1815,6 +1816,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
12832 + if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
12833 + hub->quirk_check_port_auto_suspend = 1;
12834 +
12835 ++ if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) {
12836 ++ hub->quirk_disable_autosuspend = 1;
12837 ++ usb_autopm_get_interface(intf);
12838 ++ }
12839 ++
12840 + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0)
12841 + return 0;
12842 +
12843 +@@ -5293,6 +5299,10 @@ out_hdev_lock:
12844 + }
12845 +
12846 + static const struct usb_device_id hub_id_table[] = {
12847 ++ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS,
12848 ++ .idVendor = USB_VENDOR_SMSC,
12849 ++ .bInterfaceClass = USB_CLASS_HUB,
12850 ++ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
12851 + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
12852 + | USB_DEVICE_ID_MATCH_INT_CLASS,
12853 + .idVendor = USB_VENDOR_GENESYS_LOGIC,
12854 +diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
12855 +index 34c1a7e22aae..657bacfbe3a7 100644
12856 +--- a/drivers/usb/core/hub.h
12857 ++++ b/drivers/usb/core/hub.h
12858 +@@ -69,6 +69,7 @@ struct usb_hub {
12859 + unsigned quiescing:1;
12860 + unsigned disconnected:1;
12861 + unsigned in_reset:1;
12862 ++ unsigned quirk_disable_autosuspend:1;
12863 +
12864 + unsigned quirk_check_port_auto_suspend:1;
12865 +
12866 +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
12867 +index 19e819aa2419..ad8307140df8 100644
12868 +--- a/drivers/usb/core/quirks.c
12869 ++++ b/drivers/usb/core/quirks.c
12870 +@@ -291,6 +291,9 @@ static const struct usb_device_id usb_quirk_list[] = {
12871 + /* INTEL VALUE SSD */
12872 + { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
12873 +
12874 ++ /* novation SoundControl XL */
12875 ++ { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
12876 ++
12877 + { } /* terminating entry must be last */
12878 + };
12879 +
12880 +diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
12881 +index 4af9a1c652ed..aeb6f7c84ea0 100644
12882 +--- a/drivers/usb/dwc2/gadget.c
12883 ++++ b/drivers/usb/dwc2/gadget.c
12884 +@@ -3933,11 +3933,12 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep,
12885 + * a unique tx-fifo even if it is non-periodic.
12886 + */
12887 + if (dir_in && hsotg->dedicated_fifos) {
12888 ++ unsigned fifo_count = dwc2_hsotg_tx_fifo_count(hsotg);
12889 + u32 fifo_index = 0;
12890 + u32 fifo_size = UINT_MAX;
12891 +
12892 + size = hs_ep->ep.maxpacket * hs_ep->mc;
12893 +- for (i = 1; i < hsotg->num_of_eps; ++i) {
12894 ++ for (i = 1; i <= fifo_count; ++i) {
12895 + if (hsotg->fifo_map & (1 << i))
12896 + continue;
12897 + val = dwc2_readl(hsotg->regs + DPTXFSIZN(i));
12898 +diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
12899 +index b29cd3979391..6e30b177aa22 100644
12900 +--- a/drivers/usb/gadget/composite.c
12901 ++++ b/drivers/usb/gadget/composite.c
12902 +@@ -440,12 +440,10 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
12903 + val = CONFIG_USB_GADGET_VBUS_DRAW;
12904 + if (!val)
12905 + return 0;
12906 +- switch (speed) {
12907 +- case USB_SPEED_SUPER:
12908 +- return DIV_ROUND_UP(val, 8);
12909 +- default:
12910 ++ if (speed < USB_SPEED_SUPER)
12911 + return DIV_ROUND_UP(val, 2);
12912 +- }
12913 ++ else
12914 ++ return DIV_ROUND_UP(val, 8);
12915 + }
12916 +
12917 + static int config_buf(struct usb_configuration *config,
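
The composite-gadget change above reworks bMaxPower encoding so that SuperSpeed and faster links use 8 mA units while everything slower uses 2 mA units, rather than special-casing USB_SPEED_SUPER alone. A tiny standalone illustration of the rounding (DIV_ROUND_UP re-implemented here; the 500 mA draw is a made-up stand-in for CONFIG_USB_GADGET_VBUS_DRAW):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Encode a current draw in mA into bMaxPower units: 2 mA per unit below
     * SuperSpeed, 8 mA per unit at SuperSpeed and above. */
    static unsigned encode_bMaxPower(int super_speed, unsigned ma)
    {
            return super_speed ? DIV_ROUND_UP(ma, 8) : DIV_ROUND_UP(ma, 2);
    }

    int main(void)
    {
            unsigned ma = 500;  /* hypothetical VBUS draw */
            printf("high speed: %u units, super speed: %u units\n",
                   encode_bMaxPower(0, ma), encode_bMaxPower(1, ma));
            return 0;
    }
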
12918 +diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c
12919 +index 1f9941145746..feb73a1c42ef 100644
12920 +--- a/drivers/usb/gadget/udc/gr_udc.c
12921 ++++ b/drivers/usb/gadget/udc/gr_udc.c
12922 +@@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev)
12923 + return -ENOMEM;
12924 + }
12925 +
12926 +- spin_lock(&dev->lock);
12927 +-
12928 + /* Inside lock so that no gadget can use this udc until probe is done */
12929 + retval = usb_add_gadget_udc(dev->dev, &dev->gadget);
12930 + if (retval) {
12931 +@@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev)
12932 + }
12933 + dev->added = 1;
12934 +
12935 ++ spin_lock(&dev->lock);
12936 ++
12937 + retval = gr_udc_init(dev);
12938 +- if (retval)
12939 ++ if (retval) {
12940 ++ spin_unlock(&dev->lock);
12941 + goto out;
12942 +-
12943 +- gr_dfs_create(dev);
12944 ++ }
12945 +
12946 + /* Clear all interrupt enables that might be left on since last boot */
12947 + gr_disable_interrupts_and_pullup(dev);
12948 +
12949 ++ spin_unlock(&dev->lock);
12950 ++
12951 ++ gr_dfs_create(dev);
12952 ++
12953 + retval = gr_request_irq(dev, dev->irq);
12954 + if (retval) {
12955 + dev_err(dev->dev, "Failed to request irq %d\n", dev->irq);
12956 +@@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev)
12957 + dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq);
12958 +
12959 + out:
12960 +- spin_unlock(&dev->lock);
12961 +-
12962 + if (retval)
12963 + gr_remove(pdev);
12964 +
12965 +diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
12966 +index a80a57decda1..70452c881e56 100644
12967 +--- a/drivers/usb/host/xhci-mem.c
12968 ++++ b/drivers/usb/host/xhci-mem.c
12969 +@@ -1479,9 +1479,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
12970 + /* Allow 3 retries for everything but isoc, set CErr = 3 */
12971 + if (!usb_endpoint_xfer_isoc(&ep->desc))
12972 + err_count = 3;
12973 +- /* Some devices get this wrong */
12974 +- if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH)
12975 +- max_packet = 512;
12976 ++ /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */
12977 ++ if (usb_endpoint_xfer_bulk(&ep->desc)) {
12978 ++ if (udev->speed == USB_SPEED_HIGH)
12979 ++ max_packet = 512;
12980 ++ if (udev->speed == USB_SPEED_FULL) {
12981 ++ max_packet = rounddown_pow_of_two(max_packet);
12982 ++ max_packet = clamp_val(max_packet, 8, 64);
12983 ++ }
12984 ++ }
12985 + /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */
12986 + if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100)
12987 + avg_trb_len = 8;
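
For full-speed bulk endpoints the hunk above forces wMaxPacketSize to one of the legal values 8, 16, 32 or 64 by rounding down to a power of two and clamping, instead of trusting whatever the device descriptor reports. The same arithmetic in a standalone sketch (rounddown_pow_of_two and clamp_val are re-implemented here for illustration):

    #include <stdio.h>

    static unsigned rounddown_pow_of_two(unsigned v)
    {
            unsigned p = 1;
            while (p * 2 <= v)
                    p *= 2;
            return p;
    }

    static unsigned clamp_val(unsigned v, unsigned lo, unsigned hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
            unsigned bogus[] = { 0, 7, 9, 63, 512, 1023 };  /* values a device might claim */

            for (unsigned i = 0; i < sizeof(bogus) / sizeof(bogus[0]); i++)
                    printf("%4u -> %2u\n", bogus[i],
                           clamp_val(rounddown_pow_of_two(bogus[i]), 8, 64));
            return 0;
    }
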
12988 +diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
12989 +index 09f228279c01..908496ed3254 100644
12990 +--- a/drivers/usb/host/xhci-pci.c
12991 ++++ b/drivers/usb/host/xhci-pci.c
12992 +@@ -53,6 +53,7 @@
12993 + #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8
12994 + #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8
12995 + #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0
12996 ++#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af
12997 +
12998 + #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
12999 + #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
13000 +@@ -191,7 +192,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
13001 + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
13002 + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI ||
13003 + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
13004 +- pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) {
13005 ++ pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI ||
13006 ++ pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) {
13007 + xhci->quirks |= XHCI_PME_STUCK_QUIRK;
13008 + }
13009 + if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
13010 +@@ -284,6 +286,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
13011 + if (!usb_hcd_is_primary_hcd(hcd))
13012 + return 0;
13013 +
13014 ++ if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
13015 ++ xhci_pme_acpi_rtd3_enable(pdev);
13016 ++
13017 + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn);
13018 +
13019 + /* Find any debug ports */
13020 +@@ -344,9 +349,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
13021 + HCC_MAX_PSA(xhci->hcc_params) >= 4)
13022 + xhci->shared_hcd->can_do_streams = 1;
13023 +
13024 +- if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
13025 +- xhci_pme_acpi_rtd3_enable(dev);
13026 +-
13027 + /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
13028 + pm_runtime_put_noidle(&dev->dev);
13029 +
13030 +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
13031 +index 61fa3007a74a..868878f5b72b 100644
13032 +--- a/drivers/usb/host/xhci-ring.c
13033 ++++ b/drivers/usb/host/xhci-ring.c
13034 +@@ -2758,6 +2758,42 @@ static int xhci_handle_event(struct xhci_hcd *xhci)
13035 + return 1;
13036 + }
13037 +
13038 ++/*
13039 ++ * Update Event Ring Dequeue Pointer:
13040 ++ * - When all events have finished
13041 ++ * - To avoid "Event Ring Full Error" condition
13042 ++ */
13043 ++static void xhci_update_erst_dequeue(struct xhci_hcd *xhci,
13044 ++ union xhci_trb *event_ring_deq)
13045 ++{
13046 ++ u64 temp_64;
13047 ++ dma_addr_t deq;
13048 ++
13049 ++ temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue);
13050 ++ /* If necessary, update the HW's version of the event ring deq ptr. */
13051 ++ if (event_ring_deq != xhci->event_ring->dequeue) {
13052 ++ deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg,
13053 ++ xhci->event_ring->dequeue);
13054 ++ if (deq == 0)
13055 ++ xhci_warn(xhci, "WARN something wrong with SW event ring dequeue ptr\n");
13056 ++ /*
13057 ++ * Per 4.9.4, Software writes to the ERDP register shall
13058 ++ * always advance the Event Ring Dequeue Pointer value.
13059 ++ */
13060 ++ if ((temp_64 & (u64) ~ERST_PTR_MASK) ==
13061 ++ ((u64) deq & (u64) ~ERST_PTR_MASK))
13062 ++ return;
13063 ++
13064 ++ /* Update HC event ring dequeue pointer */
13065 ++ temp_64 &= ERST_PTR_MASK;
13066 ++ temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK);
13067 ++ }
13068 ++
13069 ++ /* Clear the event handler busy flag (RW1C) */
13070 ++ temp_64 |= ERST_EHB;
13071 ++ xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue);
13072 ++}
13073 ++
13074 + /*
13075 + * xHCI spec says we can get an interrupt, and if the HC has an error condition,
13076 + * we might get bad data out of the event ring. Section 4.10.2.7 has a list of
13077 +@@ -2769,9 +2805,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
13078 + union xhci_trb *event_ring_deq;
13079 + irqreturn_t ret = IRQ_NONE;
13080 + unsigned long flags;
13081 +- dma_addr_t deq;
13082 + u64 temp_64;
13083 + u32 status;
13084 ++ int event_loop = 0;
13085 +
13086 + spin_lock_irqsave(&xhci->lock, flags);
13087 + /* Check if the xHC generated the interrupt, or the irq is shared */
13088 +@@ -2825,24 +2861,14 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
13089 + /* FIXME this should be a delayed service routine
13090 + * that clears the EHB.
13091 + */
13092 +- while (xhci_handle_event(xhci) > 0) {}
13093 +-
13094 +- temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue);
13095 +- /* If necessary, update the HW's version of the event ring deq ptr. */
13096 +- if (event_ring_deq != xhci->event_ring->dequeue) {
13097 +- deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg,
13098 +- xhci->event_ring->dequeue);
13099 +- if (deq == 0)
13100 +- xhci_warn(xhci, "WARN something wrong with SW event "
13101 +- "ring dequeue ptr.\n");
13102 +- /* Update HC event ring dequeue pointer */
13103 +- temp_64 &= ERST_PTR_MASK;
13104 +- temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK);
13105 ++ while (xhci_handle_event(xhci) > 0) {
13106 ++ if (event_loop++ < TRBS_PER_SEGMENT / 2)
13107 ++ continue;
13108 ++ xhci_update_erst_dequeue(xhci, event_ring_deq);
13109 ++ event_loop = 0;
13110 + }
13111 +
13112 +- /* Clear the event handler busy flag (RW1C); event ring is empty. */
13113 +- temp_64 |= ERST_EHB;
13114 +- xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue);
13115 ++ xhci_update_erst_dequeue(xhci, event_ring_deq);
13116 + ret = IRQ_HANDLED;
13117 +
13118 + out:
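
The interrupt-handler change factors the event-ring dequeue-pointer write-back into xhci_update_erst_dequeue() and calls it every TRBS_PER_SEGMENT/2 handled events as well as once after the loop, so a long burst of events cannot fill the ring before the controller sees any consumer progress. Stripped of the xHCI register details, the loop shape is roughly (consume_event(), flush_dequeue() and the batch size are placeholders):

    #include <stdio.h>

    #define BATCH 128               /* stand-in for TRBS_PER_SEGMENT / 2 */

    static int pending = 1000;      /* pretend 1000 events are queued */

    static int consume_event(void) { return pending-- > 0; }
    static void flush_dequeue(void) { puts("dequeue pointer written back"); }

    int main(void)
    {
            int event_loop = 0;

            while (consume_event()) {
                    if (++event_loop < BATCH)
                            continue;
                    flush_dequeue();    /* periodic update: tell the producer we made progress */
                    event_loop = 0;
            }
            flush_dequeue();            /* final update once the ring is drained */
            return 0;
    }
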
13119 +diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
13120 +index 7f226cc3ef8a..1ec32e5aa004 100644
13121 +--- a/drivers/usb/misc/iowarrior.c
13122 ++++ b/drivers/usb/misc/iowarrior.c
13123 +@@ -32,6 +32,14 @@
13124 + #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512
13125 + /* full speed iowarrior */
13126 + #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503
13127 ++/* fuller speed iowarrior */
13128 ++#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504
13129 ++#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505
13130 ++#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506
13131 ++
13132 ++/* OEMed devices */
13133 ++#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a
13134 ++#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b
13135 +
13136 + /* Get a minor range for your devices from the usb maintainer */
13137 + #ifdef CONFIG_USB_DYNAMIC_MINORS
13138 +@@ -137,6 +145,11 @@ static const struct usb_device_id iowarrior_ids[] = {
13139 + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)},
13140 + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)},
13141 + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)},
13142 ++ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)},
13143 ++ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)},
13144 ++ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)},
13145 ++ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)},
13146 ++ {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)},
13147 + {} /* Terminating entry */
13148 + };
13149 + MODULE_DEVICE_TABLE(usb, iowarrior_ids);
13150 +@@ -364,6 +377,7 @@ static ssize_t iowarrior_write(struct file *file,
13151 + }
13152 + switch (dev->product_id) {
13153 + case USB_DEVICE_ID_CODEMERCS_IOW24:
13154 ++ case USB_DEVICE_ID_CODEMERCS_IOW24SAG:
13155 + case USB_DEVICE_ID_CODEMERCS_IOWPV1:
13156 + case USB_DEVICE_ID_CODEMERCS_IOWPV2:
13157 + case USB_DEVICE_ID_CODEMERCS_IOW40:
13158 +@@ -378,6 +392,10 @@ static ssize_t iowarrior_write(struct file *file,
13159 + goto exit;
13160 + break;
13161 + case USB_DEVICE_ID_CODEMERCS_IOW56:
13162 ++ case USB_DEVICE_ID_CODEMERCS_IOW56AM:
13163 ++ case USB_DEVICE_ID_CODEMERCS_IOW28:
13164 ++ case USB_DEVICE_ID_CODEMERCS_IOW28L:
13165 ++ case USB_DEVICE_ID_CODEMERCS_IOW100:
13166 + /* The IOW56 uses asynchronous IO and more urbs */
13167 + if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) {
13168 + /* Wait until we are below the limit for submitted urbs */
13169 +@@ -502,6 +520,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
13170 + switch (cmd) {
13171 + case IOW_WRITE:
13172 + if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 ||
13173 ++ dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG ||
13174 + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 ||
13175 + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 ||
13176 + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) {
13177 +@@ -786,7 +805,11 @@ static int iowarrior_probe(struct usb_interface *interface,
13178 + goto error;
13179 + }
13180 +
13181 +- if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) {
13182 ++ if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) ||
13183 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) ||
13184 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) ||
13185 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) ||
13186 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) {
13187 + res = usb_find_last_int_out_endpoint(iface_desc,
13188 + &dev->int_out_endpoint);
13189 + if (res) {
13190 +@@ -799,7 +822,11 @@ static int iowarrior_probe(struct usb_interface *interface,
13191 + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */
13192 + dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint);
13193 + if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) &&
13194 +- (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56))
13195 ++ ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) ||
13196 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) ||
13197 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) ||
13198 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) ||
13199 ++ (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)))
13200 + /* IOWarrior56 has wMaxPacketSize different from report size */
13201 + dev->report_size = 7;
13202 +
13203 +diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
13204 +index 456f3e6ecf03..26e69c2766f5 100644
13205 +--- a/drivers/usb/musb/omap2430.c
13206 ++++ b/drivers/usb/musb/omap2430.c
13207 +@@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = {
13208 + .init = omap2430_musb_init,
13209 + .exit = omap2430_musb_exit,
13210 +
13211 +- .set_vbus = omap2430_musb_set_vbus,
13212 +-
13213 + .enable = omap2430_musb_enable,
13214 + .disable = omap2430_musb_disable,
13215 +
13216 +diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
13217 +index 8391a88cf90f..9d97543449e6 100644
13218 +--- a/drivers/usb/storage/uas.c
13219 ++++ b/drivers/usb/storage/uas.c
13220 +@@ -46,6 +46,7 @@ struct uas_dev_info {
13221 + struct scsi_cmnd *cmnd[MAX_CMNDS];
13222 + spinlock_t lock;
13223 + struct work_struct work;
13224 ++ struct work_struct scan_work; /* for async scanning */
13225 + };
13226 +
13227 + enum {
13228 +@@ -115,6 +116,17 @@ out:
13229 + spin_unlock_irqrestore(&devinfo->lock, flags);
13230 + }
13231 +
13232 ++static void uas_scan_work(struct work_struct *work)
13233 ++{
13234 ++ struct uas_dev_info *devinfo =
13235 ++ container_of(work, struct uas_dev_info, scan_work);
13236 ++ struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf);
13237 ++
13238 ++ dev_dbg(&devinfo->intf->dev, "starting scan\n");
13239 ++ scsi_scan_host(shost);
13240 ++ dev_dbg(&devinfo->intf->dev, "scan complete\n");
13241 ++}
13242 ++
13243 + static void uas_add_work(struct uas_cmd_info *cmdinfo)
13244 + {
13245 + struct scsi_pointer *scp = (void *)cmdinfo;
13246 +@@ -989,6 +1001,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
13247 + init_usb_anchor(&devinfo->data_urbs);
13248 + spin_lock_init(&devinfo->lock);
13249 + INIT_WORK(&devinfo->work, uas_do_work);
13250 ++ INIT_WORK(&devinfo->scan_work, uas_scan_work);
13251 +
13252 + result = uas_configure_endpoints(devinfo);
13253 + if (result)
13254 +@@ -1005,7 +1018,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
13255 + if (result)
13256 + goto free_streams;
13257 +
13258 +- scsi_scan_host(shost);
13259 ++ /* Submit the delayed_work for SCSI-device scanning */
13260 ++ schedule_work(&devinfo->scan_work);
13261 ++
13262 + return result;
13263 +
13264 + free_streams:
13265 +@@ -1173,6 +1188,12 @@ static void uas_disconnect(struct usb_interface *intf)
13266 + usb_kill_anchored_urbs(&devinfo->data_urbs);
13267 + uas_zap_pending(devinfo, DID_NO_CONNECT);
13268 +
13269 ++ /*
13270 ++ * Prevent SCSI scanning (if it hasn't started yet)
13271 ++ * or wait for the SCSI-scanning routine to stop.
13272 ++ */
13273 ++ cancel_work_sync(&devinfo->scan_work);
13274 ++
13275 + scsi_remove_host(shost);
13276 + uas_free_streams(devinfo);
13277 + scsi_host_put(shost);
13278 +diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c
13279 +index d059d04c63ac..20195d3dbf08 100644
13280 +--- a/drivers/video/fbdev/pxa168fb.c
13281 ++++ b/drivers/video/fbdev/pxa168fb.c
13282 +@@ -769,8 +769,8 @@ failed_free_cmap:
13283 + failed_free_clk:
13284 + clk_disable_unprepare(fbi->clk);
13285 + failed_free_fbmem:
13286 +- dma_free_coherent(fbi->dev, info->fix.smem_len,
13287 +- info->screen_base, fbi->fb_start_dma);
13288 ++ dma_free_wc(fbi->dev, info->fix.smem_len,
13289 ++ info->screen_base, fbi->fb_start_dma);
13290 + failed_free_info:
13291 + kfree(info);
13292 +
13293 +@@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev)
13294 +
13295 + irq = platform_get_irq(pdev, 0);
13296 +
13297 +- dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
13298 ++ dma_free_wc(fbi->dev, info->fix.smem_len,
13299 + info->screen_base, info->fix.smem_start);
13300 +
13301 + clk_disable_unprepare(fbi->clk);
13302 +diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
13303 +index 499531608fa2..71970773aad1 100644
13304 +--- a/drivers/virtio/virtio_balloon.c
13305 ++++ b/drivers/virtio/virtio_balloon.c
13306 +@@ -132,6 +132,8 @@ static void set_page_pfns(struct virtio_balloon *vb,
13307 + {
13308 + unsigned int i;
13309 +
13310 ++ BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX);
13311 ++
13312 + /*
13313 + * Set balloon pfns pointing at this page.
13314 + * Note that the first pfn points at start of the page.
13315 +diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c
13316 +index 30b3acc93833..e81ec763b555 100644
13317 +--- a/drivers/vme/bridges/vme_fake.c
13318 ++++ b/drivers/vme/bridges/vme_fake.c
13319 +@@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr,
13320 + }
13321 + }
13322 +
13323 +-static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
13324 +- u32 aspace, u32 cycle)
13325 ++static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge,
13326 ++ unsigned long long addr,
13327 ++ u32 aspace, u32 cycle)
13328 + {
13329 + u8 retval = 0xff;
13330 + int i;
13331 +@@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
13332 + return retval;
13333 + }
13334 +
13335 +-static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
13336 +- u32 aspace, u32 cycle)
13337 ++static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge,
13338 ++ unsigned long long addr,
13339 ++ u32 aspace, u32 cycle)
13340 + {
13341 + u16 retval = 0xffff;
13342 + int i;
13343 +@@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
13344 + return retval;
13345 + }
13346 +
13347 +-static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr,
13348 +- u32 aspace, u32 cycle)
13349 ++static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge,
13350 ++ unsigned long long addr,
13351 ++ u32 aspace, u32 cycle)
13352 + {
13353 + u32 retval = 0xffffffff;
13354 + int i;
13355 +@@ -613,8 +616,9 @@ out:
13356 + return retval;
13357 + }
13358 +
13359 +-static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
13360 +- unsigned long long addr, u32 aspace, u32 cycle)
13361 ++static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge,
13362 ++ u8 *buf, unsigned long long addr,
13363 ++ u32 aspace, u32 cycle)
13364 + {
13365 + int i;
13366 + unsigned long long start, end, offset;
13367 +@@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
13368 +
13369 + }
13370 +
13371 +-static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
13372 +- unsigned long long addr, u32 aspace, u32 cycle)
13373 ++static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge,
13374 ++ u16 *buf, unsigned long long addr,
13375 ++ u32 aspace, u32 cycle)
13376 + {
13377 + int i;
13378 + unsigned long long start, end, offset;
13379 +@@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
13380 +
13381 + }
13382 +
13383 +-static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf,
13384 +- unsigned long long addr, u32 aspace, u32 cycle)
13385 ++static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge,
13386 ++ u32 *buf, unsigned long long addr,
13387 ++ u32 aspace, u32 cycle)
13388 + {
13389 + int i;
13390 + unsigned long long start, end, offset;
13391 +diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
13392 +index 08cb419eb4e6..5f6b77ea34fb 100644
13393 +--- a/drivers/xen/preempt.c
13394 ++++ b/drivers/xen/preempt.c
13395 +@@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void)
13396 + * cpu.
13397 + */
13398 + __this_cpu_write(xen_in_preemptible_hcall, false);
13399 +- _cond_resched();
13400 ++ local_irq_enable();
13401 ++ cond_resched();
13402 ++ local_irq_disable();
13403 + __this_cpu_write(xen_in_preemptible_hcall, true);
13404 + }
13405 + }
13406 +diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
13407 +index 7d5a9b51f0d7..4be07cf31d74 100644
13408 +--- a/fs/btrfs/check-integrity.c
13409 ++++ b/fs/btrfs/check-integrity.c
13410 +@@ -642,7 +642,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
13411 + static int btrfsic_process_superblock(struct btrfsic_state *state,
13412 + struct btrfs_fs_devices *fs_devices)
13413 + {
13414 +- struct btrfs_fs_info *fs_info = state->fs_info;
13415 + struct btrfs_super_block *selected_super;
13416 + struct list_head *dev_head = &fs_devices->devices;
13417 + struct btrfs_device *device;
13418 +@@ -713,7 +712,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
13419 + break;
13420 + }
13421 +
13422 +- num_copies = btrfs_num_copies(fs_info, next_bytenr,
13423 ++ num_copies = btrfs_num_copies(state->fs_info, next_bytenr,
13424 + state->metablock_size);
13425 + if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
13426 + pr_info("num_copies(log_bytenr=%llu) = %d\n",
13427 +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
13428 +index 44b15617c7b9..6b4fee5c79f9 100644
13429 +--- a/fs/btrfs/disk-io.c
13430 ++++ b/fs/btrfs/disk-io.c
13431 +@@ -2913,6 +2913,7 @@ retry_root_backup:
13432 + /* do not make disk changes in broken FS or nologreplay is given */
13433 + if (btrfs_super_log_root(disk_super) != 0 &&
13434 + !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
13435 ++ btrfs_info(fs_info, "start tree-log replay");
13436 + ret = btrfs_replay_log(fs_info, fs_devices);
13437 + if (ret) {
13438 + err = ret;
13439 +@@ -4393,7 +4394,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
13440 + wake_up(&fs_info->transaction_wait);
13441 +
13442 + btrfs_destroy_delayed_inodes(fs_info);
13443 +- btrfs_assert_delayed_root_empty(fs_info);
13444 +
13445 + btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
13446 + EXTENT_DIRTY);
13447 +diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
13448 +index 2e348fb0b280..c87d673ce334 100644
13449 +--- a/fs/btrfs/extent_map.c
13450 ++++ b/fs/btrfs/extent_map.c
13451 +@@ -228,6 +228,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
13452 + struct extent_map *merge = NULL;
13453 + struct rb_node *rb;
13454 +
13455 ++ /*
13456 ++ * We can't modify an extent map that is in the tree and that is being
13457 ++ * used by another task, as it can cause that other task to see it in
13458 ++ * inconsistent state during the merging. We always have 1 reference for
13459 ++ * the tree and 1 for this task (which is unpinning the extent map or
13460 ++ * clearing the logging flag), so anything > 2 means it's being used by
13461 ++ * other tasks too.
13462 ++ */
13463 ++ if (refcount_read(&em->refs) > 2)
13464 ++ return;
13465 ++
13466 + if (em->start != 0) {
13467 + rb = rb_prev(&em->rb_node);
13468 + if (rb)
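
The guard added to try_merge_map() skips merging whenever the extent map's refcount is above 2, since beyond the tree's reference and the caller's, any extra reference means another task may be reading the structure and would observe it mid-merge. The rule in miniature, with a C11 atomic counter standing in for the kernel's refcount_t:

    #include <stdatomic.h>
    #include <stdio.h>

    struct em { atomic_int refs; long start, len; };

    /* Only safe to rewrite fields in place if nobody but the tree (1 ref)
     * and the caller (1 ref) can see the object. */
    static int can_modify_in_place(struct em *em)
    {
            return atomic_load(&em->refs) <= 2;
    }

    int main(void)
    {
            struct em em = { .refs = 2, .start = 0, .len = 4096 };

            printf("refs=2 -> modify? %d\n", can_modify_in_place(&em));
            atomic_fetch_add(&em.refs, 1);      /* a concurrent reader takes a reference */
            printf("refs=3 -> modify? %d\n", can_modify_in_place(&em));
            return 0;
    }
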
13469 +diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
13470 +index 702b3606ad0e..717d82d51bb1 100644
13471 +--- a/fs/btrfs/file-item.c
13472 ++++ b/fs/btrfs/file-item.c
13473 +@@ -288,7 +288,8 @@ found:
13474 + csum += count * csum_size;
13475 + nblocks -= count;
13476 + next:
13477 +- while (count--) {
13478 ++ while (count > 0) {
13479 ++ count--;
13480 + disk_bytenr += fs_info->sectorsize;
13481 + offset += fs_info->sectorsize;
13482 + page_bytes_left -= fs_info->sectorsize;
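
The csum-lookup loop is rewritten so the counter is decremented inside the body rather than in the condition. The observable difference between the two forms is where the counter ends up once the loop terminates: the post-decrement form always leaves it one step past zero (UINT_MAX for an unsigned type), which matters whenever the value is used again afterwards. A two-minute demonstration:

    #include <stdio.h>

    int main(void)
    {
            unsigned int a = 3, b = 3;

            while (a--)
                    ;               /* post-decrement in the condition */
            while (b > 0)
                    b--;            /* decrement inside the body */

            printf("a=%u b=%u\n", a, b);    /* a == UINT_MAX, b == 0 */
            return 0;
    }
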
13483 +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
13484 +index abecc4724a3b..2a196bb134d9 100644
13485 +--- a/fs/btrfs/inode.c
13486 ++++ b/fs/btrfs/inode.c
13487 +@@ -10639,6 +10639,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
13488 + struct btrfs_root *root = BTRFS_I(inode)->root;
13489 + struct btrfs_key ins;
13490 + u64 cur_offset = start;
13491 ++ u64 clear_offset = start;
13492 + u64 i_size;
13493 + u64 cur_bytes;
13494 + u64 last_alloc = (u64)-1;
13495 +@@ -10673,6 +10674,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
13496 + btrfs_end_transaction(trans);
13497 + break;
13498 + }
13499 ++
13500 ++ /*
13501 ++ * We've reserved this space, and thus converted it from
13502 ++ * ->bytes_may_use to ->bytes_reserved. Any error that happens
13503 ++ * from here on out we will only need to clear our reservation
13504 ++ * for the remaining unreserved area, so advance our
13505 ++ * clear_offset by our extent size.
13506 ++ */
13507 ++ clear_offset += ins.offset;
13508 + btrfs_dec_block_group_reservations(fs_info, ins.objectid);
13509 +
13510 + last_alloc = ins.offset;
13511 +@@ -10753,9 +10763,9 @@ next:
13512 + if (own_trans)
13513 + btrfs_end_transaction(trans);
13514 + }
13515 +- if (cur_offset < end)
13516 +- btrfs_free_reserved_data_space(inode, NULL, cur_offset,
13517 +- end - cur_offset + 1);
13518 ++ if (clear_offset < end)
13519 ++ btrfs_free_reserved_data_space(inode, NULL, clear_offset,
13520 ++ end - clear_offset + 1);
13521 + return ret;
13522 + }
13523 +
13524 +diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
13525 +index a3aca495e33e..d2287ea9fc50 100644
13526 +--- a/fs/btrfs/ordered-data.c
13527 ++++ b/fs/btrfs/ordered-data.c
13528 +@@ -838,10 +838,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
13529 + }
13530 + btrfs_start_ordered_extent(inode, ordered, 1);
13531 + end = ordered->file_offset;
13532 ++ /*
13533 ++ * If the ordered extent had an error save the error but don't
13534 ++ * exit without waiting first for all other ordered extents in
13535 ++ * the range to complete.
13536 ++ */
13537 + if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
13538 + ret = -EIO;
13539 + btrfs_put_ordered_extent(ordered);
13540 +- if (ret || end == 0 || end == start)
13541 ++ if (end == 0 || end == start)
13542 + break;
13543 + end--;
13544 + }
13545 +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
13546 +index 3ab79fa00dc7..17a8463ef35c 100644
13547 +--- a/fs/btrfs/super.c
13548 ++++ b/fs/btrfs/super.c
13549 +@@ -1801,6 +1801,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
13550 + }
13551 +
13552 + if (btrfs_super_log_root(fs_info->super_copy) != 0) {
13553 ++ btrfs_warn(fs_info,
13554 ++ "mount required to replay tree-log, cannot remount read-write");
13555 + ret = -EINVAL;
13556 + goto restore;
13557 + }
13558 +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
13559 +index 358e930df4ac..6d34842912e8 100644
13560 +--- a/fs/btrfs/volumes.c
13561 ++++ b/fs/btrfs/volumes.c
13562 +@@ -7227,6 +7227,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
13563 + else
13564 + btrfs_dev_stat_reset(dev, i);
13565 + }
13566 ++ btrfs_info(fs_info, "device stats zeroed by %s (%d)",
13567 ++ current->comm, task_pid_nr(current));
13568 + } else {
13569 + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
13570 + if (stats->nr_items > i)
13571 +diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
13572 +index b968334f841e..f36ddfea4997 100644
13573 +--- a/fs/ceph/mds_client.c
13574 ++++ b/fs/ceph/mds_client.c
13575 +@@ -2261,8 +2261,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
13576 + if (!(mdsc->fsc->mount_options->flags &
13577 + CEPH_MOUNT_OPT_MOUNTWAIT) &&
13578 + !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
13579 +- err = -ENOENT;
13580 +- pr_info("probably no mds server is up\n");
13581 ++ err = -EHOSTUNREACH;
13582 + goto finish;
13583 + }
13584 + }
13585 +diff --git a/fs/ceph/super.c b/fs/ceph/super.c
13586 +index 088c4488b449..6b10b20bfe32 100644
13587 +--- a/fs/ceph/super.c
13588 ++++ b/fs/ceph/super.c
13589 +@@ -1055,6 +1055,11 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
13590 + return res;
13591 +
13592 + out_splat:
13593 ++ if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
13594 ++ pr_info("No mds server is up or the cluster is laggy\n");
13595 ++ err = -EHOSTUNREACH;
13596 ++ }
13597 ++
13598 + ceph_mdsc_close_sessions(fsc->mdsc);
13599 + deactivate_locked_super(sb);
13600 + goto out_final;
13601 +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
13602 +index f0b1279a7de6..6e5ecf70996a 100644
13603 +--- a/fs/cifs/connect.c
13604 ++++ b/fs/cifs/connect.c
13605 +@@ -3047,8 +3047,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data)
13606 + {
13607 + struct cifs_sb_info *old = CIFS_SB(sb);
13608 + struct cifs_sb_info *new = mnt_data->cifs_sb;
13609 +- bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
13610 +- bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
13611 ++ bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
13612 ++ old->prepath;
13613 ++ bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
13614 ++ new->prepath;
13615 +
13616 + if (old_set && new_set && !strcmp(new->prepath, old->prepath))
13617 + return 1;
13618 +diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
13619 +index bd25ab837011..eed38ae86c6c 100644
13620 +--- a/fs/ecryptfs/crypto.c
13621 ++++ b/fs/ecryptfs/crypto.c
13622 +@@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
13623 + struct extent_crypt_result ecr;
13624 + int rc = 0;
13625 +
13626 +- BUG_ON(!crypt_stat || !crypt_stat->tfm
13627 +- || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
13628 ++ if (!crypt_stat || !crypt_stat->tfm
13629 ++ || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
13630 ++ return -EINVAL;
13631 ++
13632 + if (unlikely(ecryptfs_verbosity > 0)) {
13633 + ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
13634 + crypt_stat->key_size);
13635 +diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
13636 +index fa218cd64f74..3f3ec50bf773 100644
13637 +--- a/fs/ecryptfs/keystore.c
13638 ++++ b/fs/ecryptfs/keystore.c
13639 +@@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
13640 + printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n",
13641 + ECRYPTFS_TAG_1_PACKET_TYPE);
13642 + rc = -EINVAL;
13643 +- goto out;
13644 ++ goto out_free;
13645 + }
13646 + /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
13647 + * at end of function upon failure */
13648 +diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
13649 +index 4f457d5c4933..26464f9d9b76 100644
13650 +--- a/fs/ecryptfs/messaging.c
13651 ++++ b/fs/ecryptfs/messaging.c
13652 +@@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void)
13653 + * ecryptfs_message_buf_len),
13654 + GFP_KERNEL);
13655 + if (!ecryptfs_msg_ctx_arr) {
13656 ++ kfree(ecryptfs_daemon_hash);
13657 + rc = -ENOMEM;
13658 + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
13659 + goto out;
13660 +diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
13661 +index c17855fead7b..90beca85c416 100644
13662 +--- a/fs/ext4/dir.c
13663 ++++ b/fs/ext4/dir.c
13664 +@@ -125,12 +125,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
13665 + if (err != ERR_BAD_DX_DIR) {
13666 + return err;
13667 + }
13668 +- /*
13669 +- * We don't set the inode dirty flag since it's not
13670 +- * critical that it get flushed back to the disk.
13671 +- */
13672 +- ext4_clear_inode_flag(file_inode(file),
13673 +- EXT4_INODE_INDEX);
13674 ++ /* Can we just clear INDEX flag to ignore htree information? */
13675 ++ if (!ext4_has_metadata_csum(sb)) {
13676 ++ /*
13677 ++ * We don't set the inode dirty flag since it's not
13678 ++ * critical that it gets flushed back to the disk.
13679 ++ */
13680 ++ ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
13681 ++ }
13682 + }
13683 +
13684 + if (ext4_has_inline_data(inode)) {
13685 +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
13686 +index 95ef26b39e69..b162f602c430 100644
13687 +--- a/fs/ext4/ext4.h
13688 ++++ b/fs/ext4/ext4.h
13689 +@@ -1532,8 +1532,11 @@ struct ext4_sb_info {
13690 + struct ratelimit_state s_warning_ratelimit_state;
13691 + struct ratelimit_state s_msg_ratelimit_state;
13692 +
13693 +- /* Barrier between changing inodes' journal flags and writepages ops. */
13694 +- struct percpu_rw_semaphore s_journal_flag_rwsem;
13695 ++ /*
13696 ++ * Barrier between writepages ops and changing any inode's JOURNAL_DATA
13697 ++ * or EXTENTS flag.
13698 ++ */
13699 ++ struct percpu_rw_semaphore s_writepages_rwsem;
13700 + struct dax_device *s_daxdev;
13701 + };
13702 +
13703 +@@ -2386,8 +2389,11 @@ void ext4_insert_dentry(struct inode *inode,
13704 + struct ext4_filename *fname);
13705 + static inline void ext4_update_dx_flag(struct inode *inode)
13706 + {
13707 +- if (!ext4_has_feature_dir_index(inode->i_sb))
13708 ++ if (!ext4_has_feature_dir_index(inode->i_sb)) {
13709 ++ /* ext4_iget() should have caught this... */
13710 ++ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
13711 + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
13712 ++ }
13713 + }
13714 + static const unsigned char ext4_filetype_table[] = {
13715 + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
13716 +@@ -2864,7 +2870,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
13717 + !inode_is_locked(inode));
13718 + down_write(&EXT4_I(inode)->i_data_sem);
13719 + if (newsize > EXT4_I(inode)->i_disksize)
13720 +- EXT4_I(inode)->i_disksize = newsize;
13721 ++ WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
13722 + up_write(&EXT4_I(inode)->i_data_sem);
13723 + }
13724 +
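
i_disksize is updated here under i_data_sem but read locklessly on the writeback path, so the store is wrapped in WRITE_ONCE() and the matching read in ext4_writepages (further down in this patch) uses READ_ONCE(), keeping the compiler from tearing or re-reading the access. WRITE_ONCE/READ_ONCE are compiler annotations rather than atomics; the relaxed C11 atomics below are only a rough userspace stand-in for the same single-access guarantee:

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic long long i_disksize;    /* stand-in for EXT4_I(inode)->i_disksize */

    static void update_disksize(long long newsize)
    {
            /* writer: runs with the inode's data semaphore held */
            if (newsize > atomic_load_explicit(&i_disksize, memory_order_relaxed))
                    atomic_store_explicit(&i_disksize, newsize, memory_order_relaxed);
    }

    static long long read_disksize(void)
    {
            /* lockless reader, e.g. the writeback path */
            return atomic_load_explicit(&i_disksize, memory_order_relaxed);
    }

    int main(void)
    {
            update_disksize(1 << 20);
            printf("disksize=%lld\n", read_disksize());
            return 0;
    }
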
13725 +diff --git a/fs/ext4/file.c b/fs/ext4/file.c
13726 +index 4ede0af9d6fe..acec134da57d 100644
13727 +--- a/fs/ext4/file.c
13728 ++++ b/fs/ext4/file.c
13729 +@@ -38,9 +38,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
13730 + struct inode *inode = file_inode(iocb->ki_filp);
13731 + ssize_t ret;
13732 +
13733 +- if (!inode_trylock_shared(inode)) {
13734 +- if (iocb->ki_flags & IOCB_NOWAIT)
13735 ++ if (iocb->ki_flags & IOCB_NOWAIT) {
13736 ++ if (!inode_trylock_shared(inode))
13737 + return -EAGAIN;
13738 ++ } else {
13739 + inode_lock_shared(inode);
13740 + }
13741 + /*
13742 +@@ -188,9 +189,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
13743 + struct inode *inode = file_inode(iocb->ki_filp);
13744 + ssize_t ret;
13745 +
13746 +- if (!inode_trylock(inode)) {
13747 +- if (iocb->ki_flags & IOCB_NOWAIT)
13748 ++ if (iocb->ki_flags & IOCB_NOWAIT) {
13749 ++ if (!inode_trylock(inode))
13750 + return -EAGAIN;
13751 ++ } else {
13752 + inode_lock(inode);
13753 + }
13754 + ret = ext4_write_checks(iocb, from);
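
Both ext4 DAX read and write paths are restructured so the non-blocking trylock is attempted only when IOCB_NOWAIT was requested; blocking callers now take the lock directly instead of trying first and then sleeping. The pattern in miniature, with a pthread mutex standing in for the inode lock:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;

    /* nowait mirrors IOCB_NOWAIT: fail fast instead of sleeping on the lock. */
    static int do_io(int nowait)
    {
            if (nowait) {
                    if (pthread_mutex_trylock(&inode_lock))
                            return -EAGAIN;          /* caller will retry later */
            } else {
                    pthread_mutex_lock(&inode_lock); /* plain blocking acquisition */
            }
            /* ... perform the I/O under the lock ... */
            pthread_mutex_unlock(&inode_lock);
            return 0;
    }

    int main(void)
    {
            printf("nowait: %d, blocking: %d\n", do_io(1), do_io(0));
            return 0;
    }
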
13755 +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
13756 +index a91b8404d3dc..1e2edebd0929 100644
13757 +--- a/fs/ext4/inode.c
13758 ++++ b/fs/ext4/inode.c
13759 +@@ -2564,7 +2564,7 @@ update_disksize:
13760 + * truncate are avoided by checking i_size under i_data_sem.
13761 + */
13762 + disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
13763 +- if (disksize > EXT4_I(inode)->i_disksize) {
13764 ++ if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
13765 + int err2;
13766 + loff_t i_size;
13767 +
13768 +@@ -2744,7 +2744,7 @@ static int ext4_writepages(struct address_space *mapping,
13769 + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
13770 + return -EIO;
13771 +
13772 +- percpu_down_read(&sbi->s_journal_flag_rwsem);
13773 ++ percpu_down_read(&sbi->s_writepages_rwsem);
13774 + trace_ext4_writepages(inode, wbc);
13775 +
13776 + if (dax_mapping(mapping)) {
13777 +@@ -2974,7 +2974,7 @@ unplug:
13778 + out_writepages:
13779 + trace_ext4_writepages_result(inode, wbc, ret,
13780 + nr_to_write - wbc->nr_to_write);
13781 +- percpu_up_read(&sbi->s_journal_flag_rwsem);
13782 ++ percpu_up_read(&sbi->s_writepages_rwsem);
13783 + return ret;
13784 + }
13785 +
13786 +@@ -4817,6 +4817,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
13787 + ret = -EFSCORRUPTED;
13788 + goto bad_inode;
13789 + }
13790 ++ /*
13791 ++ * If dir_index is not enabled but there's dir with INDEX flag set,
13792 ++ * we'd normally treat htree data as empty space. But with metadata
13793 ++ * checksumming that corrupts checksums so forbid that.
13794 ++ */
13795 ++ if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
13796 ++ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
13797 ++ EXT4_ERROR_INODE(inode,
13798 ++ "iget: Dir with htree data on filesystem without dir_index feature.");
13799 ++ ret = -EFSCORRUPTED;
13800 ++ goto bad_inode;
13801 ++ }
13802 + ei->i_disksize = inode->i_size;
13803 + #ifdef CONFIG_QUOTA
13804 + ei->i_reserved_quota = 0;
13805 +@@ -6038,7 +6050,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
13806 + }
13807 + }
13808 +
13809 +- percpu_down_write(&sbi->s_journal_flag_rwsem);
13810 ++ percpu_down_write(&sbi->s_writepages_rwsem);
13811 + jbd2_journal_lock_updates(journal);
13812 +
13813 + /*
13814 +@@ -6055,7 +6067,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
13815 + err = jbd2_journal_flush(journal);
13816 + if (err < 0) {
13817 + jbd2_journal_unlock_updates(journal);
13818 +- percpu_up_write(&sbi->s_journal_flag_rwsem);
13819 ++ percpu_up_write(&sbi->s_writepages_rwsem);
13820 + ext4_inode_resume_unlocked_dio(inode);
13821 + return err;
13822 + }
13823 +@@ -6064,7 +6076,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
13824 + ext4_set_aops(inode);
13825 +
13826 + jbd2_journal_unlock_updates(journal);
13827 +- percpu_up_write(&sbi->s_journal_flag_rwsem);
13828 ++ percpu_up_write(&sbi->s_writepages_rwsem);
13829 +
13830 + if (val)
13831 + up_write(&EXT4_I(inode)->i_mmap_sem);
13832 +diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
13833 +index 78d45c7d3fa7..0d785868cc50 100644
13834 +--- a/fs/ext4/migrate.c
13835 ++++ b/fs/ext4/migrate.c
13836 +@@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
13837 +
13838 + int ext4_ext_migrate(struct inode *inode)
13839 + {
13840 ++ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
13841 + handle_t *handle;
13842 + int retval = 0, i;
13843 + __le32 *i_data;
13844 +@@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode)
13845 + */
13846 + return retval;
13847 +
13848 ++ percpu_down_write(&sbi->s_writepages_rwsem);
13849 ++
13850 + /*
13851 + * Worst case we can touch the allocation bitmaps, a bgd
13852 + * block, and a block to link in the orphan list. We do need
13853 +@@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode)
13854 +
13855 + if (IS_ERR(handle)) {
13856 + retval = PTR_ERR(handle);
13857 +- return retval;
13858 ++ goto out_unlock;
13859 + }
13860 + goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
13861 + EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
13862 +@@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode)
13863 + if (IS_ERR(tmp_inode)) {
13864 + retval = PTR_ERR(tmp_inode);
13865 + ext4_journal_stop(handle);
13866 +- return retval;
13867 ++ goto out_unlock;
13868 + }
13869 + i_size_write(tmp_inode, i_size_read(inode));
13870 + /*
13871 +@@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode)
13872 + */
13873 + ext4_orphan_del(NULL, tmp_inode);
13874 + retval = PTR_ERR(handle);
13875 +- goto out;
13876 ++ goto out_tmp_inode;
13877 + }
13878 +
13879 + ei = EXT4_I(inode);
13880 +@@ -602,10 +605,11 @@ err_out:
13881 + /* Reset the extent details */
13882 + ext4_ext_tree_init(handle, tmp_inode);
13883 + ext4_journal_stop(handle);
13884 +-out:
13885 ++out_tmp_inode:
13886 + unlock_new_inode(tmp_inode);
13887 + iput(tmp_inode);
13888 +-
13889 ++out_unlock:
13890 ++ percpu_up_write(&sbi->s_writepages_rwsem);
13891 + return retval;
13892 + }
13893 +
13894 +@@ -615,7 +619,8 @@ out:
13895 + int ext4_ind_migrate(struct inode *inode)
13896 + {
13897 + struct ext4_extent_header *eh;
13898 +- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
13899 ++ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
13900 ++ struct ext4_super_block *es = sbi->s_es;
13901 + struct ext4_inode_info *ei = EXT4_I(inode);
13902 + struct ext4_extent *ex;
13903 + unsigned int i, len;
13904 +@@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode)
13905 + if (test_opt(inode->i_sb, DELALLOC))
13906 + ext4_alloc_da_blocks(inode);
13907 +
13908 ++ percpu_down_write(&sbi->s_writepages_rwsem);
13909 ++
13910 + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
13911 +- if (IS_ERR(handle))
13912 +- return PTR_ERR(handle);
13913 ++ if (IS_ERR(handle)) {
13914 ++ ret = PTR_ERR(handle);
13915 ++ goto out_unlock;
13916 ++ }
13917 +
13918 + down_write(&EXT4_I(inode)->i_data_sem);
13919 + ret = ext4_ext_check_inode(inode);
13920 +@@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode)
13921 + errout:
13922 + ext4_journal_stop(handle);
13923 + up_write(&EXT4_I(inode)->i_data_sem);
13924 ++out_unlock:
13925 ++ percpu_up_write(&sbi->s_writepages_rwsem);
13926 + return ret;
13927 + }
13928 +diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
13929 +index 38e6a846aac1..0c042bd43246 100644
13930 +--- a/fs/ext4/mmp.c
13931 ++++ b/fs/ext4/mmp.c
13932 +@@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
13933 + {
13934 + __ext4_warning(sb, function, line, "%s", msg);
13935 + __ext4_warning(sb, function, line,
13936 +- "MMP failure info: last update time: %llu, last update "
13937 +- "node: %s, last update device: %s",
13938 +- (long long unsigned int) le64_to_cpu(mmp->mmp_time),
13939 +- mmp->mmp_nodename, mmp->mmp_bdevname);
13940 ++ "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
13941 ++ (unsigned long long)le64_to_cpu(mmp->mmp_time),
13942 ++ (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
13943 ++ (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
13944 + }
13945 +
13946 + /*
13947 +@@ -154,6 +154,7 @@ static int kmmpd(void *data)
13948 + mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
13949 + EXT4_MMP_MIN_CHECK_INTERVAL);
13950 + mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
13951 ++ BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
13952 + bdevname(bh->b_bdev, mmp->mmp_bdevname);
13953 +
13954 + memcpy(mmp->mmp_nodename, init_utsname()->nodename,
13955 +@@ -375,7 +376,8 @@ skip:
13956 + /*
13957 + * Start a kernel thread to update the MMP block periodically.
13958 + */
13959 +- EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
13960 ++ EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
13961 ++ (int)sizeof(mmp->mmp_bdevname),
13962 + bdevname(bh->b_bdev,
13963 + mmp->mmp_bdevname));
13964 + if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
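
mmp_nodename and mmp_bdevname are fixed-size on-disk fields that need not be NUL-terminated, so the warning text and the kmmpd thread name now print them with a "%.*s" precision of sizeof(field) instead of a bare "%s" (and a BUILD_BUG_ON documents that mmp_bdevname is large enough for bdevname()). A standalone illustration of why the precision matters:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char bdevname[8];       /* fixed-size field, no terminator guaranteed */

            memcpy(bdevname, "verylongname", sizeof(bdevname)); /* fills all 8 bytes */

            /* printf("%s", bdevname) would read past the array here. */
            printf("device: %.*s\n", (int)sizeof(bdevname), bdevname);
            return 0;
    }
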
13965 +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
13966 +index 0b5c36bd5418..3f7b3836166c 100644
13967 +--- a/fs/ext4/namei.c
13968 ++++ b/fs/ext4/namei.c
13969 +@@ -1430,6 +1430,7 @@ restart:
13970 + /*
13971 + * We deal with the read-ahead logic here.
13972 + */
13973 ++ cond_resched();
13974 + if (ra_ptr >= ra_max) {
13975 + /* Refill the readahead buffer */
13976 + ra_ptr = 0;
13977 +@@ -2094,6 +2095,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
13978 + retval = ext4_dx_add_entry(handle, &fname, dir, inode);
13979 + if (!retval || (retval != ERR_BAD_DX_DIR))
13980 + goto out;
13981 ++ /* Can we just ignore htree data? */
13982 ++ if (ext4_has_metadata_csum(sb)) {
13983 ++ EXT4_ERROR_INODE(dir,
13984 ++ "Directory has corrupted htree index.");
13985 ++ retval = -EFSCORRUPTED;
13986 ++ goto out;
13987 ++ }
13988 + ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
13989 + dx_fallback++;
13990 + ext4_mark_inode_dirty(handle, dir);
13991 +diff --git a/fs/ext4/super.c b/fs/ext4/super.c
13992 +index 93d8aa6ef661..09b443709bca 100644
13993 +--- a/fs/ext4/super.c
13994 ++++ b/fs/ext4/super.c
13995 +@@ -939,7 +939,7 @@ static void ext4_put_super(struct super_block *sb)
13996 + percpu_counter_destroy(&sbi->s_freeinodes_counter);
13997 + percpu_counter_destroy(&sbi->s_dirs_counter);
13998 + percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
13999 +- percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
14000 ++ percpu_free_rwsem(&sbi->s_writepages_rwsem);
14001 + #ifdef CONFIG_QUOTA
14002 + for (i = 0; i < EXT4_MAXQUOTAS; i++)
14003 + kfree(get_qf_name(sb, sbi, i));
14004 +@@ -2863,17 +2863,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
14005 + return 0;
14006 + }
14007 +
14008 +-#ifndef CONFIG_QUOTA
14009 +- if (ext4_has_feature_quota(sb) && !readonly) {
14010 ++#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
14011 ++ if (!readonly && (ext4_has_feature_quota(sb) ||
14012 ++ ext4_has_feature_project(sb))) {
14013 + ext4_msg(sb, KERN_ERR,
14014 +- "Filesystem with quota feature cannot be mounted RDWR "
14015 +- "without CONFIG_QUOTA");
14016 +- return 0;
14017 +- }
14018 +- if (ext4_has_feature_project(sb) && !readonly) {
14019 +- ext4_msg(sb, KERN_ERR,
14020 +- "Filesystem with project quota feature cannot be mounted RDWR "
14021 +- "without CONFIG_QUOTA");
14022 ++ "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
14023 + return 0;
14024 + }
14025 + #endif /* CONFIG_QUOTA */
14026 +@@ -3668,6 +3662,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
14027 + */
14028 + sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
14029 +
14030 ++ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
14031 ++ if (blocksize < EXT4_MIN_BLOCK_SIZE ||
14032 ++ blocksize > EXT4_MAX_BLOCK_SIZE) {
14033 ++ ext4_msg(sb, KERN_ERR,
14034 ++ "Unsupported filesystem blocksize %d (%d log_block_size)",
14035 ++ blocksize, le32_to_cpu(es->s_log_block_size));
14036 ++ goto failed_mount;
14037 ++ }
14038 ++
14039 + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
14040 + sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
14041 + sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
14042 +@@ -3685,6 +3688,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
14043 + ext4_msg(sb, KERN_ERR,
14044 + "unsupported inode size: %d",
14045 + sbi->s_inode_size);
14046 ++ ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
14047 + goto failed_mount;
14048 + }
14049 + /*
14050 +@@ -3848,14 +3852,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
14051 + if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
14052 + goto failed_mount;
14053 +
14054 +- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
14055 +- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
14056 +- blocksize > EXT4_MAX_BLOCK_SIZE) {
14057 +- ext4_msg(sb, KERN_ERR,
14058 +- "Unsupported filesystem blocksize %d (%d log_block_size)",
14059 +- blocksize, le32_to_cpu(es->s_log_block_size));
14060 +- goto failed_mount;
14061 +- }
14062 + if (le32_to_cpu(es->s_log_block_size) >
14063 + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
14064 + ext4_msg(sb, KERN_ERR,
14065 +@@ -4400,7 +4396,7 @@ no_journal:
14066 + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
14067 + GFP_KERNEL);
14068 + if (!err)
14069 +- err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
14070 ++ err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
14071 +
14072 + if (err) {
14073 + ext4_msg(sb, KERN_ERR, "insufficient memory");
14074 +@@ -4494,7 +4490,7 @@ failed_mount6:
14075 + percpu_counter_destroy(&sbi->s_freeinodes_counter);
14076 + percpu_counter_destroy(&sbi->s_dirs_counter);
14077 + percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
14078 +- percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
14079 ++ percpu_free_rwsem(&sbi->s_writepages_rwsem);
14080 + failed_mount5:
14081 + ext4_ext_release(sb);
14082 + ext4_release_system_zone(sb);
14083 +diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
14084 +index 93af9d7dfcdc..a55919eec035 100644
14085 +--- a/fs/f2fs/sysfs.c
14086 ++++ b/fs/f2fs/sysfs.c
14087 +@@ -507,10 +507,12 @@ int __init f2fs_init_sysfs(void)
14088 +
14089 + ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
14090 + NULL, "features");
14091 +- if (ret)
14092 ++ if (ret) {
14093 ++ kobject_put(&f2fs_feat);
14094 + kset_unregister(&f2fs_kset);
14095 +- else
14096 ++ } else {
14097 + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
14098 ++ }
14099 + return ret;
14100 + }
14101 +
14102 +@@ -531,8 +533,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
14103 + init_completion(&sbi->s_kobj_unregister);
14104 + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL,
14105 + "%s", sb->s_id);
14106 +- if (err)
14107 ++ if (err) {
14108 ++ kobject_put(&sbi->s_kobj);
14109 ++ wait_for_completion(&sbi->s_kobj_unregister);
14110 + return err;
14111 ++ }
14112 +
14113 + if (f2fs_proc_root)
14114 + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
14115 +@@ -557,4 +562,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
14116 + remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
14117 + }
14118 + kobject_del(&sbi->s_kobj);
14119 ++ kobject_put(&sbi->s_kobj);
14120 + }
14121 +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
14122 +index fe4fe155b7fb..15d129b7494b 100644
14123 +--- a/fs/jbd2/checkpoint.c
14124 ++++ b/fs/jbd2/checkpoint.c
14125 +@@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
14126 + "journal space in %s\n", __func__,
14127 + journal->j_devname);
14128 + WARN_ON(1);
14129 +- jbd2_journal_abort(journal, 0);
14130 ++ jbd2_journal_abort(journal, -EIO);
14131 + }
14132 + write_lock(&journal->j_state_lock);
14133 + } else {
14134 +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
14135 +index 7dd613392592..1a4bd8d9636e 100644
14136 +--- a/fs/jbd2/commit.c
14137 ++++ b/fs/jbd2/commit.c
14138 +@@ -783,7 +783,7 @@ start_journal_io:
14139 + err = journal_submit_commit_record(journal, commit_transaction,
14140 + &cbh, crc32_sum);
14141 + if (err)
14142 +- __jbd2_journal_abort_hard(journal);
14143 ++ jbd2_journal_abort(journal, err);
14144 + }
14145 +
14146 + blk_finish_plug(&plug);
14147 +@@ -876,7 +876,7 @@ start_journal_io:
14148 + err = journal_submit_commit_record(journal, commit_transaction,
14149 + &cbh, crc32_sum);
14150 + if (err)
14151 +- __jbd2_journal_abort_hard(journal);
14152 ++ jbd2_journal_abort(journal, err);
14153 + }
14154 + if (cbh)
14155 + err = journal_wait_on_commit_record(journal, cbh);
14156 +@@ -973,29 +973,33 @@ restart_loop:
14157 + * it. */
14158 +
14159 + /*
14160 +- * A buffer which has been freed while still being journaled by
14161 +- * a previous transaction.
14162 +- */
14163 +- if (buffer_freed(bh)) {
14164 ++ * For a buffer which has been freed while still being journaled
14165 ++ * by a previous transaction, refile the buffer to BJ_Forget of
14166 ++ * the running transaction. If the just committed transaction
14167 ++ * contains "add to orphan" operation, we can completely
14168 ++ * invalidate the buffer now. We are rather thorough in that
14169 ++ * since the buffer may still be accessible when blocksize <
14170 ++ * pagesize and it is attached to the last partial page.
14171 ++ */
14172 ++ if (buffer_freed(bh) && !jh->b_next_transaction) {
14173 ++ struct address_space *mapping;
14174 ++
14175 ++ clear_buffer_freed(bh);
14176 ++ clear_buffer_jbddirty(bh);
14177 ++
14178 + /*
14179 +- * If the running transaction is the one containing
14180 +- * "add to orphan" operation (b_next_transaction !=
14181 +- * NULL), we have to wait for that transaction to
14182 +- * commit before we can really get rid of the buffer.
14183 +- * So just clear b_modified to not confuse transaction
14184 +- * credit accounting and refile the buffer to
14185 +- * BJ_Forget of the running transaction. If the just
14186 +- * committed transaction contains "add to orphan"
14187 +- * operation, we can completely invalidate the buffer
14188 +- * now. We are rather through in that since the
14189 +- * buffer may be still accessible when blocksize <
14190 +- * pagesize and it is attached to the last partial
14191 +- * page.
14192 ++ * Block device buffers need to stay mapped all the
14193 ++ * time, so it is enough to clear buffer_jbddirty and
14194 ++ * buffer_freed bits. For the file mapping buffers (i.e.
14195 ++ * journalled data) we need to unmap buffer and clear
14196 ++ * more bits. We also need to be careful about the check
14197 ++ * because the data page mapping can get cleared under
14198 ++ * our hands, in which case we also need not clear more bits
14199 ++ * because the page and buffers will be freed and can
14200 ++ * never be reused once we are done with them.
14201 + */
14202 +- jh->b_modified = 0;
14203 +- if (!jh->b_next_transaction) {
14204 +- clear_buffer_freed(bh);
14205 +- clear_buffer_jbddirty(bh);
14206 ++ mapping = READ_ONCE(bh->b_page->mapping);
14207 ++ if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
14208 + clear_buffer_mapped(bh);
14209 + clear_buffer_new(bh);
14210 + clear_buffer_req(bh);
14211 +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
14212 +index d3cce5c86fd9..6e054b368b5f 100644
14213 +--- a/fs/jbd2/journal.c
14214 ++++ b/fs/jbd2/journal.c
14215 +@@ -1687,6 +1687,11 @@ int jbd2_journal_load(journal_t *journal)
14216 + journal->j_devname);
14217 + return -EFSCORRUPTED;
14218 + }
14219 ++ /*
14220 ++ * clear JBD2_ABORT flag initialized in journal_init_common
14221 ++ * here to update log tail information with the newest seq.
14222 ++ */
14223 ++ journal->j_flags &= ~JBD2_ABORT;
14224 +
14225 + /* OK, we've finished with the dynamic journal bits:
14226 + * reinitialise the dynamic contents of the superblock in memory
14227 +@@ -1694,7 +1699,6 @@ int jbd2_journal_load(journal_t *journal)
14228 + if (journal_reset(journal))
14229 + goto recovery_error;
14230 +
14231 +- journal->j_flags &= ~JBD2_ABORT;
14232 + journal->j_flags |= JBD2_LOADED;
14233 + return 0;
14234 +
14235 +@@ -2115,8 +2119,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
14236 +
14237 + if (journal->j_flags & JBD2_ABORT) {
14238 + write_unlock(&journal->j_state_lock);
14239 +- if (!old_errno && old_errno != -ESHUTDOWN &&
14240 +- errno == -ESHUTDOWN)
14241 ++ if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN)
14242 + jbd2_journal_update_sb_errno(journal);
14243 + return;
14244 + }
14245 +@@ -2124,12 +2127,10 @@ static void __journal_abort_soft (journal_t *journal, int errno)
14246 +
14247 + __jbd2_journal_abort_hard(journal);
14248 +
14249 +- if (errno) {
14250 +- jbd2_journal_update_sb_errno(journal);
14251 +- write_lock(&journal->j_state_lock);
14252 +- journal->j_flags |= JBD2_REC_ERR;
14253 +- write_unlock(&journal->j_state_lock);
14254 +- }
14255 ++ jbd2_journal_update_sb_errno(journal);
14256 ++ write_lock(&journal->j_state_lock);
14257 ++ journal->j_flags |= JBD2_REC_ERR;
14258 ++ write_unlock(&journal->j_state_lock);
14259 + }
14260 +
14261 + /**
14262 +@@ -2171,11 +2172,6 @@ static void __journal_abort_soft (journal_t *journal, int errno)
14263 + * failure to disk. ext3_error, for example, now uses this
14264 + * functionality.
14265 + *
14266 +- * Errors which originate from within the journaling layer will NOT
14267 +- * supply an errno; a null errno implies that absolutely no further
14268 +- * writes are done to the journal (unless there are any already in
14269 +- * progress).
14270 +- *
14271 + */
14272 +
14273 + void jbd2_journal_abort(journal_t *journal, int errno)
14274 +diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
14275 +index 7fe422eced89..f2ff141a4479 100644
14276 +--- a/fs/jbd2/transaction.c
14277 ++++ b/fs/jbd2/transaction.c
14278 +@@ -2231,14 +2231,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
14279 + return -EBUSY;
14280 + }
14281 + /*
14282 +- * OK, buffer won't be reachable after truncate. We just set
14283 +- * j_next_transaction to the running transaction (if there is
14284 +- * one) and mark buffer as freed so that commit code knows it
14285 +- * should clear dirty bits when it is done with the buffer.
14286 ++ * OK, buffer won't be reachable after truncate. We just clear
14287 ++ * b_modified to not confuse transaction credit accounting, and
14288 ++ * set b_next_transaction to the running transaction (if there
14289 ++ * is one) and mark buffer as freed so that commit code knows
14290 ++ * it should clear dirty bits when it is done with the buffer.
14291 + */
14292 + set_buffer_freed(bh);
14293 + if (journal->j_running_transaction && buffer_jbddirty(bh))
14294 + jh->b_next_transaction = journal->j_running_transaction;
14295 ++ jh->b_modified = 0;
14296 + jbd2_journal_put_journal_head(jh);
14297 + spin_unlock(&journal->j_list_lock);
14298 + jbd_unlock_bh_state(bh);
14299 +diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
14300 +index 497a4171ef61..bfb50fc51528 100644
14301 +--- a/fs/ocfs2/journal.h
14302 ++++ b/fs/ocfs2/journal.h
14303 +@@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
14304 + {
14305 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
14306 +
14307 +- oi->i_sync_tid = handle->h_transaction->t_tid;
14308 +- if (datasync)
14309 +- oi->i_datasync_tid = handle->h_transaction->t_tid;
14310 ++ if (!is_handle_aborted(handle)) {
14311 ++ oi->i_sync_tid = handle->h_transaction->t_tid;
14312 ++ if (datasync)
14313 ++ oi->i_datasync_tid = handle->h_transaction->t_tid;
14314 ++ }
14315 + }
14316 +
14317 + #endif /* OCFS2_JOURNAL_H */
14318 +diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
14319 +index 1c59dff530de..34d1cc98260d 100644
14320 +--- a/fs/orangefs/orangefs-debugfs.c
14321 ++++ b/fs/orangefs/orangefs-debugfs.c
14322 +@@ -305,6 +305,7 @@ static void *help_start(struct seq_file *m, loff_t *pos)
14323 +
14324 + static void *help_next(struct seq_file *m, void *v, loff_t *pos)
14325 + {
14326 ++ (*pos)++;
14327 + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
14328 +
14329 + return NULL;
14330 +diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
14331 +index 0037aea97d39..2946713cb00d 100644
14332 +--- a/fs/reiserfs/stree.c
14333 ++++ b/fs/reiserfs/stree.c
14334 +@@ -2250,7 +2250,8 @@ error_out:
14335 + /* also releases the path */
14336 + unfix_nodes(&s_ins_balance);
14337 + #ifdef REISERQUOTA_DEBUG
14338 +- reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
14339 ++ if (inode)
14340 ++ reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
14341 + "reiserquota insert_item(): freeing %u id=%u type=%c",
14342 + quota_bytes, inode->i_uid, head2type(ih));
14343 + #endif
14344 +diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
14345 +index 5208d85dd30c..9caf3948417c 100644
14346 +--- a/fs/reiserfs/super.c
14347 ++++ b/fs/reiserfs/super.c
14348 +@@ -1954,7 +1954,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
14349 + if (!sbi->s_jdev) {
14350 + SWARN(silent, s, "", "Cannot allocate memory for "
14351 + "journal device name");
14352 +- goto error;
14353 ++ goto error_unlocked;
14354 + }
14355 + }
14356 + #ifdef CONFIG_QUOTA
14357 +diff --git a/fs/udf/super.c b/fs/udf/super.c
14358 +index 242d960df9a1..51de27685e18 100644
14359 +--- a/fs/udf/super.c
14360 ++++ b/fs/udf/super.c
14361 +@@ -2467,17 +2467,29 @@ static unsigned int udf_count_free_table(struct super_block *sb,
14362 + static unsigned int udf_count_free(struct super_block *sb)
14363 + {
14364 + unsigned int accum = 0;
14365 +- struct udf_sb_info *sbi;
14366 ++ struct udf_sb_info *sbi = UDF_SB(sb);
14367 + struct udf_part_map *map;
14368 ++ unsigned int part = sbi->s_partition;
14369 ++ int ptype = sbi->s_partmaps[part].s_partition_type;
14370 ++
14371 ++ if (ptype == UDF_METADATA_MAP25) {
14372 ++ part = sbi->s_partmaps[part].s_type_specific.s_metadata.
14373 ++ s_phys_partition_ref;
14374 ++ } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) {
14375 ++ /*
14376 ++ * Filesystems with VAT are append-only and we cannot write to
14377 ++ * them. Let's just report 0 here.
14378 ++ */
14379 ++ return 0;
14380 ++ }
14381 +
14382 +- sbi = UDF_SB(sb);
14383 + if (sbi->s_lvid_bh) {
14384 + struct logicalVolIntegrityDesc *lvid =
14385 + (struct logicalVolIntegrityDesc *)
14386 + sbi->s_lvid_bh->b_data;
14387 +- if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) {
14388 ++ if (le32_to_cpu(lvid->numOfPartitions) > part) {
14389 + accum = le32_to_cpu(
14390 +- lvid->freeSpaceTable[sbi->s_partition]);
14391 ++ lvid->freeSpaceTable[part]);
14392 + if (accum == 0xFFFFFFFF)
14393 + accum = 0;
14394 + }
14395 +@@ -2486,7 +2498,7 @@ static unsigned int udf_count_free(struct super_block *sb)
14396 + if (accum)
14397 + return accum;
14398 +
14399 +- map = &sbi->s_partmaps[sbi->s_partition];
14400 ++ map = &sbi->s_partmaps[part];
14401 + if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
14402 + accum += udf_count_free_bitmap(sb,
14403 + map->s_uspace.s_bitmap);
14404 +diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
14405 +index 087cbe776868..8089e28539f1 100644
14406 +--- a/include/linux/dmaengine.h
14407 ++++ b/include/linux/dmaengine.h
14408 +@@ -677,6 +677,7 @@ struct dma_filter {
14409 + * @fill_align: alignment shift for memset operations
14410 + * @dev_id: unique device ID
14411 + * @dev: struct device reference for dma mapping api
14412 ++ * @owner: owner module (automatically set based on the provided dev)
14413 + * @src_addr_widths: bit mask of src addr widths the device supports
14414 + * @dst_addr_widths: bit mask of dst addr widths the device supports
14415 + * @directions: bit mask of slave direction the device supports since
14416 +@@ -738,6 +739,7 @@ struct dma_device {
14417 +
14418 + int dev_id;
14419 + struct device *dev;
14420 ++ struct module *owner;
14421 +
14422 + u32 src_addr_widths;
14423 + u32 dst_addr_widths;
14424 +diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
14425 +index 99bc5b3ae26e..733eaf95e207 100644
14426 +--- a/include/linux/intel-svm.h
14427 ++++ b/include/linux/intel-svm.h
14428 +@@ -130,7 +130,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
14429 + BUG();
14430 + }
14431 +
14432 +-static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
14433 ++static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid)
14434 + {
14435 + return -EINVAL;
14436 + }
14437 +diff --git a/include/linux/libata.h b/include/linux/libata.h
14438 +index c5188dc389c8..93838d98e3f3 100644
14439 +--- a/include/linux/libata.h
14440 ++++ b/include/linux/libata.h
14441 +@@ -1229,6 +1229,7 @@ struct pci_bits {
14442 + };
14443 +
14444 + extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits);
14445 ++extern void ata_pci_shutdown_one(struct pci_dev *pdev);
14446 + extern void ata_pci_remove_one(struct pci_dev *pdev);
14447 +
14448 + #ifdef CONFIG_PM
14449 +diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
14450 +index 3ef96743db8d..1ecd35664e0d 100644
14451 +--- a/include/linux/list_nulls.h
14452 ++++ b/include/linux/list_nulls.h
14453 +@@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
14454 + struct hlist_nulls_node *first = h->first;
14455 +
14456 + n->next = first;
14457 +- n->pprev = &h->first;
14458 ++ WRITE_ONCE(n->pprev, &h->first);
14459 + h->first = n;
14460 + if (!is_a_nulls(first))
14461 +- first->pprev = &n->next;
14462 ++ WRITE_ONCE(first->pprev, &n->next);
14463 + }
14464 +
14465 + static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
14466 +@@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
14467 +
14468 + WRITE_ONCE(*pprev, next);
14469 + if (!is_a_nulls(next))
14470 +- next->pprev = pprev;
14471 ++ WRITE_ONCE(next->pprev, pprev);
14472 + }
14473 +
14474 + static inline void hlist_nulls_del(struct hlist_nulls_node *n)
14475 + {
14476 + __hlist_nulls_del(n);
14477 +- n->pprev = LIST_POISON2;
14478 ++ WRITE_ONCE(n->pprev, LIST_POISON2);
14479 + }
14480 +
14481 + /**
14482 +diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
14483 +index a10da545b3f6..cf64a9492256 100644
14484 +--- a/include/linux/rculist_nulls.h
14485 ++++ b/include/linux/rculist_nulls.h
14486 +@@ -34,7 +34,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
14487 + {
14488 + if (!hlist_nulls_unhashed(n)) {
14489 + __hlist_nulls_del(n);
14490 +- n->pprev = NULL;
14491 ++ WRITE_ONCE(n->pprev, NULL);
14492 + }
14493 + }
14494 +
14495 +@@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
14496 + static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
14497 + {
14498 + __hlist_nulls_del(n);
14499 +- n->pprev = LIST_POISON2;
14500 ++ WRITE_ONCE(n->pprev, LIST_POISON2);
14501 + }
14502 +
14503 + /**
14504 +@@ -94,10 +94,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
14505 + struct hlist_nulls_node *first = h->first;
14506 +
14507 + n->next = first;
14508 +- n->pprev = &h->first;
14509 ++ WRITE_ONCE(n->pprev, &h->first);
14510 + rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
14511 + if (!is_a_nulls(first))
14512 +- first->pprev = &n->next;
14513 ++ WRITE_ONCE(first->pprev, &n->next);
14514 + }
14515 +
14516 + /**
14517 +diff --git a/include/linux/tty.h b/include/linux/tty.h
14518 +index 0cd621d8c7f0..ead308e996c0 100644
14519 +--- a/include/linux/tty.h
14520 ++++ b/include/linux/tty.h
14521 +@@ -224,6 +224,8 @@ struct tty_port_client_operations {
14522 + void (*write_wakeup)(struct tty_port *port);
14523 + };
14524 +
14525 ++extern const struct tty_port_client_operations tty_port_default_client_ops;
14526 ++
14527 + struct tty_port {
14528 + struct tty_bufhead buf; /* Locked internally */
14529 + struct tty_struct *tty; /* Back pointer */
14530 +diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
14531 +index 8ffa94009d1a..76002416cead 100644
14532 +--- a/include/media/v4l2-device.h
14533 ++++ b/include/media/v4l2-device.h
14534 +@@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
14535 + struct v4l2_subdev *__sd; \
14536 + \
14537 + __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
14538 +- !(grpid) || __sd->grp_id == (grpid), o, f , \
14539 ++ (grpid) == 0 || __sd->grp_id == (grpid), o, f , \
14540 + ##args); \
14541 + } while (0)
14542 +
14543 +@@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
14544 + ({ \
14545 + struct v4l2_subdev *__sd; \
14546 + __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
14547 +- !(grpid) || __sd->grp_id == (grpid), o, f , \
14548 ++ (grpid) == 0 || __sd->grp_id == (grpid), o, f , \
14549 + ##args); \
14550 + })
14551 +
14552 +@@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
14553 + struct v4l2_subdev *__sd; \
14554 + \
14555 + __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
14556 +- !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \
14557 +- ##args); \
14558 ++ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \
14559 ++ f , ##args); \
14560 + } while (0)
14561 +
14562 + /*
14563 +@@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
14564 + ({ \
14565 + struct v4l2_subdev *__sd; \
14566 + __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
14567 +- !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \
14568 +- ##args); \
14569 ++ (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \
14570 ++ f , ##args); \
14571 + })
14572 +
14573 + /*
14574 +diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
14575 +index f0a01a54bd15..df156f1d50b2 100644
14576 +--- a/include/scsi/iscsi_proto.h
14577 ++++ b/include/scsi/iscsi_proto.h
14578 +@@ -638,7 +638,6 @@ struct iscsi_reject {
14579 + #define ISCSI_REASON_BOOKMARK_INVALID 9
14580 + #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10
14581 + #define ISCSI_REASON_NEGOTIATION_RESET 11
14582 +-#define ISCSI_REASON_WAITING_FOR_LOGOUT 12
14583 +
14584 + /* Max. number of Key=Value pairs in a text message */
14585 + #define MAX_KEY_VALUE_PAIRS 8192
14586 +diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
14587 +index 6665cb29e1a2..c2a71fd8dfaf 100644
14588 +--- a/include/sound/rawmidi.h
14589 ++++ b/include/sound/rawmidi.h
14590 +@@ -92,9 +92,9 @@ struct snd_rawmidi_substream {
14591 + struct list_head list; /* list of all substream for given stream */
14592 + int stream; /* direction */
14593 + int number; /* substream number */
14594 +- unsigned int opened: 1, /* open flag */
14595 +- append: 1, /* append flag (merge more streams) */
14596 +- active_sensing: 1; /* send active sensing when close */
14597 ++ bool opened; /* open flag */
14598 ++ bool append; /* append flag (merge more streams) */
14599 ++ bool active_sensing; /* send active sensing when close */
14600 + int use_count; /* use counter (for output) */
14601 + size_t bytes;
14602 + struct snd_rawmidi *rmidi;
14603 +diff --git a/ipc/sem.c b/ipc/sem.c
14604 +index d6dd2dc9ddad..6adc245f3e02 100644
14605 +--- a/ipc/sem.c
14606 ++++ b/ipc/sem.c
14607 +@@ -2248,11 +2248,9 @@ void exit_sem(struct task_struct *tsk)
14608 + ipc_assert_locked_object(&sma->sem_perm);
14609 + list_del(&un->list_id);
14610 +
14611 +- /* we are the last process using this ulp, acquiring ulp->lock
14612 +- * isn't required. Besides that, we are also protected against
14613 +- * IPC_RMID as we hold sma->sem_perm lock now
14614 +- */
14615 ++ spin_lock(&ulp->lock);
14616 + list_del_rcu(&un->list_proc);
14617 ++ spin_unlock(&ulp->lock);
14618 +
14619 + /* perform adjustments registered in un */
14620 + for (i = 0; i < sma->sem_nsems; i++) {
14621 +diff --git a/kernel/cpu.c b/kernel/cpu.c
14622 +index 49273130e4f1..96c0a868232e 100644
14623 +--- a/kernel/cpu.c
14624 ++++ b/kernel/cpu.c
14625 +@@ -494,8 +494,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
14626 + if (WARN_ON_ONCE((!cpu_online(cpu))))
14627 + return -ECANCELED;
14628 +
14629 +- /* Unpark the stopper thread and the hotplug thread of the target cpu */
14630 +- stop_machine_unpark(cpu);
14631 ++ /* Unpark the hotplug thread of the target cpu */
14632 + kthread_unpark(st->thread);
14633 +
14634 + /*
14635 +@@ -1064,8 +1063,8 @@ void notify_cpu_starting(unsigned int cpu)
14636 +
14637 + /*
14638 + * Called from the idle task. Wake up the controlling task which brings the
14639 +- * stopper and the hotplug thread of the upcoming CPU up and then delegates
14640 +- * the rest of the online bringup to the hotplug thread.
14641 ++ * hotplug thread of the upcoming CPU up and then delegates the rest of the
14642 ++ * online bringup to the hotplug thread.
14643 + */
14644 + void cpuhp_online_idle(enum cpuhp_state state)
14645 + {
14646 +@@ -1075,6 +1074,12 @@ void cpuhp_online_idle(enum cpuhp_state state)
14647 + if (state != CPUHP_AP_ONLINE_IDLE)
14648 + return;
14649 +
14650 ++ /*
14651 ++ * Unpark the stopper thread before we start the idle loop (and start
14652 ++ * scheduling); this ensures the stopper task is always available.
14653 ++ */
14654 ++ stop_machine_unpark(smp_processor_id());
14655 ++
14656 + st->state = CPUHP_AP_ONLINE_IDLE;
14657 + complete_ap_thread(st, true);
14658 + }
14659 +diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
14660 +index 4ef7f3b820ce..5230c47fc43e 100644
14661 +--- a/kernel/irq/internals.h
14662 ++++ b/kernel/irq/internals.h
14663 +@@ -119,8 +119,6 @@ static inline void unregister_handler_proc(unsigned int irq,
14664 +
14665 + extern bool irq_can_set_affinity_usr(unsigned int irq);
14666 +
14667 +-extern int irq_select_affinity_usr(unsigned int irq);
14668 +-
14669 + extern void irq_set_thread_affinity(struct irq_desc *desc);
14670 +
14671 + extern int irq_do_set_affinity(struct irq_data *data,
14672 +diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
14673 +index 9c86a3e45110..037e8fc1b008 100644
14674 +--- a/kernel/irq/manage.c
14675 ++++ b/kernel/irq/manage.c
14676 +@@ -382,23 +382,9 @@ int irq_setup_affinity(struct irq_desc *desc)
14677 + {
14678 + return irq_select_affinity(irq_desc_get_irq(desc));
14679 + }
14680 +-#endif
14681 ++#endif /* CONFIG_AUTO_IRQ_AFFINITY */
14682 ++#endif /* CONFIG_SMP */
14683 +
14684 +-/*
14685 +- * Called when a bogus affinity is set via /proc/irq
14686 +- */
14687 +-int irq_select_affinity_usr(unsigned int irq)
14688 +-{
14689 +- struct irq_desc *desc = irq_to_desc(irq);
14690 +- unsigned long flags;
14691 +- int ret;
14692 +-
14693 +- raw_spin_lock_irqsave(&desc->lock, flags);
14694 +- ret = irq_setup_affinity(desc);
14695 +- raw_spin_unlock_irqrestore(&desc->lock, flags);
14696 +- return ret;
14697 +-}
14698 +-#endif
14699 +
14700 + /**
14701 + * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
14702 +diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
14703 +index c010cc0daf79..b031db9d56c6 100644
14704 +--- a/kernel/irq/proc.c
14705 ++++ b/kernel/irq/proc.c
14706 +@@ -117,6 +117,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
14707 + return show_irq_affinity(AFFINITY_LIST, m);
14708 + }
14709 +
14710 ++#ifndef CONFIG_AUTO_IRQ_AFFINITY
14711 ++static inline int irq_select_affinity_usr(unsigned int irq)
14712 ++{
14713 ++ /*
14714 ++ * If the interrupt is started up already then this fails. The
14715 ++ * interrupt is assigned to an online CPU already. There is no
14716 ++ * point in moving it around randomly. Tell user space that the
14717 ++ * selected mask is bogus.
14718 ++ *
14719 ++ * If not then any change to the affinity is pointless because the
14720 ++ * startup code invokes irq_setup_affinity() which will select
14721 ++ * a online CPU anyway.
14722 ++ */
14723 ++ return -EINVAL;
14724 ++}
14725 ++#else
14726 ++/* ALPHA magic affinity auto selector. Keep it for historical reasons. */
14727 ++static inline int irq_select_affinity_usr(unsigned int irq)
14728 ++{
14729 ++ return irq_select_affinity(irq);
14730 ++}
14731 ++#endif
14732 +
14733 + static ssize_t write_irq_affinity(int type, struct file *file,
14734 + const char __user *buffer, size_t count, loff_t *pos)
14735 +diff --git a/kernel/padata.c b/kernel/padata.c
14736 +index 87540ce72aea..528a251217df 100644
14737 +--- a/kernel/padata.c
14738 ++++ b/kernel/padata.c
14739 +@@ -34,6 +34,8 @@
14740 +
14741 + #define MAX_OBJ_NUM 1000
14742 +
14743 ++static void padata_free_pd(struct parallel_data *pd);
14744 ++
14745 + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
14746 + {
14747 + int cpu, target_cpu;
14748 +@@ -292,6 +294,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
14749 + struct padata_serial_queue *squeue;
14750 + struct parallel_data *pd;
14751 + LIST_HEAD(local_list);
14752 ++ int cnt;
14753 +
14754 + local_bh_disable();
14755 + squeue = container_of(serial_work, struct padata_serial_queue, work);
14756 +@@ -301,6 +304,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
14757 + list_replace_init(&squeue->serial.list, &local_list);
14758 + spin_unlock(&squeue->serial.lock);
14759 +
14760 ++ cnt = 0;
14761 ++
14762 + while (!list_empty(&local_list)) {
14763 + struct padata_priv *padata;
14764 +
14765 +@@ -310,9 +315,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
14766 + list_del_init(&padata->list);
14767 +
14768 + padata->serial(padata);
14769 +- atomic_dec(&pd->refcnt);
14770 ++ cnt++;
14771 + }
14772 + local_bh_enable();
14773 ++
14774 ++ if (atomic_sub_and_test(cnt, &pd->refcnt))
14775 ++ padata_free_pd(pd);
14776 + }
14777 +
14778 + /**
14779 +@@ -435,7 +443,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
14780 + setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
14781 + atomic_set(&pd->seq_nr, -1);
14782 + atomic_set(&pd->reorder_objects, 0);
14783 +- atomic_set(&pd->refcnt, 0);
14784 ++ atomic_set(&pd->refcnt, 1);
14785 + pd->pinst = pinst;
14786 + spin_lock_init(&pd->lock);
14787 +
14788 +@@ -460,31 +468,6 @@ static void padata_free_pd(struct parallel_data *pd)
14789 + kfree(pd);
14790 + }
14791 +
14792 +-/* Flush all objects out of the padata queues. */
14793 +-static void padata_flush_queues(struct parallel_data *pd)
14794 +-{
14795 +- int cpu;
14796 +- struct padata_parallel_queue *pqueue;
14797 +- struct padata_serial_queue *squeue;
14798 +-
14799 +- for_each_cpu(cpu, pd->cpumask.pcpu) {
14800 +- pqueue = per_cpu_ptr(pd->pqueue, cpu);
14801 +- flush_work(&pqueue->work);
14802 +- }
14803 +-
14804 +- del_timer_sync(&pd->timer);
14805 +-
14806 +- if (atomic_read(&pd->reorder_objects))
14807 +- padata_reorder(pd);
14808 +-
14809 +- for_each_cpu(cpu, pd->cpumask.cbcpu) {
14810 +- squeue = per_cpu_ptr(pd->squeue, cpu);
14811 +- flush_work(&squeue->work);
14812 +- }
14813 +-
14814 +- BUG_ON(atomic_read(&pd->refcnt) != 0);
14815 +-}
14816 +-
14817 + static void __padata_start(struct padata_instance *pinst)
14818 + {
14819 + pinst->flags |= PADATA_INIT;
14820 +@@ -498,10 +481,6 @@ static void __padata_stop(struct padata_instance *pinst)
14821 + pinst->flags &= ~PADATA_INIT;
14822 +
14823 + synchronize_rcu();
14824 +-
14825 +- get_online_cpus();
14826 +- padata_flush_queues(pinst->pd);
14827 +- put_online_cpus();
14828 + }
14829 +
14830 + /* Replace the internal control structure with a new one. */
14831 +@@ -522,8 +501,8 @@ static void padata_replace(struct padata_instance *pinst,
14832 + if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
14833 + notification_mask |= PADATA_CPU_SERIAL;
14834 +
14835 +- padata_flush_queues(pd_old);
14836 +- padata_free_pd(pd_old);
14837 ++ if (atomic_dec_and_test(&pd_old->refcnt))
14838 ++ padata_free_pd(pd_old);
14839 +
14840 + if (notification_mask)
14841 + blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
14842 +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
14843 +index 8974ecbcca3c..8a8d92a8045b 100644
14844 +--- a/kernel/trace/ftrace.c
14845 ++++ b/kernel/trace/ftrace.c
14846 +@@ -6317,9 +6317,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
14847 + struct trace_array *tr = m->private;
14848 + struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids);
14849 +
14850 +- if (v == FTRACE_NO_PIDS)
14851 ++ if (v == FTRACE_NO_PIDS) {
14852 ++ (*pos)++;
14853 + return NULL;
14854 +-
14855 ++ }
14856 + return trace_pid_next(pid_list, v, pos);
14857 + }
14858 +
14859 +diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
14860 +index e2da180ca172..31e91efe243e 100644
14861 +--- a/kernel/trace/trace_events_trigger.c
14862 ++++ b/kernel/trace/trace_events_trigger.c
14863 +@@ -127,9 +127,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
14864 + {
14865 + struct trace_event_file *event_file = event_file_data(m->private);
14866 +
14867 +- if (t == SHOW_AVAILABLE_TRIGGERS)
14868 ++ if (t == SHOW_AVAILABLE_TRIGGERS) {
14869 ++ (*pos)++;
14870 + return NULL;
14871 +-
14872 ++ }
14873 + return seq_list_next(t, &event_file->triggers, pos);
14874 + }
14875 +
14876 +diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
14877 +index 75bf1bcb4a8a..92b76f9e25ed 100644
14878 +--- a/kernel/trace/trace_stat.c
14879 ++++ b/kernel/trace/trace_stat.c
14880 +@@ -278,18 +278,22 @@ static int tracing_stat_init(void)
14881 +
14882 + d_tracing = tracing_init_dentry();
14883 + if (IS_ERR(d_tracing))
14884 +- return 0;
14885 ++ return -ENODEV;
14886 +
14887 + stat_dir = tracefs_create_dir("trace_stat", d_tracing);
14888 +- if (!stat_dir)
14889 ++ if (!stat_dir) {
14890 + pr_warn("Could not create tracefs 'trace_stat' entry\n");
14891 ++ return -ENOMEM;
14892 ++ }
14893 + return 0;
14894 + }
14895 +
14896 + static int init_stat_file(struct stat_session *session)
14897 + {
14898 +- if (!stat_dir && tracing_stat_init())
14899 +- return -ENODEV;
14900 ++ int ret;
14901 ++
14902 ++ if (!stat_dir && (ret = tracing_stat_init()))
14903 ++ return ret;
14904 +
14905 + session->file = tracefs_create_file(session->ts->name, 0644,
14906 + stat_dir,
14907 +@@ -302,7 +306,7 @@ static int init_stat_file(struct stat_session *session)
14908 + int register_stat_tracer(struct tracer_stat *trace)
14909 + {
14910 + struct stat_session *session, *node;
14911 +- int ret;
14912 ++ int ret = -EINVAL;
14913 +
14914 + if (!trace)
14915 + return -EINVAL;
14916 +@@ -313,17 +317,15 @@ int register_stat_tracer(struct tracer_stat *trace)
14917 + /* Already registered? */
14918 + mutex_lock(&all_stat_sessions_mutex);
14919 + list_for_each_entry(node, &all_stat_sessions, session_list) {
14920 +- if (node->ts == trace) {
14921 +- mutex_unlock(&all_stat_sessions_mutex);
14922 +- return -EINVAL;
14923 +- }
14924 ++ if (node->ts == trace)
14925 ++ goto out;
14926 + }
14927 +- mutex_unlock(&all_stat_sessions_mutex);
14928 +
14929 ++ ret = -ENOMEM;
14930 + /* Init the session */
14931 + session = kzalloc(sizeof(*session), GFP_KERNEL);
14932 + if (!session)
14933 +- return -ENOMEM;
14934 ++ goto out;
14935 +
14936 + session->ts = trace;
14937 + INIT_LIST_HEAD(&session->session_list);
14938 +@@ -332,15 +334,16 @@ int register_stat_tracer(struct tracer_stat *trace)
14939 + ret = init_stat_file(session);
14940 + if (ret) {
14941 + destroy_session(session);
14942 +- return ret;
14943 ++ goto out;
14944 + }
14945 +
14946 ++ ret = 0;
14947 + /* Register */
14948 +- mutex_lock(&all_stat_sessions_mutex);
14949 + list_add_tail(&session->session_list, &all_stat_sessions);
14950 ++ out:
14951 + mutex_unlock(&all_stat_sessions_mutex);
14952 +
14953 +- return 0;
14954 ++ return ret;
14955 + }
14956 +
14957 + void unregister_stat_tracer(struct tracer_stat *trace)
14958 +diff --git a/kernel/watchdog.c b/kernel/watchdog.c
14959 +index 087994b23f8b..e4db5d54c07c 100644
14960 +--- a/kernel/watchdog.c
14961 ++++ b/kernel/watchdog.c
14962 +@@ -164,6 +164,8 @@ static void lockup_detector_update_enable(void)
14963 +
14964 + #ifdef CONFIG_SOFTLOCKUP_DETECTOR
14965 +
14966 ++#define SOFTLOCKUP_RESET ULONG_MAX
14967 ++
14968 + /* Global variables, exported for sysctl */
14969 + unsigned int __read_mostly softlockup_panic =
14970 + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
14971 +@@ -271,7 +273,7 @@ notrace void touch_softlockup_watchdog_sched(void)
14972 + * Preemption can be enabled. It doesn't matter which CPU's timestamp
14973 + * gets zeroed here, so use the raw_ operation.
14974 + */
14975 +- raw_cpu_write(watchdog_touch_ts, 0);
14976 ++ raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
14977 + }
14978 +
14979 + notrace void touch_softlockup_watchdog(void)
14980 +@@ -295,14 +297,14 @@ void touch_all_softlockup_watchdogs(void)
14981 + * the softlockup check.
14982 + */
14983 + for_each_cpu(cpu, &watchdog_allowed_mask)
14984 +- per_cpu(watchdog_touch_ts, cpu) = 0;
14985 ++ per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
14986 + wq_watchdog_touch(-1);
14987 + }
14988 +
14989 + void touch_softlockup_watchdog_sync(void)
14990 + {
14991 + __this_cpu_write(softlockup_touch_sync, true);
14992 +- __this_cpu_write(watchdog_touch_ts, 0);
14993 ++ __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
14994 + }
14995 +
14996 + static int is_softlockup(unsigned long touch_ts)
14997 +@@ -354,7 +356,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
14998 + /* .. and repeat */
14999 + hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
15000 +
15001 +- if (touch_ts == 0) {
15002 ++ if (touch_ts == SOFTLOCKUP_RESET) {
15003 + if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
15004 + /*
15005 + * If the time stamp was touched atomically
15006 +diff --git a/lib/scatterlist.c b/lib/scatterlist.c
15007 +index 11fce289d116..834c846c5af8 100644
15008 +--- a/lib/scatterlist.c
15009 ++++ b/lib/scatterlist.c
15010 +@@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
15011 + if (prv)
15012 + table->nents = ++table->orig_nents;
15013 +
15014 +- return -ENOMEM;
15015 ++ return -ENOMEM;
15016 + }
15017 +
15018 + sg_init_table(sg, alloc_size);
15019 +diff --git a/lib/stackdepot.c b/lib/stackdepot.c
15020 +index f87d138e9672..759ff419fe61 100644
15021 +--- a/lib/stackdepot.c
15022 ++++ b/lib/stackdepot.c
15023 +@@ -92,15 +92,19 @@ static bool init_stack_slab(void **prealloc)
15024 + return true;
15025 + if (stack_slabs[depot_index] == NULL) {
15026 + stack_slabs[depot_index] = *prealloc;
15027 ++ *prealloc = NULL;
15028 + } else {
15029 +- stack_slabs[depot_index + 1] = *prealloc;
15030 ++ /* If this is the last depot slab, do not touch the next one. */
15031 ++ if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
15032 ++ stack_slabs[depot_index + 1] = *prealloc;
15033 ++ *prealloc = NULL;
15034 ++ }
15035 + /*
15036 + * This smp_store_release pairs with smp_load_acquire() from
15037 + * |next_slab_inited| above and in depot_save_stack().
15038 + */
15039 + smp_store_release(&next_slab_inited, 1);
15040 + }
15041 +- *prealloc = NULL;
15042 + return true;
15043 + }
15044 +
15045 +diff --git a/mm/vmscan.c b/mm/vmscan.c
15046 +index 0cc3c1eb15f5..c6962aa5ddb4 100644
15047 +--- a/mm/vmscan.c
15048 ++++ b/mm/vmscan.c
15049 +@@ -2369,10 +2369,13 @@ out:
15050 + /*
15051 + * Scan types proportional to swappiness and
15052 + * their relative recent reclaim efficiency.
15053 +- * Make sure we don't miss the last page
15054 +- * because of a round-off error.
15055 ++ * Make sure we don't miss the last page in
15056 ++ * the offlined memory cgroups because of a
15057 ++ * round-off error.
15058 + */
15059 +- scan = DIV64_U64_ROUND_UP(scan * fraction[file],
15060 ++ scan = mem_cgroup_online(memcg) ?
15061 ++ div64_u64(scan * fraction[file], denominator) :
15062 ++ DIV64_U64_ROUND_UP(scan * fraction[file],
15063 + denominator);
15064 + break;
15065 + case SCAN_FILE:
15066 +diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
15067 +index af3a12a36d88..f268c5c3eedb 100644
15068 +--- a/net/dsa/tag_qca.c
15069 ++++ b/net/dsa/tag_qca.c
15070 +@@ -41,7 +41,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
15071 + struct dsa_slave_priv *p = netdev_priv(dev);
15072 + u16 *phdr, hdr;
15073 +
15074 +- if (skb_cow_head(skb, 0) < 0)
15075 ++ if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
15076 + return NULL;
15077 +
15078 + skb_push(skb, QCA_HDR_LEN);
15079 +diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
15080 +index fe8e8a1622b5..186f97f1c6c0 100644
15081 +--- a/net/netfilter/xt_hashlimit.c
15082 ++++ b/net/netfilter/xt_hashlimit.c
15083 +@@ -845,6 +845,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
15084 + return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
15085 + }
15086 +
15087 ++#define HASHLIMIT_MAX_SIZE 1048576
15088 ++
15089 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
15090 + struct xt_hashlimit_htable **hinfo,
15091 + struct hashlimit_cfg3 *cfg,
15092 +@@ -855,6 +857,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
15093 +
15094 + if (cfg->gc_interval == 0 || cfg->expire == 0)
15095 + return -EINVAL;
15096 ++ if (cfg->size > HASHLIMIT_MAX_SIZE) {
15097 ++ cfg->size = HASHLIMIT_MAX_SIZE;
15098 ++ pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
15099 ++ }
15100 ++ if (cfg->max > HASHLIMIT_MAX_SIZE) {
15101 ++ cfg->max = HASHLIMIT_MAX_SIZE;
15102 ++ pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
15103 ++ }
15104 + if (par->family == NFPROTO_IPV4) {
15105 + if (cfg->srcmask > 32 || cfg->dstmask > 32)
15106 + return -EINVAL;
15107 +diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
15108 +index 1879665e5a2b..80a5a6d503c8 100644
15109 +--- a/net/sched/cls_flower.c
15110 ++++ b/net/sched/cls_flower.c
15111 +@@ -445,6 +445,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
15112 + [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 },
15113 + [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 },
15114 + [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 },
15115 ++ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
15116 + };
15117 +
15118 + static void fl_set_key_val(struct nlattr **tb,
15119 +diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
15120 +index d8fd152779c8..a985f91e8b47 100644
15121 +--- a/net/sched/cls_matchall.c
15122 ++++ b/net/sched/cls_matchall.c
15123 +@@ -136,6 +136,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle)
15124 + static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
15125 + [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
15126 + [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
15127 ++ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
15128 + };
15129 +
15130 + static int mall_set_parms(struct net *net, struct tcf_proto *tp,
15131 +diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
15132 +index d2d01cf70224..576c37d86051 100644
15133 +--- a/net/smc/smc_diag.c
15134 ++++ b/net/smc/smc_diag.c
15135 +@@ -38,15 +38,14 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
15136 + {
15137 + struct smc_sock *smc = smc_sk(sk);
15138 +
15139 ++ memset(r, 0, sizeof(*r));
15140 + r->diag_family = sk->sk_family;
15141 ++ sock_diag_save_cookie(sk, r->id.idiag_cookie);
15142 + if (!smc->clcsock)
15143 + return;
15144 + r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
15145 + r->id.idiag_dport = smc->clcsock->sk->sk_dport;
15146 + r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
15147 +- sock_diag_save_cookie(sk, r->id.idiag_cookie);
15148 +- memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
15149 +- memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
15150 + r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
15151 + r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
15152 + }
15153 +diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
15154 +index 27aac273205b..fa423fcd1a92 100644
15155 +--- a/scripts/kconfig/confdata.c
15156 ++++ b/scripts/kconfig/confdata.c
15157 +@@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode)
15158 +
15159 + sym_calc_value(csym);
15160 + if (mode == def_random)
15161 +- has_changed = randomize_choice_values(csym);
15162 ++ has_changed |= randomize_choice_values(csym);
15163 + else {
15164 + set_all_choice_values(csym);
15165 + has_changed = true;
15166 +diff --git a/security/selinux/avc.c b/security/selinux/avc.c
15167 +index 2380b8d72cec..23f387b30ece 100644
15168 +--- a/security/selinux/avc.c
15169 ++++ b/security/selinux/avc.c
15170 +@@ -863,7 +863,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid,
15171 + if (orig->ae.xp_node) {
15172 + rc = avc_xperms_populate(node, orig->ae.xp_node);
15173 + if (rc) {
15174 +- kmem_cache_free(avc_node_cachep, node);
15175 ++ avc_node_kill(node);
15176 + goto out_unlock;
15177 + }
15178 + }
15179 +diff --git a/sound/core/control.c b/sound/core/control.c
15180 +index 36571cd49be3..a0ce22164957 100644
15181 +--- a/sound/core/control.c
15182 ++++ b/sound/core/control.c
15183 +@@ -1467,8 +1467,9 @@ static int call_tlv_handler(struct snd_ctl_file *file, int op_flag,
15184 + if (kctl->tlv.c == NULL)
15185 + return -ENXIO;
15186 +
15187 +- /* When locked, this is unavailable. */
15188 +- if (vd->owner != NULL && vd->owner != file)
15189 ++ /* Write and command operations are not allowed for a locked element. */
15190 ++ if (op_flag != SNDRV_CTL_TLV_OP_READ &&
15191 ++ vd->owner != NULL && vd->owner != file)
15192 + return -EPERM;
15193 +
15194 + return kctl->tlv.c(kctl, op_flag, size, buf);
15195 +diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
15196 +index 92b0d4523a07..6fe93d5f6f71 100644
15197 +--- a/sound/core/seq/seq_clientmgr.c
15198 ++++ b/sound/core/seq/seq_clientmgr.c
15199 +@@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event,
15200 + event->queue = queue;
15201 + event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK;
15202 + if (real_time) {
15203 +- event->time.time = snd_seq_timer_get_cur_time(q->timer);
15204 ++ event->time.time = snd_seq_timer_get_cur_time(q->timer, true);
15205 + event->flags |= SNDRV_SEQ_TIME_STAMP_REAL;
15206 + } else {
15207 + event->time.tick = snd_seq_timer_get_cur_tick(q->timer);
15208 +@@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client,
15209 + tmr = queue->timer;
15210 + status->events = queue->tickq->cells + queue->timeq->cells;
15211 +
15212 +- status->time = snd_seq_timer_get_cur_time(tmr);
15213 ++ status->time = snd_seq_timer_get_cur_time(tmr, true);
15214 + status->tick = snd_seq_timer_get_cur_tick(tmr);
15215 +
15216 + status->running = tmr->running;
15217 +diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
15218 +index 1a6dc4ff44a6..ea1aa0796276 100644
15219 +--- a/sound/core/seq/seq_queue.c
15220 ++++ b/sound/core/seq/seq_queue.c
15221 +@@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
15222 + {
15223 + unsigned long flags;
15224 + struct snd_seq_event_cell *cell;
15225 ++ snd_seq_tick_time_t cur_tick;
15226 ++ snd_seq_real_time_t cur_time;
15227 +
15228 + if (q == NULL)
15229 + return;
15230 +@@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
15231 +
15232 + __again:
15233 + /* Process tick queue... */
15234 ++ cur_tick = snd_seq_timer_get_cur_tick(q->timer);
15235 + for (;;) {
15236 +- cell = snd_seq_prioq_cell_out(q->tickq,
15237 +- &q->timer->tick.cur_tick);
15238 ++ cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick);
15239 + if (!cell)
15240 + break;
15241 + snd_seq_dispatch_event(cell, atomic, hop);
15242 + }
15243 +
15244 + /* Process time queue... */
15245 ++ cur_time = snd_seq_timer_get_cur_time(q->timer, false);
15246 + for (;;) {
15247 +- cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time);
15248 ++ cell = snd_seq_prioq_cell_out(q->timeq, &cur_time);
15249 + if (!cell)
15250 + break;
15251 + snd_seq_dispatch_event(cell, atomic, hop);
15252 +@@ -415,6 +418,7 @@ int snd_seq_queue_check_access(int queueid, int client)
15253 + int snd_seq_queue_set_owner(int queueid, int client, int locked)
15254 + {
15255 + struct snd_seq_queue *q = queueptr(queueid);
15256 ++ unsigned long flags;
15257 +
15258 + if (q == NULL)
15259 + return -EINVAL;
15260 +@@ -424,8 +428,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked)
15261 + return -EPERM;
15262 + }
15263 +
15264 ++ spin_lock_irqsave(&q->owner_lock, flags);
15265 + q->locked = locked ? 1 : 0;
15266 + q->owner = client;
15267 ++ spin_unlock_irqrestore(&q->owner_lock, flags);
15268 + queue_access_unlock(q);
15269 + queuefree(q);
15270 +
15271 +@@ -564,15 +570,17 @@ void snd_seq_queue_client_termination(int client)
15272 + unsigned long flags;
15273 + int i;
15274 + struct snd_seq_queue *q;
15275 ++ bool matched;
15276 +
15277 + for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
15278 + if ((q = queueptr(i)) == NULL)
15279 + continue;
15280 + spin_lock_irqsave(&q->owner_lock, flags);
15281 +- if (q->owner == client)
15282 ++ matched = (q->owner == client);
15283 ++ if (matched)
15284 + q->klocked = 1;
15285 + spin_unlock_irqrestore(&q->owner_lock, flags);
15286 +- if (q->owner == client) {
15287 ++ if (matched) {
15288 + if (q->timer->running)
15289 + snd_seq_timer_stop(q->timer);
15290 + snd_seq_timer_reset(q->timer);
15291 +@@ -764,6 +772,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
15292 + int i, bpm;
15293 + struct snd_seq_queue *q;
15294 + struct snd_seq_timer *tmr;
15295 ++ bool locked;
15296 ++ int owner;
15297 +
15298 + for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
15299 + if ((q = queueptr(i)) == NULL)
15300 +@@ -775,9 +785,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
15301 + else
15302 + bpm = 0;
15303 +
15304 ++ spin_lock_irq(&q->owner_lock);
15305 ++ locked = q->locked;
15306 ++ owner = q->owner;
15307 ++ spin_unlock_irq(&q->owner_lock);
15308 ++
15309 + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name);
15310 +- snd_iprintf(buffer, "owned by client : %d\n", q->owner);
15311 +- snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free");
15312 ++ snd_iprintf(buffer, "owned by client : %d\n", owner);
15313 ++ snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free");
15314 + snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq));
15315 + snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq));
15316 + snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped");
15317 +diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
15318 +index 0e1feb597586..bd5e5a5d52a8 100644
15319 +--- a/sound/core/seq/seq_timer.c
15320 ++++ b/sound/core/seq/seq_timer.c
15321 +@@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr)
15322 + }
15323 +
15324 + /* return current 'real' time. use timeofday() to get better granularity. */
15325 +-snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
15326 ++snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
15327 ++ bool adjust_ktime)
15328 + {
15329 + snd_seq_real_time_t cur_time;
15330 + unsigned long flags;
15331 +
15332 + spin_lock_irqsave(&tmr->lock, flags);
15333 + cur_time = tmr->cur_time;
15334 +- if (tmr->running) {
15335 ++ if (adjust_ktime && tmr->running) {
15336 + struct timespec64 tm;
15337 +
15338 + ktime_get_ts64(&tm);
15339 +@@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
15340 + high PPQ values) */
15341 + snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr)
15342 + {
15343 +- return tmr->tick.cur_tick;
15344 ++ snd_seq_tick_time_t cur_tick;
15345 ++ unsigned long flags;
15346 ++
15347 ++ spin_lock_irqsave(&tmr->lock, flags);
15348 ++ cur_tick = tmr->tick.cur_tick;
15349 ++ spin_unlock_irqrestore(&tmr->lock, flags);
15350 ++ return cur_tick;
15351 + }
15352 +
15353 +
15354 +diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h
15355 +index 9506b661fe5b..5d47d559465e 100644
15356 +--- a/sound/core/seq/seq_timer.h
15357 ++++ b/sound/core/seq/seq_timer.h
15358 +@@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq);
15359 + int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position);
15360 + int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position);
15361 + int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base);
15362 +-snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr);
15363 ++snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
15364 ++ bool adjust_ktime);
15365 + snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr);
15366 +
15367 + extern int seq_default_timer_class;
15368 +diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c
15369 +index f21633cd9b38..acbe61b8db7b 100644
15370 +--- a/sound/hda/hdmi_chmap.c
15371 ++++ b/sound/hda/hdmi_chmap.c
15372 +@@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen)
15373 +
15374 + for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) {
15375 + if (spk_alloc & (1 << i))
15376 +- j += snprintf(buf + j, buflen - j, " %s",
15377 ++ j += scnprintf(buf + j, buflen - j, " %s",
15378 + cea_speaker_allocation_names[i]);
15379 + }
15380 + buf[j] = '\0'; /* necessary when j == 0 */
15381 +diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
15382 +index e3f3351da480..a6f7561e7bb9 100644
15383 +--- a/sound/pci/hda/hda_codec.c
15384 ++++ b/sound/pci/hda/hda_codec.c
15385 +@@ -4002,7 +4002,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen)
15386 +
15387 + for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++)
15388 + if (pcm & (AC_SUPPCM_BITS_8 << i))
15389 +- j += snprintf(buf + j, buflen - j, " %d", bits[i]);
15390 ++ j += scnprintf(buf + j, buflen - j, " %d", bits[i]);
15391 +
15392 + buf[j] = '\0'; /* necessary when j == 0 */
15393 + }
15394 +diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c
15395 +index ba7fe9b6655c..864cc8c9ada0 100644
15396 +--- a/sound/pci/hda/hda_eld.c
15397 ++++ b/sound/pci/hda/hda_eld.c
15398 +@@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen)
15399 +
15400 + for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++)
15401 + if (pcm & (1 << i))
15402 +- j += snprintf(buf + j, buflen - j, " %d",
15403 ++ j += scnprintf(buf + j, buflen - j, " %d",
15404 + alsa_rates[i]);
15405 +
15406 + buf[j] = '\0'; /* necessary when j == 0 */
15407 +diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
15408 +index 9b7efece4484..2a173de7ca02 100644
15409 +--- a/sound/pci/hda/hda_sysfs.c
15410 ++++ b/sound/pci/hda/hda_sysfs.c
15411 +@@ -221,7 +221,7 @@ static ssize_t init_verbs_show(struct device *dev,
15412 + mutex_lock(&codec->user_mutex);
15413 + for (i = 0; i < codec->init_verbs.used; i++) {
15414 + struct hda_verb *v = snd_array_elem(&codec->init_verbs, i);
15415 +- len += snprintf(buf + len, PAGE_SIZE - len,
15416 ++ len += scnprintf(buf + len, PAGE_SIZE - len,
15417 + "0x%02x 0x%03x 0x%04x\n",
15418 + v->nid, v->verb, v->param);
15419 + }
15420 +@@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev,
15421 + mutex_lock(&codec->user_mutex);
15422 + for (i = 0; i < codec->hints.used; i++) {
15423 + struct hda_hint *hint = snd_array_elem(&codec->hints, i);
15424 +- len += snprintf(buf + len, PAGE_SIZE - len,
15425 ++ len += scnprintf(buf + len, PAGE_SIZE - len,
15426 + "%s = %s\n", hint->key, hint->val);
15427 + }
15428 + mutex_unlock(&codec->user_mutex);
15429 +diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
15430 +index 382b6d2ed803..9cc9304ff21a 100644
15431 +--- a/sound/pci/hda/patch_conexant.c
15432 ++++ b/sound/pci/hda/patch_conexant.c
15433 +@@ -969,6 +969,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
15434 + SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410),
15435 + SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410),
15436 + SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410),
15437 ++ SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410),
15438 + SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410),
15439 + SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410),
15440 + SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD),
15441 +diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
15442 +index f21405597215..12913368c231 100644
15443 +--- a/sound/pci/hda/patch_hdmi.c
15444 ++++ b/sound/pci/hda/patch_hdmi.c
15445 +@@ -2574,9 +2574,12 @@ static int alloc_intel_hdmi(struct hda_codec *codec)
15446 + /* parse and post-process for Intel codecs */
15447 + static int parse_intel_hdmi(struct hda_codec *codec)
15448 + {
15449 +- int err;
15450 ++ int err, retries = 3;
15451 ++
15452 ++ do {
15453 ++ err = hdmi_parse_codec(codec);
15454 ++ } while (err < 0 && retries--);
15455 +
15456 +- err = hdmi_parse_codec(codec);
15457 + if (err < 0) {
15458 + generic_spec_free(codec);
15459 + return err;
15460 +diff --git a/sound/sh/aica.c b/sound/sh/aica.c
15461 +index fdc680ae8aa0..d9acf551a898 100644
15462 +--- a/sound/sh/aica.c
15463 ++++ b/sound/sh/aica.c
15464 +@@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length)
15465 + }
15466 +
15467 + /* spu_memload - write to SPU address space */
15468 +-static void spu_memload(u32 toi, void *from, int length)
15469 ++static void spu_memload(u32 toi, const void *from, int length)
15470 + {
15471 + unsigned long flags;
15472 +- u32 *froml = from;
15473 ++ const u32 *froml = from;
15474 + u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi);
15475 + int i;
15476 + u32 val;
15477 +diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c
15478 +index 834b2574786f..6251b5e1b64a 100644
15479 +--- a/sound/sh/sh_dac_audio.c
15480 ++++ b/sound/sh/sh_dac_audio.c
15481 +@@ -190,7 +190,6 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream,
15482 + {
15483 + /* channel is not used (interleaved data) */
15484 + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream);
15485 +- struct snd_pcm_runtime *runtime = substream->runtime;
15486 +
15487 + if (copy_from_user_toio(chip->data_buffer + pos, src, count))
15488 + return -EFAULT;
15489 +@@ -210,7 +209,6 @@ static int snd_sh_dac_pcm_copy_kernel(struct snd_pcm_substream *substream,
15490 + {
15491 + /* channel is not used (interleaved data) */
15492 + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream);
15493 +- struct snd_pcm_runtime *runtime = substream->runtime;
15494 +
15495 + memcpy_toio(chip->data_buffer + pos, src, count);
15496 + chip->buffer_end = chip->data_buffer + pos + count;
15497 +@@ -229,7 +227,6 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream,
15498 + {
15499 + /* channel is not used (interleaved data) */
15500 + struct snd_sh_dac *chip = snd_pcm_substream_chip(substream);
15501 +- struct snd_pcm_runtime *runtime = substream->runtime;
15502 +
15503 + memset_io(chip->data_buffer + pos, 0, count);
15504 + chip->buffer_end = chip->data_buffer + pos + count;
15505 +diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
15506 +index 4a56f3dfba51..23887613b5c3 100644
15507 +--- a/sound/soc/atmel/Kconfig
15508 ++++ b/sound/soc/atmel/Kconfig
15509 +@@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA
15510 +
15511 + config SND_ATMEL_SOC_SSC_DMA
15512 + tristate
15513 ++ select SND_ATMEL_SOC_DMA
15514 ++ select SND_ATMEL_SOC_PDC
15515 +
15516 + config SND_ATMEL_SOC_SSC
15517 + tristate
15518 +diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c
15519 +index 7a312168f864..a031f25031b4 100644
15520 +--- a/sound/soc/sunxi/sun8i-codec.c
15521 ++++ b/sound/soc/sunxi/sun8i-codec.c
15522 +@@ -71,6 +71,7 @@
15523 +
15524 + #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12)
15525 + #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8)
15526 ++#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2)
15527 + #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4)
15528 + #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6)
15529 + #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9)
15530 +@@ -221,7 +222,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
15531 + return -EINVAL;
15532 + }
15533 + regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL,
15534 +- BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT),
15535 ++ SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK,
15536 + value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT);
15537 +
15538 + return 0;
15539 +diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
15540 +index 51ee7910e98c..4872c27f6054 100644
15541 +--- a/sound/usb/quirks.c
15542 ++++ b/sound/usb/quirks.c
15543 +@@ -1151,6 +1151,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
15544 + case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
15545 + case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
15546 + case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
15547 ++ case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */
15548 + return true;
15549 + }
15550 + return false;
15551 +diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
15552 +index f4b3cda412fc..e75271e731b2 100644
15553 +--- a/sound/usb/usx2y/usX2Yhwdep.c
15554 ++++ b/sound/usb/usx2y/usX2Yhwdep.c
15555 +@@ -131,7 +131,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw,
15556 + info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code
15557 + if (us428->chip_status & USX2Y_STAT_CHIP_INIT)
15558 + info->chip_ready = 1;
15559 +- info->version = USX2Y_DRIVER_VERSION;
15560 ++ info->version = USX2Y_DRIVER_VERSION;
15561 + return 0;
15562 + }
15563 +
15564 +diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
15565 +index b24afc0e6e81..45b50b89009a 100644
15566 +--- a/tools/lib/api/fs/fs.c
15567 ++++ b/tools/lib/api/fs/fs.c
15568 +@@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs)
15569 + size_t name_len = strlen(fs->name);
15570 + /* name + "_PATH" + '\0' */
15571 + char upper_name[name_len + 5 + 1];
15572 ++
15573 + memcpy(upper_name, fs->name, name_len);
15574 + mem_toupper(upper_name, name_len);
15575 + strcpy(&upper_name[name_len], "_PATH");
15576 +@@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs)
15577 + return false;
15578 +
15579 + fs->found = true;
15580 +- strncpy(fs->path, override_path, sizeof(fs->path));
15581 ++ strncpy(fs->path, override_path, sizeof(fs->path) - 1);
15582 ++ fs->path[sizeof(fs->path) - 1] = '\0';
15583 + return true;
15584 + }
15585 +
15586 +diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
15587 +index 0a0e9112f284..5cb9f009f2be 100644
15588 +--- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
15589 ++++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
15590 +@@ -909,7 +909,7 @@ EndTable
15591 +
15592 + GrpTable: Grp3_2
15593 + 0: TEST Ev,Iz
15594 +-1:
15595 ++1: TEST Ev,Iz
15596 + 2: NOT Ev
15597 + 3: NEG Ev
15598 + 4: MUL rAX,Ev
15599 +diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c
15600 +index d4b59ab979a0..f55943b6d1e2 100644
15601 +--- a/tools/testing/selftests/size/get_size.c
15602 ++++ b/tools/testing/selftests/size/get_size.c
15603 +@@ -12,23 +12,35 @@
15604 + * own execution. It also attempts to have as few dependencies
15605 + * on kernel features as possible.
15606 + *
15607 +- * It should be statically linked, with startup libs avoided.
15608 +- * It uses no library calls, and only the following 3 syscalls:
15609 ++ * It should be statically linked, with startup libs avoided. It uses
15610 ++ * no library calls except the syscall() function for the following 3
15611 ++ * syscalls:
15612 + * sysinfo(), write(), and _exit()
15613 + *
15614 + * For output, it avoids printf (which in some C libraries
15615 + * has large external dependencies) by implementing it's own
15616 + * number output and print routines, and using __builtin_strlen()
15617 ++ *
15618 ++ * The test may crash if any of the above syscalls fails because in some
15619 ++ * libc implementations (e.g. the GNU C Library) errno is saved in
15620 ++ * thread-local storage, which does not get initialized due to avoiding
15621 ++ * startup libs.
15622 + */
15623 +
15624 + #include <sys/sysinfo.h>
15625 + #include <unistd.h>
15626 ++#include <sys/syscall.h>
15627 +
15628 + #define STDOUT_FILENO 1
15629 +
15630 + static int print(const char *s)
15631 + {
15632 +- return write(STDOUT_FILENO, s, __builtin_strlen(s));
15633 ++ size_t len = 0;
15634 ++
15635 ++ while (s[len] != '\0')
15636 ++ len++;
15637 ++
15638 ++ return syscall(SYS_write, STDOUT_FILENO, s, len);
15639 + }
15640 +
15641 + static inline char *num_to_str(unsigned long num, char *buf, int len)
15642 +@@ -80,12 +92,12 @@ void _start(void)
15643 + print("TAP version 13\n");
15644 + print("# Testing system size.\n");
15645 +
15646 +- ccode = sysinfo(&info);
15647 ++ ccode = syscall(SYS_sysinfo, &info);
15648 + if (ccode < 0) {
15649 + print("not ok 1");
15650 + print(test_name);
15651 + print(" ---\n reason: \"could not get sysinfo\"\n ...\n");
15652 +- _exit(ccode);
15653 ++ syscall(SYS_exit, ccode);
15654 + }
15655 + print("ok 1");
15656 + print(test_name);
15657 +@@ -101,5 +113,5 @@ void _start(void)
15658 + print(" ...\n");
15659 + print("1..1\n");
15660 +
15661 +- _exit(0);
15662 ++ syscall(SYS_exit, 0);
15663 + }
15664 +diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c
15665 +index b4c37e76a6e0..187dfaa67d0a 100644
15666 +--- a/tools/usb/usbip/src/usbip_network.c
15667 ++++ b/tools/usb/usbip/src/usbip_network.c
15668 +@@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg)
15669 + info("using port %d (\"%s\")", usbip_port, usbip_port_string);
15670 + }
15671 +
15672 +-void usbip_net_pack_uint32_t(int pack, uint32_t *num)
15673 ++uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num)
15674 + {
15675 + uint32_t i;
15676 +
15677 + if (pack)
15678 +- i = htonl(*num);
15679 ++ i = htonl(num);
15680 + else
15681 +- i = ntohl(*num);
15682 ++ i = ntohl(num);
15683 +
15684 +- *num = i;
15685 ++ return i;
15686 + }
15687 +
15688 +-void usbip_net_pack_uint16_t(int pack, uint16_t *num)
15689 ++uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num)
15690 + {
15691 + uint16_t i;
15692 +
15693 + if (pack)
15694 +- i = htons(*num);
15695 ++ i = htons(num);
15696 + else
15697 +- i = ntohs(*num);
15698 ++ i = ntohs(num);
15699 +
15700 +- *num = i;
15701 ++ return i;
15702 + }
15703 +
15704 + void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev)
15705 + {
15706 +- usbip_net_pack_uint32_t(pack, &udev->busnum);
15707 +- usbip_net_pack_uint32_t(pack, &udev->devnum);
15708 +- usbip_net_pack_uint32_t(pack, &udev->speed);
15709 ++ udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum);
15710 ++ udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum);
15711 ++ udev->speed = usbip_net_pack_uint32_t(pack, udev->speed);
15712 +
15713 +- usbip_net_pack_uint16_t(pack, &udev->idVendor);
15714 +- usbip_net_pack_uint16_t(pack, &udev->idProduct);
15715 +- usbip_net_pack_uint16_t(pack, &udev->bcdDevice);
15716 ++ udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor);
15717 ++ udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct);
15718 ++ udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice);
15719 + }
15720 +
15721 + void usbip_net_pack_usb_interface(int pack __attribute__((unused)),
15722 +@@ -141,6 +141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen)
15723 + return usbip_net_xmit(sockfd, buff, bufflen, 1);
15724 + }
15725 +
15726 ++static inline void usbip_net_pack_op_common(int pack,
15727 ++ struct op_common *op_common)
15728 ++{
15729 ++ op_common->version = usbip_net_pack_uint16_t(pack, op_common->version);
15730 ++ op_common->code = usbip_net_pack_uint16_t(pack, op_common->code);
15731 ++ op_common->status = usbip_net_pack_uint32_t(pack, op_common->status);
15732 ++}
15733 ++
15734 + int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
15735 + {
15736 + struct op_common op_common;
15737 +@@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
15738 + op_common.code = code;
15739 + op_common.status = status;
15740 +
15741 +- PACK_OP_COMMON(1, &op_common);
15742 ++ usbip_net_pack_op_common(1, &op_common);
15743 +
15744 + rc = usbip_net_send(sockfd, &op_common, sizeof(op_common));
15745 + if (rc < 0) {
15746 +@@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code)
15747 + goto err;
15748 + }
15749 +
15750 +- PACK_OP_COMMON(0, &op_common);
15751 ++ usbip_net_pack_op_common(0, &op_common);
15752 +
15753 + if (op_common.version != USBIP_VERSION) {
15754 + dbg("version mismatch: %d %d", op_common.version,
15755 +diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h
15756 +index 7032687621d3..8e8330c0f1c9 100644
15757 +--- a/tools/usb/usbip/src/usbip_network.h
15758 ++++ b/tools/usb/usbip/src/usbip_network.h
15759 +@@ -34,12 +34,6 @@ struct op_common {
15760 +
15761 + } __attribute__((packed));
15762 +
15763 +-#define PACK_OP_COMMON(pack, op_common) do {\
15764 +- usbip_net_pack_uint16_t(pack, &(op_common)->version);\
15765 +- usbip_net_pack_uint16_t(pack, &(op_common)->code);\
15766 +- usbip_net_pack_uint32_t(pack, &(op_common)->status);\
15767 +-} while (0)
15768 +-
15769 + /* ---------------------------------------------------------------------- */
15770 + /* Dummy Code */
15771 + #define OP_UNSPEC 0x00
15772 +@@ -165,11 +159,11 @@ struct op_devlist_reply_extra {
15773 + } while (0)
15774 +
15775 + #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\
15776 +- usbip_net_pack_uint32_t(pack, &(reply)->ndev);\
15777 ++ (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\
15778 + } while (0)
15779 +
15780 +-void usbip_net_pack_uint32_t(int pack, uint32_t *num);
15781 +-void usbip_net_pack_uint16_t(int pack, uint16_t *num);
15782 ++uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num);
15783 ++uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num);
15784 + void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev);
15785 + void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf);
15786 +