Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:4.4 commit in: /
Date: Fri, 14 Feb 2020 23:34:27
Message-Id: 1581723239.99e4cd48b7fc7ffe5e9689983a0f607689471d1a.mpagano@gentoo
1 commit: 99e4cd48b7fc7ffe5e9689983a0f607689471d1a
2 Author: Mike Pagano <mpagano@gentoo.org>
3 AuthorDate: Fri Feb 14 23:33:59 2020 +0000
4 Commit: Mike Pagano <mpagano@gentoo.org>
5 CommitDate: Fri Feb 14 23:33:59 2020 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=99e4cd48
7
8 Linux patch 4.4.214
9
10 Signed-off-by: Mike Pagano <mpagano@gentoo.org>
11
12 0000_README | 4 +
13 1213_linux-4.4.214.patch | 11112 +++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 11116 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index fb1c3ff..a0335a4 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -895,6 +895,10 @@ Patch: 1212_linux-4.4.213.patch
21 From: http://www.kernel.org
22 Desc: Linux 4.4.213
23
24 +Patch: 1213_linux-4.4.214.patch
25 +From: http://www.kernel.org
26 +Desc: Linux 4.4.214
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1213_linux-4.4.214.patch b/1213_linux-4.4.214.patch
33 new file mode 100644
34 index 0000000..d9f280f
35 --- /dev/null
36 +++ b/1213_linux-4.4.214.patch
37 @@ -0,0 +1,11112 @@
38 +diff --git a/Makefile b/Makefile
39 +index 6e86896525d9..89f09ef4c552 100644
40 +--- a/Makefile
41 ++++ b/Makefile
42 +@@ -1,6 +1,6 @@
43 + VERSION = 4
44 + PATCHLEVEL = 4
45 +-SUBLEVEL = 213
46 ++SUBLEVEL = 214
47 + EXTRAVERSION =
48 + NAME = Blurry Fish Butt
49 +
50 +diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
51 +index 44a578c10732..2f52e584f3f7 100644
52 +--- a/arch/arc/boot/dts/axs10x_mb.dtsi
53 ++++ b/arch/arc/boot/dts/axs10x_mb.dtsi
54 +@@ -44,6 +44,7 @@
55 + interrupt-names = "macirq";
56 + phy-mode = "rgmii";
57 + snps,pbl = < 32 >;
58 ++ snps,multicast-filter-bins = <256>;
59 + clocks = <&apbclk>;
60 + clock-names = "stmmaceth";
61 + max-speed = <100>;
62 +diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
63 +index a53279160f98..6b1894400ccc 100644
64 +--- a/arch/arm/boot/dts/sama5d3.dtsi
65 ++++ b/arch/arm/boot/dts/sama5d3.dtsi
66 +@@ -1106,49 +1106,49 @@
67 + usart0_clk: usart0_clk {
68 + #clock-cells = <0>;
69 + reg = <12>;
70 +- atmel,clk-output-range = <0 66000000>;
71 ++ atmel,clk-output-range = <0 83000000>;
72 + };
73 +
74 + usart1_clk: usart1_clk {
75 + #clock-cells = <0>;
76 + reg = <13>;
77 +- atmel,clk-output-range = <0 66000000>;
78 ++ atmel,clk-output-range = <0 83000000>;
79 + };
80 +
81 + usart2_clk: usart2_clk {
82 + #clock-cells = <0>;
83 + reg = <14>;
84 +- atmel,clk-output-range = <0 66000000>;
85 ++ atmel,clk-output-range = <0 83000000>;
86 + };
87 +
88 + usart3_clk: usart3_clk {
89 + #clock-cells = <0>;
90 + reg = <15>;
91 +- atmel,clk-output-range = <0 66000000>;
92 ++ atmel,clk-output-range = <0 83000000>;
93 + };
94 +
95 + uart0_clk: uart0_clk {
96 + #clock-cells = <0>;
97 + reg = <16>;
98 +- atmel,clk-output-range = <0 66000000>;
99 ++ atmel,clk-output-range = <0 83000000>;
100 + };
101 +
102 + twi0_clk: twi0_clk {
103 + reg = <18>;
104 + #clock-cells = <0>;
105 +- atmel,clk-output-range = <0 16625000>;
106 ++ atmel,clk-output-range = <0 41500000>;
107 + };
108 +
109 + twi1_clk: twi1_clk {
110 + #clock-cells = <0>;
111 + reg = <19>;
112 +- atmel,clk-output-range = <0 16625000>;
113 ++ atmel,clk-output-range = <0 41500000>;
114 + };
115 +
116 + twi2_clk: twi2_clk {
117 + #clock-cells = <0>;
118 + reg = <20>;
119 +- atmel,clk-output-range = <0 16625000>;
120 ++ atmel,clk-output-range = <0 41500000>;
121 + };
122 +
123 + mci0_clk: mci0_clk {
124 +@@ -1164,19 +1164,19 @@
125 + spi0_clk: spi0_clk {
126 + #clock-cells = <0>;
127 + reg = <24>;
128 +- atmel,clk-output-range = <0 133000000>;
129 ++ atmel,clk-output-range = <0 166000000>;
130 + };
131 +
132 + spi1_clk: spi1_clk {
133 + #clock-cells = <0>;
134 + reg = <25>;
135 +- atmel,clk-output-range = <0 133000000>;
136 ++ atmel,clk-output-range = <0 166000000>;
137 + };
138 +
139 + tcb0_clk: tcb0_clk {
140 + #clock-cells = <0>;
141 + reg = <26>;
142 +- atmel,clk-output-range = <0 133000000>;
143 ++ atmel,clk-output-range = <0 166000000>;
144 + };
145 +
146 + pwm_clk: pwm_clk {
147 +@@ -1187,7 +1187,7 @@
148 + adc_clk: adc_clk {
149 + #clock-cells = <0>;
150 + reg = <29>;
151 +- atmel,clk-output-range = <0 66000000>;
152 ++ atmel,clk-output-range = <0 83000000>;
153 + };
154 +
155 + dma0_clk: dma0_clk {
156 +@@ -1218,13 +1218,13 @@
157 + ssc0_clk: ssc0_clk {
158 + #clock-cells = <0>;
159 + reg = <38>;
160 +- atmel,clk-output-range = <0 66000000>;
161 ++ atmel,clk-output-range = <0 83000000>;
162 + };
163 +
164 + ssc1_clk: ssc1_clk {
165 + #clock-cells = <0>;
166 + reg = <39>;
167 +- atmel,clk-output-range = <0 66000000>;
168 ++ atmel,clk-output-range = <0 83000000>;
169 + };
170 +
171 + sha_clk: sha_clk {
172 +diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi
173 +index c5a3772741bf..0fac79f75c06 100644
174 +--- a/arch/arm/boot/dts/sama5d3_can.dtsi
175 ++++ b/arch/arm/boot/dts/sama5d3_can.dtsi
176 +@@ -37,13 +37,13 @@
177 + can0_clk: can0_clk {
178 + #clock-cells = <0>;
179 + reg = <40>;
180 +- atmel,clk-output-range = <0 66000000>;
181 ++ atmel,clk-output-range = <0 83000000>;
182 + };
183 +
184 + can1_clk: can1_clk {
185 + #clock-cells = <0>;
186 + reg = <41>;
187 +- atmel,clk-output-range = <0 66000000>;
188 ++ atmel,clk-output-range = <0 83000000>;
189 + };
190 + };
191 + };
192 +diff --git a/arch/arm/boot/dts/sama5d3_tcb1.dtsi b/arch/arm/boot/dts/sama5d3_tcb1.dtsi
193 +index 801f9745e82f..b80dbc45a3c2 100644
194 +--- a/arch/arm/boot/dts/sama5d3_tcb1.dtsi
195 ++++ b/arch/arm/boot/dts/sama5d3_tcb1.dtsi
196 +@@ -23,6 +23,7 @@
197 + tcb1_clk: tcb1_clk {
198 + #clock-cells = <0>;
199 + reg = <27>;
200 ++ atmel,clk-output-range = <0 166000000>;
201 + };
202 + };
203 + };
204 +diff --git a/arch/arm/boot/dts/sama5d3_uart.dtsi b/arch/arm/boot/dts/sama5d3_uart.dtsi
205 +index 2511d748867b..71818c7bfb67 100644
206 +--- a/arch/arm/boot/dts/sama5d3_uart.dtsi
207 ++++ b/arch/arm/boot/dts/sama5d3_uart.dtsi
208 +@@ -42,13 +42,13 @@
209 + uart0_clk: uart0_clk {
210 + #clock-cells = <0>;
211 + reg = <16>;
212 +- atmel,clk-output-range = <0 66000000>;
213 ++ atmel,clk-output-range = <0 83000000>;
214 + };
215 +
216 + uart1_clk: uart1_clk {
217 + #clock-cells = <0>;
218 + reg = <17>;
219 +- atmel,clk-output-range = <0 66000000>;
220 ++ atmel,clk-output-range = <0 83000000>;
221 + };
222 + };
223 + };
224 +diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
225 +index 9a2f0b051e10..c6cf775975a2 100644
226 +--- a/arch/arm/mach-tegra/sleep-tegra30.S
227 ++++ b/arch/arm/mach-tegra/sleep-tegra30.S
228 +@@ -379,6 +379,14 @@ _pll_m_c_x_done:
229 + pll_locked r1, r0, CLK_RESET_PLLC_BASE
230 + pll_locked r1, r0, CLK_RESET_PLLX_BASE
231 +
232 ++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1
233 ++ cmp r1, #TEGRA30
234 ++ beq 1f
235 ++ ldr r1, [r0, #CLK_RESET_PLLP_BASE]
236 ++ bic r1, r1, #(1<<31) @ disable PllP bypass
237 ++ str r1, [r0, #CLK_RESET_PLLP_BASE]
238 ++1:
239 ++
240 + mov32 r7, TEGRA_TMRUS_BASE
241 + ldr r1, [r7]
242 + add r1, r1, #LOCK_DELAY
243 +@@ -638,7 +646,10 @@ tegra30_switch_cpu_to_clk32k:
244 + str r0, [r4, #PMC_PLLP_WB0_OVERRIDE]
245 +
246 + /* disable PLLP, PLLA, PLLC and PLLX */
247 ++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1
248 ++ cmp r1, #TEGRA30
249 + ldr r0, [r5, #CLK_RESET_PLLP_BASE]
250 ++ orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster
251 + bic r0, r0, #(1 << 30)
252 + str r0, [r5, #CLK_RESET_PLLP_BASE]
253 + ldr r0, [r5, #CLK_RESET_PLLA_BASE]
254 +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
255 +index 01b6c00a7060..4ece20178145 100644
256 +--- a/arch/powerpc/Kconfig
257 ++++ b/arch/powerpc/Kconfig
258 +@@ -93,6 +93,7 @@ config PPC
259 + select BINFMT_ELF
260 + select ARCH_HAS_ELF_RANDOMIZE
261 + select OF
262 ++ select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
263 + select OF_EARLY_FLATTREE
264 + select OF_RESERVED_MEM
265 + select HAVE_FTRACE_MCOUNT_RECORD
266 +diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
267 +index 9d3bd4c45a24..1c4354f922fd 100644
268 +--- a/arch/powerpc/boot/4xx.c
269 ++++ b/arch/powerpc/boot/4xx.c
270 +@@ -232,7 +232,7 @@ void ibm4xx_denali_fixup_memsize(void)
271 + dpath = 8; /* 64 bits */
272 +
273 + /* get address pins (rows) */
274 +- val = SDRAM0_READ(DDR0_42);
275 ++ val = SDRAM0_READ(DDR0_42);
276 +
277 + row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT);
278 + if (row > max_row)
279 +diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
280 +index 767ac1572c02..54c6ba87a25a 100644
281 +--- a/arch/powerpc/kvm/book3s_hv.c
282 ++++ b/arch/powerpc/kvm/book3s_hv.c
283 +@@ -1669,7 +1669,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
284 + mutex_unlock(&kvm->lock);
285 +
286 + if (!vcore)
287 +- goto free_vcpu;
288 ++ goto uninit_vcpu;
289 +
290 + spin_lock(&vcore->lock);
291 + ++vcore->num_threads;
292 +@@ -1685,6 +1685,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
293 +
294 + return vcpu;
295 +
296 ++uninit_vcpu:
297 ++ kvm_vcpu_uninit(vcpu);
298 + free_vcpu:
299 + kmem_cache_free(kvm_vcpu_cache, vcpu);
300 + out:
301 +diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
302 +index 81313844d81c..91db2852aa6e 100644
303 +--- a/arch/powerpc/kvm/book3s_pr.c
304 ++++ b/arch/powerpc/kvm/book3s_pr.c
305 +@@ -1434,10 +1434,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
306 +
307 + err = kvmppc_mmu_init(vcpu);
308 + if (err < 0)
309 +- goto uninit_vcpu;
310 ++ goto free_shared_page;
311 +
312 + return vcpu;
313 +
314 ++free_shared_page:
315 ++ free_page((unsigned long)vcpu->arch.shared);
316 + uninit_vcpu:
317 + kvm_vcpu_uninit(vcpu);
318 + free_shadow_vcpu:
319 +diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
320 +index e8b1027e1b5b..0e65d52eb56d 100644
321 +--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
322 ++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
323 +@@ -205,8 +205,10 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
324 +
325 + for (i = 0; i < scns_per_block; i++) {
326 + pfn = PFN_DOWN(phys_addr);
327 +- if (!pfn_present(pfn))
328 ++ if (!pfn_present(pfn)) {
329 ++ phys_addr += MIN_MEMORY_BLOCK_SIZE;
330 + continue;
331 ++ }
332 +
333 + rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
334 + phys_addr += MIN_MEMORY_BLOCK_SIZE;
335 +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
336 +index 3e8865b187de..17b322e8b799 100644
337 +--- a/arch/powerpc/platforms/pseries/iommu.c
338 ++++ b/arch/powerpc/platforms/pseries/iommu.c
339 +@@ -202,10 +202,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
340 + return be64_to_cpu(*tcep);
341 + }
342 +
343 +-static void tce_free_pSeriesLP(struct iommu_table*, long, long);
344 ++static void tce_free_pSeriesLP(unsigned long liobn, long, long);
345 + static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
346 +
347 +-static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
348 ++static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
349 + long npages, unsigned long uaddr,
350 + enum dma_data_direction direction,
351 + struct dma_attrs *attrs)
352 +@@ -216,25 +216,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
353 + int ret = 0;
354 + long tcenum_start = tcenum, npages_start = npages;
355 +
356 +- rpn = __pa(uaddr) >> TCE_SHIFT;
357 ++ rpn = __pa(uaddr) >> tceshift;
358 + proto_tce = TCE_PCI_READ;
359 + if (direction != DMA_TO_DEVICE)
360 + proto_tce |= TCE_PCI_WRITE;
361 +
362 + while (npages--) {
363 +- tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
364 +- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
365 ++ tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
366 ++ rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
367 +
368 + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
369 + ret = (int)rc;
370 +- tce_free_pSeriesLP(tbl, tcenum_start,
371 ++ tce_free_pSeriesLP(liobn, tcenum_start,
372 + (npages_start - (npages + 1)));
373 + break;
374 + }
375 +
376 + if (rc && printk_ratelimit()) {
377 + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
378 +- printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
379 ++ printk("\tindex = 0x%llx\n", (u64)liobn);
380 + printk("\ttcenum = 0x%llx\n", (u64)tcenum);
381 + printk("\ttce val = 0x%llx\n", tce );
382 + dump_stack();
383 +@@ -263,7 +263,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
384 + unsigned long flags;
385 +
386 + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
387 +- return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
388 ++ return tce_build_pSeriesLP(tbl->it_index, tcenum,
389 ++ tbl->it_page_shift, npages, uaddr,
390 + direction, attrs);
391 + }
392 +
393 +@@ -279,8 +280,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
394 + /* If allocation fails, fall back to the loop implementation */
395 + if (!tcep) {
396 + local_irq_restore(flags);
397 +- return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
398 +- direction, attrs);
399 ++ return tce_build_pSeriesLP(tbl->it_index, tcenum,
400 ++ tbl->it_page_shift,
401 ++ npages, uaddr, direction, attrs);
402 + }
403 + __this_cpu_write(tce_page, tcep);
404 + }
405 +@@ -331,16 +333,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
406 + return ret;
407 + }
408 +
409 +-static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
410 ++static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
411 + {
412 + u64 rc;
413 +
414 + while (npages--) {
415 +- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
416 ++ rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
417 +
418 + if (rc && printk_ratelimit()) {
419 + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
420 +- printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
421 ++ printk("\tindex = 0x%llx\n", (u64)liobn);
422 + printk("\ttcenum = 0x%llx\n", (u64)tcenum);
423 + dump_stack();
424 + }
425 +@@ -355,7 +357,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
426 + u64 rc;
427 +
428 + if (!firmware_has_feature(FW_FEATURE_MULTITCE))
429 +- return tce_free_pSeriesLP(tbl, tcenum, npages);
430 ++ return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
431 +
432 + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
433 +
434 +@@ -470,6 +472,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
435 + u64 rc = 0;
436 + long l, limit;
437 +
438 ++ if (!firmware_has_feature(FW_FEATURE_MULTITCE)) {
439 ++ unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
440 ++ unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
441 ++ be64_to_cpu(maprange->dma_base);
442 ++ unsigned long tcenum = dmastart >> tceshift;
443 ++ unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
444 ++ void *uaddr = __va(start_pfn << PAGE_SHIFT);
445 ++
446 ++ return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
447 ++ tcenum, tceshift, npages, (unsigned long) uaddr,
448 ++ DMA_BIDIRECTIONAL, 0);
449 ++ }
450 ++
451 + local_irq_disable(); /* to protect tcep and the page behind it */
452 + tcep = __this_cpu_read(tce_page);
453 +
454 +diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
455 +index 66013b4fe10d..58da9c4addb2 100644
456 +--- a/arch/sparc/include/uapi/asm/ipcbuf.h
457 ++++ b/arch/sparc/include/uapi/asm/ipcbuf.h
458 +@@ -14,19 +14,19 @@
459 +
460 + struct ipc64_perm
461 + {
462 +- __kernel_key_t key;
463 +- __kernel_uid_t uid;
464 +- __kernel_gid_t gid;
465 +- __kernel_uid_t cuid;
466 +- __kernel_gid_t cgid;
467 ++ __kernel_key_t key;
468 ++ __kernel_uid32_t uid;
469 ++ __kernel_gid32_t gid;
470 ++ __kernel_uid32_t cuid;
471 ++ __kernel_gid32_t cgid;
472 + #ifndef __arch64__
473 +- unsigned short __pad0;
474 ++ unsigned short __pad0;
475 + #endif
476 +- __kernel_mode_t mode;
477 +- unsigned short __pad1;
478 +- unsigned short seq;
479 +- unsigned long long __unused1;
480 +- unsigned long long __unused2;
481 ++ __kernel_mode_t mode;
482 ++ unsigned short __pad1;
483 ++ unsigned short seq;
484 ++ unsigned long long __unused1;
485 ++ unsigned long long __unused2;
486 + };
487 +
488 + #endif /* __SPARC_IPCBUF_H */
489 +diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
490 +index c2a9dd816c5c..9a7983968ba8 100644
491 +--- a/arch/x86/kernel/cpu/tsx.c
492 ++++ b/arch/x86/kernel/cpu/tsx.c
493 +@@ -115,11 +115,12 @@ void __init tsx_init(void)
494 + tsx_disable();
495 +
496 + /*
497 +- * tsx_disable() will change the state of the
498 +- * RTM CPUID bit. Clear it here since it is now
499 +- * expected to be not set.
500 ++ * tsx_disable() will change the state of the RTM and HLE CPUID
501 ++ * bits. Clear them here since they are now expected to be not
502 ++ * set.
503 + */
504 + setup_clear_cpu_cap(X86_FEATURE_RTM);
505 ++ setup_clear_cpu_cap(X86_FEATURE_HLE);
506 + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
507 +
508 + /*
509 +@@ -131,10 +132,10 @@ void __init tsx_init(void)
510 + tsx_enable();
511 +
512 + /*
513 +- * tsx_enable() will change the state of the
514 +- * RTM CPUID bit. Force it here since it is now
515 +- * expected to be set.
516 ++ * tsx_enable() will change the state of the RTM and HLE CPUID
517 ++ * bits. Force them here since they are now expected to be set.
518 + */
519 + setup_force_cpu_cap(X86_FEATURE_RTM);
520 ++ setup_force_cpu_cap(X86_FEATURE_HLE);
521 + }
522 + }
523 +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
524 +index 6c7847b3aa2d..ffbdd201c1f1 100644
525 +--- a/arch/x86/kvm/emulate.c
526 ++++ b/arch/x86/kvm/emulate.c
527 +@@ -23,6 +23,7 @@
528 + #include <linux/kvm_host.h>
529 + #include "kvm_cache_regs.h"
530 + #include <linux/module.h>
531 ++#include <linux/nospec.h>
532 + #include <asm/kvm_emulate.h>
533 + #include <linux/stringify.h>
534 + #include <asm/debugreg.h>
535 +@@ -5041,16 +5042,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
536 + ctxt->ad_bytes = def_ad_bytes ^ 6;
537 + break;
538 + case 0x26: /* ES override */
539 ++ has_seg_override = true;
540 ++ ctxt->seg_override = VCPU_SREG_ES;
541 ++ break;
542 + case 0x2e: /* CS override */
543 ++ has_seg_override = true;
544 ++ ctxt->seg_override = VCPU_SREG_CS;
545 ++ break;
546 + case 0x36: /* SS override */
547 ++ has_seg_override = true;
548 ++ ctxt->seg_override = VCPU_SREG_SS;
549 ++ break;
550 + case 0x3e: /* DS override */
551 + has_seg_override = true;
552 +- ctxt->seg_override = (ctxt->b >> 3) & 3;
553 ++ ctxt->seg_override = VCPU_SREG_DS;
554 + break;
555 + case 0x64: /* FS override */
556 ++ has_seg_override = true;
557 ++ ctxt->seg_override = VCPU_SREG_FS;
558 ++ break;
559 + case 0x65: /* GS override */
560 + has_seg_override = true;
561 +- ctxt->seg_override = ctxt->b & 7;
562 ++ ctxt->seg_override = VCPU_SREG_GS;
563 + break;
564 + case 0x40 ... 0x4f: /* REX */
565 + if (mode != X86EMUL_MODE_PROT64)
566 +@@ -5134,10 +5147,15 @@ done_prefixes:
567 + }
568 + break;
569 + case Escape:
570 +- if (ctxt->modrm > 0xbf)
571 +- opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
572 +- else
573 ++ if (ctxt->modrm > 0xbf) {
574 ++ size_t size = ARRAY_SIZE(opcode.u.esc->high);
575 ++ u32 index = array_index_nospec(
576 ++ ctxt->modrm - 0xc0, size);
577 ++
578 ++ opcode = opcode.u.esc->high[index];
579 ++ } else {
580 + opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
581 ++ }
582 + break;
583 + case InstrDual:
584 + if ((ctxt->modrm >> 6) == 3)
585 +diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
586 +index 62cf8c915e95..fce6fa012d30 100644
587 +--- a/arch/x86/kvm/hyperv.c
588 ++++ b/arch/x86/kvm/hyperv.c
589 +@@ -26,6 +26,7 @@
590 + #include "hyperv.h"
591 +
592 + #include <linux/kvm_host.h>
593 ++#include <linux/nospec.h>
594 + #include <trace/events/kvm.h>
595 +
596 + #include "trace.h"
597 +@@ -53,11 +54,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
598 + u32 index, u64 *pdata)
599 + {
600 + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
601 ++ size_t size = ARRAY_SIZE(hv->hv_crash_param);
602 +
603 +- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
604 ++ if (WARN_ON_ONCE(index >= size))
605 + return -EINVAL;
606 +
607 +- *pdata = hv->hv_crash_param[index];
608 ++ *pdata = hv->hv_crash_param[array_index_nospec(index, size)];
609 + return 0;
610 + }
611 +
612 +@@ -96,11 +98,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
613 + u32 index, u64 data)
614 + {
615 + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
616 ++ size_t size = ARRAY_SIZE(hv->hv_crash_param);
617 +
618 +- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
619 ++ if (WARN_ON_ONCE(index >= size))
620 + return -EINVAL;
621 +
622 +- hv->hv_crash_param[index] = data;
623 ++ hv->hv_crash_param[array_index_nospec(index, size)] = data;
624 + return 0;
625 + }
626 +
627 +diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
628 +index 7cc2360f1848..791850bfc981 100644
629 +--- a/arch/x86/kvm/i8259.c
630 ++++ b/arch/x86/kvm/i8259.c
631 +@@ -456,46 +456,37 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
632 + return s->elcr;
633 + }
634 +
635 +-static int picdev_in_range(gpa_t addr)
636 +-{
637 +- switch (addr) {
638 +- case 0x20:
639 +- case 0x21:
640 +- case 0xa0:
641 +- case 0xa1:
642 +- case 0x4d0:
643 +- case 0x4d1:
644 +- return 1;
645 +- default:
646 +- return 0;
647 +- }
648 +-}
649 +-
650 + static int picdev_write(struct kvm_pic *s,
651 + gpa_t addr, int len, const void *val)
652 + {
653 + unsigned char data = *(unsigned char *)val;
654 +- if (!picdev_in_range(addr))
655 +- return -EOPNOTSUPP;
656 +
657 + if (len != 1) {
658 + pr_pic_unimpl("non byte write\n");
659 + return 0;
660 + }
661 +- pic_lock(s);
662 + switch (addr) {
663 + case 0x20:
664 + case 0x21:
665 ++ pic_lock(s);
666 ++ pic_ioport_write(&s->pics[0], addr, data);
667 ++ pic_unlock(s);
668 ++ break;
669 + case 0xa0:
670 + case 0xa1:
671 +- pic_ioport_write(&s->pics[addr >> 7], addr, data);
672 ++ pic_lock(s);
673 ++ pic_ioport_write(&s->pics[1], addr, data);
674 ++ pic_unlock(s);
675 + break;
676 + case 0x4d0:
677 + case 0x4d1:
678 ++ pic_lock(s);
679 + elcr_ioport_write(&s->pics[addr & 1], addr, data);
680 ++ pic_unlock(s);
681 + break;
682 ++ default:
683 ++ return -EOPNOTSUPP;
684 + }
685 +- pic_unlock(s);
686 + return 0;
687 + }
688 +
689 +@@ -503,29 +494,31 @@ static int picdev_read(struct kvm_pic *s,
690 + gpa_t addr, int len, void *val)
691 + {
692 + unsigned char data = 0;
693 +- if (!picdev_in_range(addr))
694 +- return -EOPNOTSUPP;
695 +
696 + if (len != 1) {
697 + memset(val, 0, len);
698 + pr_pic_unimpl("non byte read\n");
699 + return 0;
700 + }
701 +- pic_lock(s);
702 + switch (addr) {
703 + case 0x20:
704 + case 0x21:
705 + case 0xa0:
706 + case 0xa1:
707 ++ pic_lock(s);
708 + data = pic_ioport_read(&s->pics[addr >> 7], addr);
709 ++ pic_unlock(s);
710 + break;
711 + case 0x4d0:
712 + case 0x4d1:
713 ++ pic_lock(s);
714 + data = elcr_ioport_read(&s->pics[addr & 1], addr);
715 ++ pic_unlock(s);
716 + break;
717 ++ default:
718 ++ return -EOPNOTSUPP;
719 + }
720 + *(unsigned char *)val = data;
721 +- pic_unlock(s);
722 + return 0;
723 + }
724 +
725 +diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
726 +index d380111351c0..086833ecb9f2 100644
727 +--- a/arch/x86/kvm/ioapic.c
728 ++++ b/arch/x86/kvm/ioapic.c
729 +@@ -36,6 +36,7 @@
730 + #include <linux/io.h>
731 + #include <linux/slab.h>
732 + #include <linux/export.h>
733 ++#include <linux/nospec.h>
734 + #include <asm/processor.h>
735 + #include <asm/page.h>
736 + #include <asm/current.h>
737 +@@ -73,13 +74,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
738 + default:
739 + {
740 + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
741 +- u64 redir_content;
742 ++ u64 redir_content = ~0ULL;
743 +
744 +- if (redir_index < IOAPIC_NUM_PINS)
745 +- redir_content =
746 +- ioapic->redirtbl[redir_index].bits;
747 +- else
748 +- redir_content = ~0ULL;
749 ++ if (redir_index < IOAPIC_NUM_PINS) {
750 ++ u32 index = array_index_nospec(
751 ++ redir_index, IOAPIC_NUM_PINS);
752 ++
753 ++ redir_content = ioapic->redirtbl[index].bits;
754 ++ }
755 +
756 + result = (ioapic->ioregsel & 0x1) ?
757 + (redir_content >> 32) & 0xffffffff :
758 +@@ -289,6 +291,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
759 + ioapic_debug("change redir index %x val %x\n", index, val);
760 + if (index >= IOAPIC_NUM_PINS)
761 + return;
762 ++ index = array_index_nospec(index, IOAPIC_NUM_PINS);
763 + e = &ioapic->redirtbl[index];
764 + mask_before = e->fields.mask;
765 + /* Preserve read-only fields */
766 +diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
767 +index 3c70f6c76d3a..ce8c4ae25c15 100644
768 +--- a/arch/x86/kvm/lapic.c
769 ++++ b/arch/x86/kvm/lapic.c
770 +@@ -36,6 +36,7 @@
771 + #include <asm/delay.h>
772 + #include <linux/atomic.h>
773 + #include <linux/jump_label.h>
774 ++#include <linux/nospec.h>
775 + #include "kvm_cache_regs.h"
776 + #include "irq.h"
777 + #include "trace.h"
778 +@@ -1432,15 +1433,21 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
779 + case APIC_LVTTHMR:
780 + case APIC_LVTPC:
781 + case APIC_LVT1:
782 +- case APIC_LVTERR:
783 ++ case APIC_LVTERR: {
784 + /* TODO: Check vector */
785 ++ size_t size;
786 ++ u32 index;
787 ++
788 + if (!kvm_apic_sw_enabled(apic))
789 + val |= APIC_LVT_MASKED;
790 +
791 +- val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
792 ++ size = ARRAY_SIZE(apic_lvt_mask);
793 ++ index = array_index_nospec(
794 ++ (reg - APIC_LVTT) >> 4, size);
795 ++ val &= apic_lvt_mask[index];
796 + apic_set_reg(apic, reg, val);
797 +-
798 + break;
799 ++ }
800 +
801 + case APIC_LVTT:
802 + if (!kvm_apic_sw_enabled(apic))
803 +diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
804 +index 0149ac59c273..3e3016411020 100644
805 +--- a/arch/x86/kvm/mtrr.c
806 ++++ b/arch/x86/kvm/mtrr.c
807 +@@ -17,6 +17,7 @@
808 + */
809 +
810 + #include <linux/kvm_host.h>
811 ++#include <linux/nospec.h>
812 + #include <asm/mtrr.h>
813 +
814 + #include "cpuid.h"
815 +@@ -202,11 +203,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
816 + break;
817 + case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
818 + *seg = 1;
819 +- *unit = msr - MSR_MTRRfix16K_80000;
820 ++ *unit = array_index_nospec(
821 ++ msr - MSR_MTRRfix16K_80000,
822 ++ MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
823 + break;
824 + case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
825 + *seg = 2;
826 +- *unit = msr - MSR_MTRRfix4K_C0000;
827 ++ *unit = array_index_nospec(
828 ++ msr - MSR_MTRRfix4K_C0000,
829 ++ MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
830 + break;
831 + default:
832 + return false;
833 +diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
834 +index f96e1f962587..fbf3d25af765 100644
835 +--- a/arch/x86/kvm/pmu.h
836 ++++ b/arch/x86/kvm/pmu.h
837 +@@ -1,6 +1,8 @@
838 + #ifndef __KVM_X86_PMU_H
839 + #define __KVM_X86_PMU_H
840 +
841 ++#include <linux/nospec.h>
842 ++
843 + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
844 + #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
845 + #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
846 +@@ -80,8 +82,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
847 + static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
848 + u32 base)
849 + {
850 +- if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
851 +- return &pmu->gp_counters[msr - base];
852 ++ if (msr >= base && msr < base + pmu->nr_arch_gp_counters) {
853 ++ u32 index = array_index_nospec(msr - base,
854 ++ pmu->nr_arch_gp_counters);
855 ++
856 ++ return &pmu->gp_counters[index];
857 ++ }
858 +
859 + return NULL;
860 + }
861 +@@ -91,8 +97,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
862 + {
863 + int base = MSR_CORE_PERF_FIXED_CTR0;
864 +
865 +- if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
866 +- return &pmu->fixed_counters[msr - base];
867 ++ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
868 ++ u32 index = array_index_nospec(msr - base,
869 ++ pmu->nr_arch_fixed_counters);
870 ++
871 ++ return &pmu->fixed_counters[index];
872 ++ }
873 +
874 + return NULL;
875 + }
876 +diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
877 +index 8fc07ea23344..822829f00590 100644
878 +--- a/arch/x86/kvm/pmu_intel.c
879 ++++ b/arch/x86/kvm/pmu_intel.c
880 +@@ -87,10 +87,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
881 +
882 + static unsigned intel_find_fixed_event(int idx)
883 + {
884 +- if (idx >= ARRAY_SIZE(fixed_pmc_events))
885 ++ u32 event;
886 ++ size_t size = ARRAY_SIZE(fixed_pmc_events);
887 ++
888 ++ if (idx >= size)
889 + return PERF_COUNT_HW_MAX;
890 +
891 +- return intel_arch_events[fixed_pmc_events[idx]].event_type;
892 ++ event = fixed_pmc_events[array_index_nospec(idx, size)];
893 ++ return intel_arch_events[event].event_type;
894 + }
895 +
896 + /* check if a PMC is enabled by comparising it with globl_ctrl bits. */
897 +@@ -131,15 +135,19 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu,
898 + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
899 + bool fixed = idx & (1u << 30);
900 + struct kvm_pmc *counters;
901 ++ unsigned int num_counters;
902 +
903 + idx &= ~(3u << 30);
904 +- if (!fixed && idx >= pmu->nr_arch_gp_counters)
905 +- return NULL;
906 +- if (fixed && idx >= pmu->nr_arch_fixed_counters)
907 ++ if (fixed) {
908 ++ counters = pmu->fixed_counters;
909 ++ num_counters = pmu->nr_arch_fixed_counters;
910 ++ } else {
911 ++ counters = pmu->gp_counters;
912 ++ num_counters = pmu->nr_arch_gp_counters;
913 ++ }
914 ++ if (idx >= num_counters)
915 + return NULL;
916 +- counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
917 +-
918 +- return &counters[idx];
919 ++ return &counters[array_index_nospec(idx, num_counters)];
920 + }
921 +
922 + static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
923 +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
924 +index 9344ac6b4f99..6c2b45f5d501 100644
925 +--- a/arch/x86/kvm/vmx.c
926 ++++ b/arch/x86/kvm/vmx.c
927 +@@ -7261,8 +7261,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
928 + /* _system ok, as nested_vmx_check_permission verified cpl=0 */
929 + if (kvm_write_guest_virt_system(vcpu, gva, &field_value,
930 + (is_long_mode(vcpu) ? 8 : 4),
931 +- &e))
932 ++ &e)) {
933 + kvm_inject_page_fault(vcpu, &e);
934 ++ return 1;
935 ++ }
936 + }
937 +
938 + nested_vmx_succeed(vcpu);
939 +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
940 +new file mode 100644
941 +index 000000000000..3791ce8d269e
942 +--- /dev/null
943 ++++ b/arch/x86/kvm/vmx/vmx.c
944 +@@ -0,0 +1,8033 @@
945 ++// SPDX-License-Identifier: GPL-2.0-only
946 ++/*
947 ++ * Kernel-based Virtual Machine driver for Linux
948 ++ *
949 ++ * This module enables machines with Intel VT-x extensions to run virtual
950 ++ * machines without emulation or binary translation.
951 ++ *
952 ++ * Copyright (C) 2006 Qumranet, Inc.
953 ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
954 ++ *
955 ++ * Authors:
956 ++ * Avi Kivity <avi@××××××××.com>
957 ++ * Yaniv Kamay <yaniv@××××××××.com>
958 ++ */
959 ++
960 ++#include <linux/frame.h>
961 ++#include <linux/highmem.h>
962 ++#include <linux/hrtimer.h>
963 ++#include <linux/kernel.h>
964 ++#include <linux/kvm_host.h>
965 ++#include <linux/module.h>
966 ++#include <linux/moduleparam.h>
967 ++#include <linux/mod_devicetable.h>
968 ++#include <linux/mm.h>
969 ++#include <linux/sched.h>
970 ++#include <linux/sched/smt.h>
971 ++#include <linux/slab.h>
972 ++#include <linux/tboot.h>
973 ++#include <linux/trace_events.h>
974 ++
975 ++#include <asm/apic.h>
976 ++#include <asm/asm.h>
977 ++#include <asm/cpu.h>
978 ++#include <asm/debugreg.h>
979 ++#include <asm/desc.h>
980 ++#include <asm/fpu/internal.h>
981 ++#include <asm/io.h>
982 ++#include <asm/irq_remapping.h>
983 ++#include <asm/kexec.h>
984 ++#include <asm/perf_event.h>
985 ++#include <asm/mce.h>
986 ++#include <asm/mmu_context.h>
987 ++#include <asm/mshyperv.h>
988 ++#include <asm/spec-ctrl.h>
989 ++#include <asm/virtext.h>
990 ++#include <asm/vmx.h>
991 ++
992 ++#include "capabilities.h"
993 ++#include "cpuid.h"
994 ++#include "evmcs.h"
995 ++#include "irq.h"
996 ++#include "kvm_cache_regs.h"
997 ++#include "lapic.h"
998 ++#include "mmu.h"
999 ++#include "nested.h"
1000 ++#include "ops.h"
1001 ++#include "pmu.h"
1002 ++#include "trace.h"
1003 ++#include "vmcs.h"
1004 ++#include "vmcs12.h"
1005 ++#include "vmx.h"
1006 ++#include "x86.h"
1007 ++
1008 ++MODULE_AUTHOR("Qumranet");
1009 ++MODULE_LICENSE("GPL");
1010 ++
1011 ++static const struct x86_cpu_id vmx_cpu_id[] = {
1012 ++ X86_FEATURE_MATCH(X86_FEATURE_VMX),
1013 ++ {}
1014 ++};
1015 ++MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
1016 ++
1017 ++bool __read_mostly enable_vpid = 1;
1018 ++module_param_named(vpid, enable_vpid, bool, 0444);
1019 ++
1020 ++static bool __read_mostly enable_vnmi = 1;
1021 ++module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
1022 ++
1023 ++bool __read_mostly flexpriority_enabled = 1;
1024 ++module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
1025 ++
1026 ++bool __read_mostly enable_ept = 1;
1027 ++module_param_named(ept, enable_ept, bool, S_IRUGO);
1028 ++
1029 ++bool __read_mostly enable_unrestricted_guest = 1;
1030 ++module_param_named(unrestricted_guest,
1031 ++ enable_unrestricted_guest, bool, S_IRUGO);
1032 ++
1033 ++bool __read_mostly enable_ept_ad_bits = 1;
1034 ++module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
1035 ++
1036 ++static bool __read_mostly emulate_invalid_guest_state = true;
1037 ++module_param(emulate_invalid_guest_state, bool, S_IRUGO);
1038 ++
1039 ++static bool __read_mostly fasteoi = 1;
1040 ++module_param(fasteoi, bool, S_IRUGO);
1041 ++
1042 ++static bool __read_mostly enable_apicv = 1;
1043 ++module_param(enable_apicv, bool, S_IRUGO);
1044 ++
1045 ++/*
1046 ++ * If nested=1, nested virtualization is supported, i.e., guests may use
1047 ++ * VMX and be a hypervisor for its own guests. If nested=0, guests may not
1048 ++ * use VMX instructions.
1049 ++ */
1050 ++static bool __read_mostly nested = 1;
1051 ++module_param(nested, bool, S_IRUGO);
1052 ++
1053 ++bool __read_mostly enable_pml = 1;
1054 ++module_param_named(pml, enable_pml, bool, S_IRUGO);
1055 ++
1056 ++static bool __read_mostly dump_invalid_vmcs = 0;
1057 ++module_param(dump_invalid_vmcs, bool, 0644);
1058 ++
1059 ++#define MSR_BITMAP_MODE_X2APIC 1
1060 ++#define MSR_BITMAP_MODE_X2APIC_APICV 2
1061 ++
1062 ++#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
1063 ++
1064 ++/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
1065 ++static int __read_mostly cpu_preemption_timer_multi;
1066 ++static bool __read_mostly enable_preemption_timer = 1;
1067 ++#ifdef CONFIG_X86_64
1068 ++module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
1069 ++#endif
1070 ++
1071 ++#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
1072 ++#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
1073 ++#define KVM_VM_CR0_ALWAYS_ON \
1074 ++ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
1075 ++ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
1076 ++#define KVM_CR4_GUEST_OWNED_BITS \
1077 ++ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
1078 ++ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
1079 ++
1080 ++#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
1081 ++#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
1082 ++#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
1083 ++
1084 ++#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
1085 ++
1086 ++#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
1087 ++ RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
1088 ++ RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
1089 ++ RTIT_STATUS_BYTECNT))
1090 ++
1091 ++#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
1092 ++ (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
1093 ++
1094 ++/*
1095 ++ * These 2 parameters are used to config the controls for Pause-Loop Exiting:
1096 ++ * ple_gap: upper bound on the amount of time between two successive
1097 ++ * executions of PAUSE in a loop. Also indicate if ple enabled.
1098 ++ * According to test, this time is usually smaller than 128 cycles.
1099 ++ * ple_window: upper bound on the amount of time a guest is allowed to execute
1100 ++ * in a PAUSE loop. Tests indicate that most spinlocks are held for
1101 ++ * less than 2^12 cycles
1102 ++ * Time is measured based on a counter that runs at the same rate as the TSC,
1103 ++ * refer SDM volume 3b section 21.6.13 & 22.1.3.
1104 ++ */
1105 ++static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
1106 ++module_param(ple_gap, uint, 0444);
1107 ++
1108 ++static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
1109 ++module_param(ple_window, uint, 0444);
1110 ++
1111 ++/* Default doubles per-vcpu window every exit. */
1112 ++static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
1113 ++module_param(ple_window_grow, uint, 0444);
1114 ++
1115 ++/* Default resets per-vcpu window every exit to ple_window. */
1116 ++static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
1117 ++module_param(ple_window_shrink, uint, 0444);
1118 ++
1119 ++/* Default is to compute the maximum so we can never overflow. */
1120 ++static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
1121 ++module_param(ple_window_max, uint, 0444);
1122 ++
1123 ++/* Default is SYSTEM mode, 1 for host-guest mode */
1124 ++int __read_mostly pt_mode = PT_MODE_SYSTEM;
1125 ++module_param(pt_mode, int, S_IRUGO);
1126 ++
1127 ++static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
1128 ++static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
1129 ++static DEFINE_MUTEX(vmx_l1d_flush_mutex);
1130 ++
1131 ++/* Storage for pre module init parameter parsing */
1132 ++static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
1133 ++
1134 ++static const struct {
1135 ++ const char *option;
1136 ++ bool for_parse;
1137 ++} vmentry_l1d_param[] = {
1138 ++ [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
1139 ++ [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
1140 ++ [VMENTER_L1D_FLUSH_COND] = {"cond", true},
1141 ++ [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
1142 ++ [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
1143 ++ [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
1144 ++};
1145 ++
1146 ++#define L1D_CACHE_ORDER 4
1147 ++static void *vmx_l1d_flush_pages;
1148 ++
1149 ++static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
1150 ++{
1151 ++ struct page *page;
1152 ++ unsigned int i;
1153 ++
1154 ++ if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
1155 ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1156 ++ return 0;
1157 ++ }
1158 ++
1159 ++ if (!enable_ept) {
1160 ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
1161 ++ return 0;
1162 ++ }
1163 ++
1164 ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
1165 ++ u64 msr;
1166 ++
1167 ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
1168 ++ if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
1169 ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1170 ++ return 0;
1171 ++ }
1172 ++ }
1173 ++
1174 ++ /* If set to auto use the default l1tf mitigation method */
1175 ++ if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
1176 ++ switch (l1tf_mitigation) {
1177 ++ case L1TF_MITIGATION_OFF:
1178 ++ l1tf = VMENTER_L1D_FLUSH_NEVER;
1179 ++ break;
1180 ++ case L1TF_MITIGATION_FLUSH_NOWARN:
1181 ++ case L1TF_MITIGATION_FLUSH:
1182 ++ case L1TF_MITIGATION_FLUSH_NOSMT:
1183 ++ l1tf = VMENTER_L1D_FLUSH_COND;
1184 ++ break;
1185 ++ case L1TF_MITIGATION_FULL:
1186 ++ case L1TF_MITIGATION_FULL_FORCE:
1187 ++ l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1188 ++ break;
1189 ++ }
1190 ++ } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
1191 ++ l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1192 ++ }
1193 ++
1194 ++ if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
1195 ++ !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
1196 ++ /*
1197 ++ * This allocation for vmx_l1d_flush_pages is not tied to a VM
1198 ++ * lifetime and so should not be charged to a memcg.
1199 ++ */
1200 ++ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
1201 ++ if (!page)
1202 ++ return -ENOMEM;
1203 ++ vmx_l1d_flush_pages = page_address(page);
1204 ++
1205 ++ /*
1206 ++ * Initialize each page with a different pattern in
1207 ++ * order to protect against KSM in the nested
1208 ++ * virtualization case.
1209 ++ */
1210 ++ for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
1211 ++ memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
1212 ++ PAGE_SIZE);
1213 ++ }
1214 ++ }
1215 ++
1216 ++ l1tf_vmx_mitigation = l1tf;
1217 ++
1218 ++ if (l1tf != VMENTER_L1D_FLUSH_NEVER)
1219 ++ static_branch_enable(&vmx_l1d_should_flush);
1220 ++ else
1221 ++ static_branch_disable(&vmx_l1d_should_flush);
1222 ++
1223 ++ if (l1tf == VMENTER_L1D_FLUSH_COND)
1224 ++ static_branch_enable(&vmx_l1d_flush_cond);
1225 ++ else
1226 ++ static_branch_disable(&vmx_l1d_flush_cond);
1227 ++ return 0;
1228 ++}
1229 ++
1230 ++static int vmentry_l1d_flush_parse(const char *s)
1231 ++{
1232 ++ unsigned int i;
1233 ++
1234 ++ if (s) {
1235 ++ for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
1236 ++ if (vmentry_l1d_param[i].for_parse &&
1237 ++ sysfs_streq(s, vmentry_l1d_param[i].option))
1238 ++ return i;
1239 ++ }
1240 ++ }
1241 ++ return -EINVAL;
1242 ++}
1243 ++
1244 ++static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
1245 ++{
1246 ++ int l1tf, ret;
1247 ++
1248 ++ l1tf = vmentry_l1d_flush_parse(s);
1249 ++ if (l1tf < 0)
1250 ++ return l1tf;
1251 ++
1252 ++ if (!boot_cpu_has(X86_BUG_L1TF))
1253 ++ return 0;
1254 ++
1255 ++ /*
1256 ++ * Has vmx_init() run already? If not then this is the pre init
1257 ++ * parameter parsing. In that case just store the value and let
1258 ++ * vmx_init() do the proper setup after enable_ept has been
1259 ++ * established.
1260 ++ */
1261 ++ if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
1262 ++ vmentry_l1d_flush_param = l1tf;
1263 ++ return 0;
1264 ++ }
1265 ++
1266 ++ mutex_lock(&vmx_l1d_flush_mutex);
1267 ++ ret = vmx_setup_l1d_flush(l1tf);
1268 ++ mutex_unlock(&vmx_l1d_flush_mutex);
1269 ++ return ret;
1270 ++}
1271 ++
1272 ++static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
1273 ++{
1274 ++ if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
1275 ++ return sprintf(s, "???\n");
1276 ++
1277 ++ return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
1278 ++}
1279 ++
1280 ++static const struct kernel_param_ops vmentry_l1d_flush_ops = {
1281 ++ .set = vmentry_l1d_flush_set,
1282 ++ .get = vmentry_l1d_flush_get,
1283 ++};
1284 ++module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
1285 ++
1286 ++static bool guest_state_valid(struct kvm_vcpu *vcpu);
1287 ++static u32 vmx_segment_access_rights(struct kvm_segment *var);
1288 ++static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
1289 ++ u32 msr, int type);
1290 ++
1291 ++void vmx_vmexit(void);
1292 ++
1293 ++#define vmx_insn_failed(fmt...) \
1294 ++do { \
1295 ++ WARN_ONCE(1, fmt); \
1296 ++ pr_warn_ratelimited(fmt); \
1297 ++} while (0)
1298 ++
1299 ++asmlinkage void vmread_error(unsigned long field, bool fault)
1300 ++{
1301 ++ if (fault)
1302 ++ kvm_spurious_fault();
1303 ++ else
1304 ++ vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
1305 ++}
1306 ++
1307 ++noinline void vmwrite_error(unsigned long field, unsigned long value)
1308 ++{
1309 ++ vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
1310 ++ field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
1311 ++}
1312 ++
1313 ++noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
1314 ++{
1315 ++ vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
1316 ++}
1317 ++
1318 ++noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
1319 ++{
1320 ++ vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
1321 ++}
1322 ++
1323 ++noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
1324 ++{
1325 ++ vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
1326 ++ ext, vpid, gva);
1327 ++}
1328 ++
1329 ++noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
1330 ++{
1331 ++ vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
1332 ++ ext, eptp, gpa);
1333 ++}
1334 ++
1335 ++static DEFINE_PER_CPU(struct vmcs *, vmxarea);
1336 ++DEFINE_PER_CPU(struct vmcs *, current_vmcs);
1337 ++/*
1338 ++ * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
1339 ++ * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
1340 ++ */
1341 ++static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
1342 ++
1343 ++/*
1344 ++ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
1345 ++ * can find which vCPU should be waken up.
1346 ++ */
1347 ++static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
1348 ++static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
1349 ++
1350 ++static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1351 ++static DEFINE_SPINLOCK(vmx_vpid_lock);
1352 ++
1353 ++struct vmcs_config vmcs_config;
1354 ++struct vmx_capability vmx_capability;
1355 ++
1356 ++#define VMX_SEGMENT_FIELD(seg) \
1357 ++ [VCPU_SREG_##seg] = { \
1358 ++ .selector = GUEST_##seg##_SELECTOR, \
1359 ++ .base = GUEST_##seg##_BASE, \
1360 ++ .limit = GUEST_##seg##_LIMIT, \
1361 ++ .ar_bytes = GUEST_##seg##_AR_BYTES, \
1362 ++ }
1363 ++
1364 ++static const struct kvm_vmx_segment_field {
1365 ++ unsigned selector;
1366 ++ unsigned base;
1367 ++ unsigned limit;
1368 ++ unsigned ar_bytes;
1369 ++} kvm_vmx_segment_fields[] = {
1370 ++ VMX_SEGMENT_FIELD(CS),
1371 ++ VMX_SEGMENT_FIELD(DS),
1372 ++ VMX_SEGMENT_FIELD(ES),
1373 ++ VMX_SEGMENT_FIELD(FS),
1374 ++ VMX_SEGMENT_FIELD(GS),
1375 ++ VMX_SEGMENT_FIELD(SS),
1376 ++ VMX_SEGMENT_FIELD(TR),
1377 ++ VMX_SEGMENT_FIELD(LDTR),
1378 ++};
1379 ++
1380 ++u64 host_efer;
1381 ++static unsigned long host_idt_base;
1382 ++
1383 ++/*
1384 ++ * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
1385 ++ * will emulate SYSCALL in legacy mode if the vendor string in guest
1386 ++ * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
1387 ++ * support this emulation, IA32_STAR must always be included in
1388 ++ * vmx_msr_index[], even in i386 builds.
1389 ++ */
1390 ++const u32 vmx_msr_index[] = {
1391 ++#ifdef CONFIG_X86_64
1392 ++ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
1393 ++#endif
1394 ++ MSR_EFER, MSR_TSC_AUX, MSR_STAR,
1395 ++ MSR_IA32_TSX_CTRL,
1396 ++};
1397 ++
1398 ++#if IS_ENABLED(CONFIG_HYPERV)
1399 ++static bool __read_mostly enlightened_vmcs = true;
1400 ++module_param(enlightened_vmcs, bool, 0444);
1401 ++
1402 ++/* check_ept_pointer() should be under protection of ept_pointer_lock. */
1403 ++static void check_ept_pointer_match(struct kvm *kvm)
1404 ++{
1405 ++ struct kvm_vcpu *vcpu;
1406 ++ u64 tmp_eptp = INVALID_PAGE;
1407 ++ int i;
1408 ++
1409 ++ kvm_for_each_vcpu(i, vcpu, kvm) {
1410 ++ if (!VALID_PAGE(tmp_eptp)) {
1411 ++ tmp_eptp = to_vmx(vcpu)->ept_pointer;
1412 ++ } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1413 ++ to_kvm_vmx(kvm)->ept_pointers_match
1414 ++ = EPT_POINTERS_MISMATCH;
1415 ++ return;
1416 ++ }
1417 ++ }
1418 ++
1419 ++ to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1420 ++}
1421 ++
1422 ++static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
1423 ++ void *data)
1424 ++{
1425 ++ struct kvm_tlb_range *range = data;
1426 ++
1427 ++ return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
1428 ++ range->pages);
1429 ++}
1430 ++
1431 ++static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
1432 ++ struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
1433 ++{
1434 ++ u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
1435 ++
1436 ++ /*
1437 ++ * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address
1438 ++ * of the base of EPT PML4 table, strip off EPT configuration
1439 ++ * information.
1440 ++ */
1441 ++ if (range)
1442 ++ return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
1443 ++ kvm_fill_hv_flush_list_func, (void *)range);
1444 ++ else
1445 ++ return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
1446 ++}
1447 ++
1448 ++static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
1449 ++ struct kvm_tlb_range *range)
1450 ++{
1451 ++ struct kvm_vcpu *vcpu;
1452 ++ int ret = 0, i;
1453 ++
1454 ++ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1455 ++
1456 ++ if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1457 ++ check_ept_pointer_match(kvm);
1458 ++
1459 ++ if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
1460 ++ kvm_for_each_vcpu(i, vcpu, kvm) {
1461 ++ /* If ept_pointer is invalid pointer, bypass flush request. */
1462 ++ if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
1463 ++ ret |= __hv_remote_flush_tlb_with_range(
1464 ++ kvm, vcpu, range);
1465 ++ }
1466 ++ } else {
1467 ++ ret = __hv_remote_flush_tlb_with_range(kvm,
1468 ++ kvm_get_vcpu(kvm, 0), range);
1469 ++ }
1470 ++
1471 ++ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1472 ++ return ret;
1473 ++}
1474 ++static int hv_remote_flush_tlb(struct kvm *kvm)
1475 ++{
1476 ++ return hv_remote_flush_tlb_with_range(kvm, NULL);
1477 ++}
1478 ++
1479 ++static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
1480 ++{
1481 ++ struct hv_enlightened_vmcs *evmcs;
1482 ++ struct hv_partition_assist_pg **p_hv_pa_pg =
1483 ++ &vcpu->kvm->arch.hyperv.hv_pa_pg;
1484 ++ /*
1485 ++ * Synthetic VM-Exit is not enabled in current code and so All
1486 ++ * evmcs in singe VM shares same assist page.
1487 ++ */
1488 ++ if (!*p_hv_pa_pg)
1489 ++ *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1490 ++
1491 ++ if (!*p_hv_pa_pg)
1492 ++ return -ENOMEM;
1493 ++
1494 ++ evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
1495 ++
1496 ++ evmcs->partition_assist_page =
1497 ++ __pa(*p_hv_pa_pg);
1498 ++ evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
1499 ++ evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
1500 ++
1501 ++ return 0;
1502 ++}
1503 ++
1504 ++#endif /* IS_ENABLED(CONFIG_HYPERV) */
1505 ++
1506 ++/*
1507 ++ * Comment's format: document - errata name - stepping - processor name.
1508 ++ * Refer from
1509 ++ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1510 ++ */
1511 ++static u32 vmx_preemption_cpu_tfms[] = {
1512 ++/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
1513 ++0x000206E6,
1514 ++/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1515 ++/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1516 ++/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
1517 ++0x00020652,
1518 ++/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
1519 ++0x00020655,
1520 ++/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1521 ++/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1522 ++/*
1523 ++ * 320767.pdf - AAP86 - B1 -
1524 ++ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1525 ++ */
1526 ++0x000106E5,
1527 ++/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
1528 ++0x000106A0,
1529 ++/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
1530 ++0x000106A1,
1531 ++/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
1532 ++0x000106A4,
1533 ++ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1534 ++ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1535 ++ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
1536 ++0x000106A5,
1537 ++ /* Xeon E3-1220 V2 */
1538 ++0x000306A8,
1539 ++};
1540 ++
1541 ++static inline bool cpu_has_broken_vmx_preemption_timer(void)
1542 ++{
1543 ++ u32 eax = cpuid_eax(0x00000001), i;
1544 ++
1545 ++ /* Clear the reserved bits */
1546 ++ eax &= ~(0x3U << 14 | 0xfU << 28);
1547 ++ for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
1548 ++ if (eax == vmx_preemption_cpu_tfms[i])
1549 ++ return true;
1550 ++
1551 ++ return false;
1552 ++}
1553 ++
1554 ++static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
1555 ++{
1556 ++ return flexpriority_enabled && lapic_in_kernel(vcpu);
1557 ++}
1558 ++
1559 ++static inline bool report_flexpriority(void)
1560 ++{
1561 ++ return flexpriority_enabled;
1562 ++}
1563 ++
1564 ++static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
1565 ++{
1566 ++ int i;
1567 ++
1568 ++ for (i = 0; i < vmx->nmsrs; ++i)
1569 ++ if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
1570 ++ return i;
1571 ++ return -1;
1572 ++}
1573 ++
1574 ++struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
1575 ++{
1576 ++ int i;
1577 ++
1578 ++ i = __find_msr_index(vmx, msr);
1579 ++ if (i >= 0)
1580 ++ return &vmx->guest_msrs[i];
1581 ++ return NULL;
1582 ++}
1583 ++
1584 ++static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
1585 ++{
1586 ++ int ret = 0;
1587 ++
1588 ++ u64 old_msr_data = msr->data;
1589 ++ msr->data = data;
1590 ++ if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
1591 ++ preempt_disable();
1592 ++ ret = kvm_set_shared_msr(msr->index, msr->data,
1593 ++ msr->mask);
1594 ++ preempt_enable();
1595 ++ if (ret)
1596 ++ msr->data = old_msr_data;
1597 ++ }
1598 ++ return ret;
1599 ++}
1600 ++
1601 ++void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
1602 ++{
1603 ++ vmcs_clear(loaded_vmcs->vmcs);
1604 ++ if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
1605 ++ vmcs_clear(loaded_vmcs->shadow_vmcs);
1606 ++ loaded_vmcs->cpu = -1;
1607 ++ loaded_vmcs->launched = 0;
1608 ++}
1609 ++
1610 ++#ifdef CONFIG_KEXEC_CORE
1611 ++/*
1612 ++ * This bitmap is used to indicate whether the vmclear
1613 ++ * operation is enabled on all cpus. All disabled by
1614 ++ * default.
1615 ++ */
1616 ++static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
1617 ++
1618 ++static inline void crash_enable_local_vmclear(int cpu)
1619 ++{
1620 ++ cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
1621 ++}
1622 ++
1623 ++static inline void crash_disable_local_vmclear(int cpu)
1624 ++{
1625 ++ cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
1626 ++}
1627 ++
1628 ++static inline int crash_local_vmclear_enabled(int cpu)
1629 ++{
1630 ++ return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
1631 ++}
1632 ++
1633 ++static void crash_vmclear_local_loaded_vmcss(void)
1634 ++{
1635 ++ int cpu = raw_smp_processor_id();
1636 ++ struct loaded_vmcs *v;
1637 ++
1638 ++ if (!crash_local_vmclear_enabled(cpu))
1639 ++ return;
1640 ++
1641 ++ list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
1642 ++ loaded_vmcss_on_cpu_link)
1643 ++ vmcs_clear(v->vmcs);
1644 ++}
1645 ++#else
1646 ++static inline void crash_enable_local_vmclear(int cpu) { }
1647 ++static inline void crash_disable_local_vmclear(int cpu) { }
1648 ++#endif /* CONFIG_KEXEC_CORE */
1649 ++
1650 ++static void __loaded_vmcs_clear(void *arg)
1651 ++{
1652 ++ struct loaded_vmcs *loaded_vmcs = arg;
1653 ++ int cpu = raw_smp_processor_id();
1654 ++
1655 ++ if (loaded_vmcs->cpu != cpu)
1656 ++ return; /* vcpu migration can race with cpu offline */
1657 ++ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
1658 ++ per_cpu(current_vmcs, cpu) = NULL;
1659 ++ crash_disable_local_vmclear(cpu);
1660 ++ list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
1661 ++
1662 ++ /*
1663 ++ * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
1664 ++ * is before setting loaded_vmcs->vcpu to -1 which is done in
1665 ++ * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
1666 ++ * then adds the vmcs into percpu list before it is deleted.
1667 ++ */
1668 ++ smp_wmb();
1669 ++
1670 ++ loaded_vmcs_init(loaded_vmcs);
1671 ++ crash_enable_local_vmclear(cpu);
1672 ++}
1673 ++
1674 ++void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
1675 ++{
1676 ++ int cpu = loaded_vmcs->cpu;
1677 ++
1678 ++ if (cpu != -1)
1679 ++ smp_call_function_single(cpu,
1680 ++ __loaded_vmcs_clear, loaded_vmcs, 1);
1681 ++}
1682 ++
1683 ++static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
1684 ++ unsigned field)
1685 ++{
1686 ++ bool ret;
1687 ++ u32 mask = 1 << (seg * SEG_FIELD_NR + field);
1688 ++
1689 ++ if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
1690 ++ kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
1691 ++ vmx->segment_cache.bitmask = 0;
1692 ++ }
1693 ++ ret = vmx->segment_cache.bitmask & mask;
1694 ++ vmx->segment_cache.bitmask |= mask;
1695 ++ return ret;
1696 ++}
1697 ++
1698 ++static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
1699 ++{
1700 ++ u16 *p = &vmx->segment_cache.seg[seg].selector;
1701 ++
1702 ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
1703 ++ *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
1704 ++ return *p;
1705 ++}
1706 ++
1707 ++static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
1708 ++{
1709 ++ ulong *p = &vmx->segment_cache.seg[seg].base;
1710 ++
1711 ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
1712 ++ *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
1713 ++ return *p;
1714 ++}
1715 ++
1716 ++static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
1717 ++{
1718 ++ u32 *p = &vmx->segment_cache.seg[seg].limit;
1719 ++
1720 ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
1721 ++ *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
1722 ++ return *p;
1723 ++}
1724 ++
1725 ++static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
1726 ++{
1727 ++ u32 *p = &vmx->segment_cache.seg[seg].ar;
1728 ++
1729 ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
1730 ++ *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
1731 ++ return *p;
1732 ++}
1733 ++
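The segment cache above keys its valid bits by seg * SEG_FIELD_NR + field. A tiny sketch of that index arithmetic; the enum here assumes SEG_FIELD_NR is 4, matching the four accessors (selector, base, limit, AR):

#include <stdio.h>

enum { FIELD_SEL, FIELD_BASE, FIELD_LIMIT, FIELD_AR, FIELD_NR };  /* FIELD_NR == 4 */

/* One bit per (segment, field) pair in a single bitmask. */
static unsigned int cache_bit(unsigned int seg, unsigned int field)
{
        return 1u << (seg * FIELD_NR + field);
}

int main(void)
{
        printf("seg=2, field=AR -> 0x%x\n", cache_bit(2, FIELD_AR));  /* bit 11, 0x800 */
        return 0;
}
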
1734 ++void update_exception_bitmap(struct kvm_vcpu *vcpu)
1735 ++{
1736 ++ u32 eb;
1737 ++
1738 ++ eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
1739 ++ (1u << DB_VECTOR) | (1u << AC_VECTOR);
1740 ++ /*
1741 ++ * Guest access to VMware backdoor ports could legitimately
1742 ++ * trigger #GP because of TSS I/O permission bitmap.
1743 ++ * We intercept those #GP and allow access to them anyway
1744 ++ * as VMware does.
1745 ++ */
1746 ++ if (enable_vmware_backdoor)
1747 ++ eb |= (1u << GP_VECTOR);
1748 ++ if ((vcpu->guest_debug &
1749 ++ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
1750 ++ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
1751 ++ eb |= 1u << BP_VECTOR;
1752 ++ if (to_vmx(vcpu)->rmode.vm86_active)
1753 ++ eb = ~0;
1754 ++ if (enable_ept)
1755 ++ eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
1756 ++
1757 ++ /* When we are running a nested L2 guest and L1 specified for it a
1758 ++ * certain exception bitmap, we must trap the same exceptions and pass
1759 ++ * them to L1. When running L2, we will only handle the exceptions
1760 ++ * specified above if L1 did not want them.
1761 ++ */
1762 ++ if (is_guest_mode(vcpu))
1763 ++ eb |= get_vmcs12(vcpu)->exception_bitmap;
1764 ++
1765 ++ vmcs_write32(EXCEPTION_BITMAP, eb);
1766 ++}
1767 ++
1768 ++/*
1769 ++ * Check if MSR is intercepted for currently loaded MSR bitmap.
1770 ++ */
1771 ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
1772 ++{
1773 ++ unsigned long *msr_bitmap;
1774 ++ int f = sizeof(unsigned long);
1775 ++
1776 ++ if (!cpu_has_vmx_msr_bitmap())
1777 ++ return true;
1778 ++
1779 ++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
1780 ++
1781 ++ if (msr <= 0x1fff) {
1782 ++ return !!test_bit(msr, msr_bitmap + 0x800 / f);
1783 ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1784 ++ msr &= 0x1fff;
1785 ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
1786 ++ }
1787 ++
1788 ++ return true;
1789 ++}
1790 ++
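msr_write_intercepted() above consults the VMX MSR bitmap, where the write-intercept bits for low MSRs (0x0000-0x1fff) start at byte offset 0x800 and those for high MSRs (0xc0000000-0xc0001fff) at 0xc00. A simplified sketch of that lookup over a plain byte array (illustrative only; the kernel version uses test_bit() on unsigned longs):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return 1 if the write-intercept bit for 'msr' is set in a 4 KiB
 * VMX-style MSR bitmap; MSRs outside both ranges are always intercepted. */
static int write_intercepted(const uint8_t *bitmap, uint32_t msr)
{
        uint32_t base;

        if (msr <= 0x1fff) {
                base = 0x800;
        } else if (msr >= 0xc0000000u && msr <= 0xc0001fffu) {
                base = 0xc00;
                msr &= 0x1fff;
        } else {
                return 1;
        }
        return (bitmap[base + msr / 8] >> (msr % 8)) & 1;
}

int main(void)
{
        uint8_t bitmap[4096];

        memset(bitmap, 0, sizeof(bitmap));
        bitmap[0x800 + 0x48 / 8] |= 1 << (0x48 % 8);    /* intercept writes to MSR 0x48 */
        printf("0x48: %d, 0x49: %d\n",
               write_intercepted(bitmap, 0x48), write_intercepted(bitmap, 0x49));
        return 0;
}
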
1791 ++static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1792 ++ unsigned long entry, unsigned long exit)
1793 ++{
1794 ++ vm_entry_controls_clearbit(vmx, entry);
1795 ++ vm_exit_controls_clearbit(vmx, exit);
1796 ++}
1797 ++
1798 ++int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
1799 ++{
1800 ++ unsigned int i;
1801 ++
1802 ++ for (i = 0; i < m->nr; ++i) {
1803 ++ if (m->val[i].index == msr)
1804 ++ return i;
1805 ++ }
1806 ++ return -ENOENT;
1807 ++}
1808 ++
1809 ++static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1810 ++{
1811 ++ int i;
1812 ++ struct msr_autoload *m = &vmx->msr_autoload;
1813 ++
1814 ++ switch (msr) {
1815 ++ case MSR_EFER:
1816 ++ if (cpu_has_load_ia32_efer()) {
1817 ++ clear_atomic_switch_msr_special(vmx,
1818 ++ VM_ENTRY_LOAD_IA32_EFER,
1819 ++ VM_EXIT_LOAD_IA32_EFER);
1820 ++ return;
1821 ++ }
1822 ++ break;
1823 ++ case MSR_CORE_PERF_GLOBAL_CTRL:
1824 ++ if (cpu_has_load_perf_global_ctrl()) {
1825 ++ clear_atomic_switch_msr_special(vmx,
1826 ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1827 ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1828 ++ return;
1829 ++ }
1830 ++ break;
1831 ++ }
1832 ++ i = vmx_find_msr_index(&m->guest, msr);
1833 ++ if (i < 0)
1834 ++ goto skip_guest;
1835 ++ --m->guest.nr;
1836 ++ m->guest.val[i] = m->guest.val[m->guest.nr];
1837 ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
1838 ++
1839 ++skip_guest:
1840 ++ i = vmx_find_msr_index(&m->host, msr);
1841 ++ if (i < 0)
1842 ++ return;
1843 ++
1844 ++ --m->host.nr;
1845 ++ m->host.val[i] = m->host.val[m->host.nr];
1846 ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
1847 ++}
1848 ++
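clear_atomic_switch_msr() above removes an autoload slot in O(1) by copying the last entry over it and shrinking the count, without preserving order. The same idea as a standalone sketch:

#include <stdio.h>

struct entry { unsigned int index; unsigned long long value; };

/* Remove arr[i] without preserving order: overwrite it with the last element. */
static void remove_unordered(struct entry *arr, unsigned int *nr, unsigned int i)
{
        --*nr;
        arr[i] = arr[*nr];
}

int main(void)
{
        struct entry arr[4] = { {10, 1}, {20, 2}, {30, 3}, {40, 4} };
        unsigned int nr = 4, i;

        remove_unordered(arr, &nr, 1);          /* drop the entry with index 20 */
        for (i = 0; i < nr; i++)
                printf("%u ", arr[i].index);    /* prints: 10 40 30 */
        printf("\n");
        return 0;
}
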
1849 ++static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1850 ++ unsigned long entry, unsigned long exit,
1851 ++ unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1852 ++ u64 guest_val, u64 host_val)
1853 ++{
1854 ++ vmcs_write64(guest_val_vmcs, guest_val);
1855 ++ if (host_val_vmcs != HOST_IA32_EFER)
1856 ++ vmcs_write64(host_val_vmcs, host_val);
1857 ++ vm_entry_controls_setbit(vmx, entry);
1858 ++ vm_exit_controls_setbit(vmx, exit);
1859 ++}
1860 ++
1861 ++static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1862 ++ u64 guest_val, u64 host_val, bool entry_only)
1863 ++{
1864 ++ int i, j = 0;
1865 ++ struct msr_autoload *m = &vmx->msr_autoload;
1866 ++
1867 ++ switch (msr) {
1868 ++ case MSR_EFER:
1869 ++ if (cpu_has_load_ia32_efer()) {
1870 ++ add_atomic_switch_msr_special(vmx,
1871 ++ VM_ENTRY_LOAD_IA32_EFER,
1872 ++ VM_EXIT_LOAD_IA32_EFER,
1873 ++ GUEST_IA32_EFER,
1874 ++ HOST_IA32_EFER,
1875 ++ guest_val, host_val);
1876 ++ return;
1877 ++ }
1878 ++ break;
1879 ++ case MSR_CORE_PERF_GLOBAL_CTRL:
1880 ++ if (cpu_has_load_perf_global_ctrl()) {
1881 ++ add_atomic_switch_msr_special(vmx,
1882 ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1883 ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1884 ++ GUEST_IA32_PERF_GLOBAL_CTRL,
1885 ++ HOST_IA32_PERF_GLOBAL_CTRL,
1886 ++ guest_val, host_val);
1887 ++ return;
1888 ++ }
1889 ++ break;
1890 ++ case MSR_IA32_PEBS_ENABLE:
1891 ++ /* PEBS needs a quiescent period after being disabled (to write
1892 ++ * a record). Disabling PEBS through VMX MSR swapping doesn't
1893 ++ * provide that period, so a CPU could write host's record into
1894 ++ * guest's memory.
1895 ++ */
1896 ++ wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
1897 ++ }
1898 ++
1899 ++ i = vmx_find_msr_index(&m->guest, msr);
1900 ++ if (!entry_only)
1901 ++ j = vmx_find_msr_index(&m->host, msr);
1902 ++
1903 ++ if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
1904 ++ (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
1905 ++ printk_once(KERN_WARNING "Not enough msr switch entries. "
1906 ++ "Can't add msr %x\n", msr);
1907 ++ return;
1908 ++ }
1909 ++ if (i < 0) {
1910 ++ i = m->guest.nr++;
1911 ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
1912 ++ }
1913 ++ m->guest.val[i].index = msr;
1914 ++ m->guest.val[i].value = guest_val;
1915 ++
1916 ++ if (entry_only)
1917 ++ return;
1918 ++
1919 ++ if (j < 0) {
1920 ++ j = m->host.nr++;
1921 ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
1922 ++ }
1923 ++ m->host.val[j].index = msr;
1924 ++ m->host.val[j].value = host_val;
1925 ++}
1926 ++
1927 ++static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
1928 ++{
1929 ++ u64 guest_efer = vmx->vcpu.arch.efer;
1930 ++ u64 ignore_bits = 0;
1931 ++
1932 ++ /* Shadow paging assumes NX to be available. */
1933 ++ if (!enable_ept)
1934 ++ guest_efer |= EFER_NX;
1935 ++
1936 ++ /*
1937 ++ * LMA and LME handled by hardware; SCE meaningless outside long mode.
1938 ++ */
1939 ++ ignore_bits |= EFER_SCE;
1940 ++#ifdef CONFIG_X86_64
1941 ++ ignore_bits |= EFER_LMA | EFER_LME;
1942 ++ /* SCE is meaningful only in long mode on Intel */
1943 ++ if (guest_efer & EFER_LMA)
1944 ++ ignore_bits &= ~(u64)EFER_SCE;
1945 ++#endif
1946 ++
1947 ++ /*
1948 ++ * On EPT, we can't emulate NX, so we must switch EFER atomically.
1949 ++ * On CPUs that support "load IA32_EFER", always switch EFER
1950 ++ * atomically, since it's faster than switching it manually.
1951 ++ */
1952 ++ if (cpu_has_load_ia32_efer() ||
1953 ++ (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
1954 ++ if (!(guest_efer & EFER_LMA))
1955 ++ guest_efer &= ~EFER_LME;
1956 ++ if (guest_efer != host_efer)
1957 ++ add_atomic_switch_msr(vmx, MSR_EFER,
1958 ++ guest_efer, host_efer, false);
1959 ++ else
1960 ++ clear_atomic_switch_msr(vmx, MSR_EFER);
1961 ++ return false;
1962 ++ } else {
1963 ++ clear_atomic_switch_msr(vmx, MSR_EFER);
1964 ++
1965 ++ guest_efer &= ~ignore_bits;
1966 ++ guest_efer |= host_efer & ignore_bits;
1967 ++
1968 ++ vmx->guest_msrs[efer_offset].data = guest_efer;
1969 ++ vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
1970 ++
1971 ++ return true;
1972 ++ }
1973 ++}
1974 ++
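In the non-atomic branch above, the bits listed in ignore_bits are taken from host_efer and every other bit from the guest value; the merge is simply (guest & ~mask) | (host & mask). A small worked example:

#include <stdio.h>

/* Bits set in 'mask' come from 'host', all other bits from 'guest'. */
static unsigned long long merge_bits(unsigned long long guest,
                                     unsigned long long host,
                                     unsigned long long mask)
{
        return (guest & ~mask) | (host & mask);
}

int main(void)
{
        /* e.g. ignore SCE (EFER bit 0): guest sets it, host clears it */
        printf("merged: 0x%llx\n", merge_bits(0xd01ULL, 0xd00ULL, 0x1ULL));  /* 0xd00 */
        return 0;
}
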
1975 ++#ifdef CONFIG_X86_32
1976 ++/*
1977 ++ * On 32-bit kernels, VM exits still load the FS and GS bases from the
1978 ++ * VMCS rather than the segment table. KVM uses this helper to figure
1979 ++ * out the current bases to poke them into the VMCS before entry.
1980 ++ */
1981 ++static unsigned long segment_base(u16 selector)
1982 ++{
1983 ++ struct desc_struct *table;
1984 ++ unsigned long v;
1985 ++
1986 ++ if (!(selector & ~SEGMENT_RPL_MASK))
1987 ++ return 0;
1988 ++
1989 ++ table = get_current_gdt_ro();
1990 ++
1991 ++ if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
1992 ++ u16 ldt_selector = kvm_read_ldt();
1993 ++
1994 ++ if (!(ldt_selector & ~SEGMENT_RPL_MASK))
1995 ++ return 0;
1996 ++
1997 ++ table = (struct desc_struct *)segment_base(ldt_selector);
1998 ++ }
1999 ++ v = get_desc_base(&table[selector >> 3]);
2000 ++ return v;
2001 ++}
2002 ++#endif
2003 ++
2004 ++static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
2005 ++{
2006 ++ u32 i;
2007 ++
2008 ++ wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
2009 ++ wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
2010 ++ wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
2011 ++ wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
2012 ++ for (i = 0; i < addr_range; i++) {
2013 ++ wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
2014 ++ wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
2015 ++ }
2016 ++}
2017 ++
2018 ++static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
2019 ++{
2020 ++ u32 i;
2021 ++
2022 ++ rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
2023 ++ rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
2024 ++ rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
2025 ++ rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
2026 ++ for (i = 0; i < addr_range; i++) {
2027 ++ rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
2028 ++ rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
2029 ++ }
2030 ++}
2031 ++
2032 ++static void pt_guest_enter(struct vcpu_vmx *vmx)
2033 ++{
2034 ++ if (pt_mode == PT_MODE_SYSTEM)
2035 ++ return;
2036 ++
2037 ++ /*
2038 ++ * GUEST_IA32_RTIT_CTL is already set in the VMCS.
2039 ++ * Save host state before VM entry.
2040 ++ */
2041 ++ rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
2042 ++ if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
2043 ++ wrmsrl(MSR_IA32_RTIT_CTL, 0);
2044 ++ pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
2045 ++ pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
2046 ++ }
2047 ++}
2048 ++
2049 ++static void pt_guest_exit(struct vcpu_vmx *vmx)
2050 ++{
2051 ++ if (pt_mode == PT_MODE_SYSTEM)
2052 ++ return;
2053 ++
2054 ++ if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
2055 ++ pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
2056 ++ pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
2057 ++ }
2058 ++
2059 ++ /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */
2060 ++ wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
2061 ++}
2062 ++
2063 ++void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
2064 ++ unsigned long fs_base, unsigned long gs_base)
2065 ++{
2066 ++ if (unlikely(fs_sel != host->fs_sel)) {
2067 ++ if (!(fs_sel & 7))
2068 ++ vmcs_write16(HOST_FS_SELECTOR, fs_sel);
2069 ++ else
2070 ++ vmcs_write16(HOST_FS_SELECTOR, 0);
2071 ++ host->fs_sel = fs_sel;
2072 ++ }
2073 ++ if (unlikely(gs_sel != host->gs_sel)) {
2074 ++ if (!(gs_sel & 7))
2075 ++ vmcs_write16(HOST_GS_SELECTOR, gs_sel);
2076 ++ else
2077 ++ vmcs_write16(HOST_GS_SELECTOR, 0);
2078 ++ host->gs_sel = gs_sel;
2079 ++ }
2080 ++ if (unlikely(fs_base != host->fs_base)) {
2081 ++ vmcs_writel(HOST_FS_BASE, fs_base);
2082 ++ host->fs_base = fs_base;
2083 ++ }
2084 ++ if (unlikely(gs_base != host->gs_base)) {
2085 ++ vmcs_writel(HOST_GS_BASE, gs_base);
2086 ++ host->gs_base = gs_base;
2087 ++ }
2088 ++}
2089 ++
2090 ++void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
2091 ++{
2092 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2093 ++ struct vmcs_host_state *host_state;
2094 ++#ifdef CONFIG_X86_64
2095 ++ int cpu = raw_smp_processor_id();
2096 ++#endif
2097 ++ unsigned long fs_base, gs_base;
2098 ++ u16 fs_sel, gs_sel;
2099 ++ int i;
2100 ++
2101 ++ vmx->req_immediate_exit = false;
2102 ++
2103 ++ /*
2104 ++ * Note that guest MSRs to be saved/restored can also be changed
2105 ++ * when guest state is loaded. This happens when guest transitions
2106 ++ * to/from long-mode by setting MSR_EFER.LMA.
2107 ++ */
2108 ++ if (!vmx->guest_msrs_ready) {
2109 ++ vmx->guest_msrs_ready = true;
2110 ++ for (i = 0; i < vmx->save_nmsrs; ++i)
2111 ++ kvm_set_shared_msr(vmx->guest_msrs[i].index,
2112 ++ vmx->guest_msrs[i].data,
2113 ++ vmx->guest_msrs[i].mask);
2114 ++
2115 ++ }
2116 ++ if (vmx->guest_state_loaded)
2117 ++ return;
2118 ++
2119 ++ host_state = &vmx->loaded_vmcs->host_state;
2120 ++
2121 ++ /*
2122 ++ * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
2123 ++ * allow segment selectors with cpl > 0 or ti == 1.
2124 ++ */
2125 ++ host_state->ldt_sel = kvm_read_ldt();
2126 ++
2127 ++#ifdef CONFIG_X86_64
2128 ++ savesegment(ds, host_state->ds_sel);
2129 ++ savesegment(es, host_state->es_sel);
2130 ++
2131 ++ gs_base = cpu_kernelmode_gs_base(cpu);
2132 ++ if (likely(is_64bit_mm(current->mm))) {
2133 ++ save_fsgs_for_kvm();
2134 ++ fs_sel = current->thread.fsindex;
2135 ++ gs_sel = current->thread.gsindex;
2136 ++ fs_base = current->thread.fsbase;
2137 ++ vmx->msr_host_kernel_gs_base = current->thread.gsbase;
2138 ++ } else {
2139 ++ savesegment(fs, fs_sel);
2140 ++ savesegment(gs, gs_sel);
2141 ++ fs_base = read_msr(MSR_FS_BASE);
2142 ++ vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
2143 ++ }
2144 ++
2145 ++ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2146 ++#else
2147 ++ savesegment(fs, fs_sel);
2148 ++ savesegment(gs, gs_sel);
2149 ++ fs_base = segment_base(fs_sel);
2150 ++ gs_base = segment_base(gs_sel);
2151 ++#endif
2152 ++
2153 ++ vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
2154 ++ vmx->guest_state_loaded = true;
2155 ++}
2156 ++
2157 ++static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
2158 ++{
2159 ++ struct vmcs_host_state *host_state;
2160 ++
2161 ++ if (!vmx->guest_state_loaded)
2162 ++ return;
2163 ++
2164 ++ host_state = &vmx->loaded_vmcs->host_state;
2165 ++
2166 ++ ++vmx->vcpu.stat.host_state_reload;
2167 ++
2168 ++#ifdef CONFIG_X86_64
2169 ++ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2170 ++#endif
2171 ++ if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2172 ++ kvm_load_ldt(host_state->ldt_sel);
2173 ++#ifdef CONFIG_X86_64
2174 ++ load_gs_index(host_state->gs_sel);
2175 ++#else
2176 ++ loadsegment(gs, host_state->gs_sel);
2177 ++#endif
2178 ++ }
2179 ++ if (host_state->fs_sel & 7)
2180 ++ loadsegment(fs, host_state->fs_sel);
2181 ++#ifdef CONFIG_X86_64
2182 ++ if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2183 ++ loadsegment(ds, host_state->ds_sel);
2184 ++ loadsegment(es, host_state->es_sel);
2185 ++ }
2186 ++#endif
2187 ++ invalidate_tss_limit();
2188 ++#ifdef CONFIG_X86_64
2189 ++ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
2190 ++#endif
2191 ++ load_fixmap_gdt(raw_smp_processor_id());
2192 ++ vmx->guest_state_loaded = false;
2193 ++ vmx->guest_msrs_ready = false;
2194 ++}
2195 ++
2196 ++#ifdef CONFIG_X86_64
2197 ++static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
2198 ++{
2199 ++ preempt_disable();
2200 ++ if (vmx->guest_state_loaded)
2201 ++ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2202 ++ preempt_enable();
2203 ++ return vmx->msr_guest_kernel_gs_base;
2204 ++}
2205 ++
2206 ++static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
2207 ++{
2208 ++ preempt_disable();
2209 ++ if (vmx->guest_state_loaded)
2210 ++ wrmsrl(MSR_KERNEL_GS_BASE, data);
2211 ++ preempt_enable();
2212 ++ vmx->msr_guest_kernel_gs_base = data;
2213 ++}
2214 ++#endif
2215 ++
2216 ++static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2217 ++{
2218 ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2219 ++ struct pi_desc old, new;
2220 ++ unsigned int dest;
2221 ++
2222 ++ /*
2223 ++ * In case of hot-plug or hot-unplug, we may have to undo
2224 ++ * vmx_vcpu_pi_put even if there is no assigned device. And we
2225 ++ * always keep PI.NDST up to date for simplicity: it makes the
2226 ++ * code easier, and CPU migration is not a fast path.
2227 ++ */
2228 ++ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
2229 ++ return;
2230 ++
2231 ++ /*
2232 ++ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2233 ++ * PI.NDST: pi_post_block is the one expected to change PI.NDST, and the
2234 ++ * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
2235 ++ * matches PI.NDST. Otherwise the vCPU may not be woken up
2236 ++ * correctly.
2237 ++ */
2238 ++ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
2239 ++ pi_clear_sn(pi_desc);
2240 ++ goto after_clear_sn;
2241 ++ }
2242 ++
2243 ++ /* The full case. */
2244 ++ do {
2245 ++ old.control = new.control = pi_desc->control;
2246 ++
2247 ++ dest = cpu_physical_id(cpu);
2248 ++
2249 ++ if (x2apic_enabled())
2250 ++ new.ndst = dest;
2251 ++ else
2252 ++ new.ndst = (dest << 8) & 0xFF00;
2253 ++
2254 ++ new.sn = 0;
2255 ++ } while (cmpxchg64(&pi_desc->control, old.control,
2256 ++ new.control) != old.control);
2257 ++
2258 ++after_clear_sn:
2259 ++
2260 ++ /*
2261 ++ * Clear SN before reading the bitmap. The VT-d firmware
2262 ++ * writes the bitmap and reads SN atomically (5.2.3 in the
2263 ++ * spec), so no barrier on the hardware side pairs with this
2264 ++ * one; we cannot do the same atomically, so we need a barrier here.
2265 ++ */
2266 ++ smp_mb__after_atomic();
2267 ++
2268 ++ if (!pi_is_pir_empty(pi_desc))
2269 ++ pi_set_on(pi_desc);
2270 ++}
2271 ++
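vmx_vcpu_pi_load() above updates the 64-bit posted-interrupt control word with a read-modify-cmpxchg retry loop. A standalone C11 sketch of the same pattern; the bit layout here is arbitrary and chosen only for the example:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Atomically set the low 8 bits of *ctl to 'dest' and clear a flag bit,
 * retrying if another thread changed the word in the meantime. */
static void update_control(_Atomic uint64_t *ctl, uint8_t dest)
{
        uint64_t old = atomic_load(ctl), newval;

        do {
                newval = (old & ~0xffULL & ~(1ULL << 62)) | dest;
        } while (!atomic_compare_exchange_weak(ctl, &old, newval));
}

int main(void)
{
        _Atomic uint64_t ctl = (1ULL << 62) | 0x05;

        update_control(&ctl, 0x17);
        printf("control: 0x%llx\n", (unsigned long long)atomic_load(&ctl));  /* 0x17 */
        return 0;
}
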
2272 ++void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
2273 ++{
2274 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2275 ++ bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
2276 ++
2277 ++ if (!already_loaded) {
2278 ++ loaded_vmcs_clear(vmx->loaded_vmcs);
2279 ++ local_irq_disable();
2280 ++ crash_disable_local_vmclear(cpu);
2281 ++
2282 ++ /*
2283 ++ * Read loaded_vmcs->cpu should be before fetching
2284 ++ * loaded_vmcs->loaded_vmcss_on_cpu_link.
2285 ++ * See the comments in __loaded_vmcs_clear().
2286 ++ */
2287 ++ smp_rmb();
2288 ++
2289 ++ list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
2290 ++ &per_cpu(loaded_vmcss_on_cpu, cpu));
2291 ++ crash_enable_local_vmclear(cpu);
2292 ++ local_irq_enable();
2293 ++ }
2294 ++
2295 ++ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2296 ++ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2297 ++ vmcs_load(vmx->loaded_vmcs->vmcs);
2298 ++ indirect_branch_prediction_barrier();
2299 ++ }
2300 ++
2301 ++ if (!already_loaded) {
2302 ++ void *gdt = get_current_gdt_ro();
2303 ++ unsigned long sysenter_esp;
2304 ++
2305 ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2306 ++
2307 ++ /*
2308 ++ * Linux uses per-cpu TSS and GDT, so set these when switching
2309 ++ * processors. See 22.2.4.
2310 ++ */
2311 ++ vmcs_writel(HOST_TR_BASE,
2312 ++ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2313 ++ vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2314 ++
2315 ++ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
2316 ++ vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
2317 ++
2318 ++ vmx->loaded_vmcs->cpu = cpu;
2319 ++ }
2320 ++
2321 ++ /* Setup TSC multiplier */
2322 ++ if (kvm_has_tsc_control &&
2323 ++ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
2324 ++ decache_tsc_multiplier(vmx);
2325 ++}
2326 ++
2327 ++/*
2328 ++ * Switches to specified vcpu, until a matching vcpu_put(), but assumes
2329 ++ * vcpu mutex is already taken.
2330 ++ */
2331 ++void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2332 ++{
2333 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2334 ++
2335 ++ vmx_vcpu_load_vmcs(vcpu, cpu);
2336 ++
2337 ++ vmx_vcpu_pi_load(vcpu, cpu);
2338 ++
2339 ++ vmx->host_pkru = read_pkru();
2340 ++ vmx->host_debugctlmsr = get_debugctlmsr();
2341 ++}
2342 ++
2343 ++static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
2344 ++{
2345 ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2346 ++
2347 ++ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
2348 ++ !irq_remapping_cap(IRQ_POSTING_CAP) ||
2349 ++ !kvm_vcpu_apicv_active(vcpu))
2350 ++ return;
2351 ++
2352 ++ /* Set SN when the vCPU is preempted */
2353 ++ if (vcpu->preempted)
2354 ++ pi_set_sn(pi_desc);
2355 ++}
2356 ++
2357 ++static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2358 ++{
2359 ++ vmx_vcpu_pi_put(vcpu);
2360 ++
2361 ++ vmx_prepare_switch_to_host(to_vmx(vcpu));
2362 ++}
2363 ++
2364 ++static bool emulation_required(struct kvm_vcpu *vcpu)
2365 ++{
2366 ++ return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2367 ++}
2368 ++
2369 ++static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2370 ++
2371 ++unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2372 ++{
2373 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2374 ++ unsigned long rflags, save_rflags;
2375 ++
2376 ++ if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
2377 ++ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2378 ++ rflags = vmcs_readl(GUEST_RFLAGS);
2379 ++ if (vmx->rmode.vm86_active) {
2380 ++ rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2381 ++ save_rflags = vmx->rmode.save_rflags;
2382 ++ rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2383 ++ }
2384 ++ vmx->rflags = rflags;
2385 ++ }
2386 ++ return vmx->rflags;
2387 ++}
2388 ++
2389 ++void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2390 ++{
2391 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2392 ++ unsigned long old_rflags;
2393 ++
2394 ++ if (enable_unrestricted_guest) {
2395 ++ kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2396 ++ vmx->rflags = rflags;
2397 ++ vmcs_writel(GUEST_RFLAGS, rflags);
2398 ++ return;
2399 ++ }
2400 ++
2401 ++ old_rflags = vmx_get_rflags(vcpu);
2402 ++ vmx->rflags = rflags;
2403 ++ if (vmx->rmode.vm86_active) {
2404 ++ vmx->rmode.save_rflags = rflags;
2405 ++ rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
2406 ++ }
2407 ++ vmcs_writel(GUEST_RFLAGS, rflags);
2408 ++
2409 ++ if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
2410 ++ vmx->emulation_required = emulation_required(vcpu);
2411 ++}
2412 ++
2413 ++u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2414 ++{
2415 ++ u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2416 ++ int ret = 0;
2417 ++
2418 ++ if (interruptibility & GUEST_INTR_STATE_STI)
2419 ++ ret |= KVM_X86_SHADOW_INT_STI;
2420 ++ if (interruptibility & GUEST_INTR_STATE_MOV_SS)
2421 ++ ret |= KVM_X86_SHADOW_INT_MOV_SS;
2422 ++
2423 ++ return ret;
2424 ++}
2425 ++
2426 ++void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
2427 ++{
2428 ++ u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2429 ++ u32 interruptibility = interruptibility_old;
2430 ++
2431 ++ interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
2432 ++
2433 ++ if (mask & KVM_X86_SHADOW_INT_MOV_SS)
2434 ++ interruptibility |= GUEST_INTR_STATE_MOV_SS;
2435 ++ else if (mask & KVM_X86_SHADOW_INT_STI)
2436 ++ interruptibility |= GUEST_INTR_STATE_STI;
2437 ++
2438 ++ if ((interruptibility != interruptibility_old))
2439 ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
2440 ++}
2441 ++
2442 ++static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
2443 ++{
2444 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2445 ++ unsigned long value;
2446 ++
2447 ++ /*
2448 ++ * Any MSR write that attempts to change bits marked reserved will
2449 ++ * cause a #GP fault.
2450 ++ */
2451 ++ if (data & vmx->pt_desc.ctl_bitmask)
2452 ++ return 1;
2453 ++
2454 ++ /*
2455 ++ * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
2456 ++ * result in a #GP unless the same write also clears TraceEn.
2457 ++ */
2458 ++ if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
2459 ++ ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
2460 ++ return 1;
2461 ++
2462 ++ /*
2463 ++ * A WRMSR to IA32_RTIT_CTL that sets TraceEn but clears ToPA
2464 ++ * and FabricEn would cause a #GP if
2465 ++ * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0.
2466 ++ */
2467 ++ if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
2468 ++ !(data & RTIT_CTL_FABRIC_EN) &&
2469 ++ !intel_pt_validate_cap(vmx->pt_desc.caps,
2470 ++ PT_CAP_single_range_output))
2471 ++ return 1;
2472 ++
2473 ++ /*
2474 ++ * MTCFreq, CycThresh and PSBFreq encodings check: any MSR write that
2475 ++ * uses encodings marked reserved will cause a #GP fault.
2476 ++ */
2477 ++ value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
2478 ++ if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
2479 ++ !test_bit((data & RTIT_CTL_MTC_RANGE) >>
2480 ++ RTIT_CTL_MTC_RANGE_OFFSET, &value))
2481 ++ return 1;
2482 ++ value = intel_pt_validate_cap(vmx->pt_desc.caps,
2483 ++ PT_CAP_cycle_thresholds);
2484 ++ if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2485 ++ !test_bit((data & RTIT_CTL_CYC_THRESH) >>
2486 ++ RTIT_CTL_CYC_THRESH_OFFSET, &value))
2487 ++ return 1;
2488 ++ value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
2489 ++ if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2490 ++ !test_bit((data & RTIT_CTL_PSB_FREQ) >>
2491 ++ RTIT_CTL_PSB_FREQ_OFFSET, &value))
2492 ++ return 1;
2493 ++
2494 ++ /*
2495 ++ * If ADDRx_CFG is reserved or its encoding is greater than 2, the
2496 ++ * write will cause a #GP fault.
2497 ++ */
2498 ++ value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
2499 ++ if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
2500 ++ return 1;
2501 ++ value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
2502 ++ if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
2503 ++ return 1;
2504 ++ value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
2505 ++ if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
2506 ++ return 1;
2507 ++ value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
2508 ++ if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
2509 ++ return 1;
2510 ++
2511 ++ return 0;
2512 ++}
2513 ++
2514 ++static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
2515 ++{
2516 ++ unsigned long rip;
2517 ++
2518 ++ /*
2519 ++ * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
2520 ++ * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
2521 ++ * set when EPT misconfig occurs. In practice, real hardware updates
2522 ++ * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
2523 ++ * (namely Hyper-V) don't set it due to it being undefined behavior,
2524 ++ * i.e. we end up advancing IP with some random value.
2525 ++ */
2526 ++ if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
2527 ++ to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
2528 ++ rip = kvm_rip_read(vcpu);
2529 ++ rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2530 ++ kvm_rip_write(vcpu, rip);
2531 ++ } else {
2532 ++ if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
2533 ++ return 0;
2534 ++ }
2535 ++
2536 ++ /* skipping an emulated instruction also counts */
2537 ++ vmx_set_interrupt_shadow(vcpu, 0);
2538 ++
2539 ++ return 1;
2540 ++}
2541 ++
2542 ++static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
2543 ++{
2544 ++ /*
2545 ++ * Ensure that we clear the HLT state in the VMCS. We don't need to
2546 ++ * explicitly skip the instruction because if the HLT state is set,
2547 ++ * then the instruction is already executing and RIP has already been
2548 ++ * advanced.
2549 ++ */
2550 ++ if (kvm_hlt_in_guest(vcpu->kvm) &&
2551 ++ vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
2552 ++ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
2553 ++}
2554 ++
2555 ++static void vmx_queue_exception(struct kvm_vcpu *vcpu)
2556 ++{
2557 ++ struct vcpu_vmx *vmx = to_vmx(vcpu);
2558 ++ unsigned nr = vcpu->arch.exception.nr;
2559 ++ bool has_error_code = vcpu->arch.exception.has_error_code;
2560 ++ u32 error_code = vcpu->arch.exception.error_code;
2561 ++ u32 intr_info = nr | INTR_INFO_VALID_MASK;
2562 ++
2563 ++ kvm_deliver_exception_payload(vcpu);
2564 ++
2565 ++ if (has_error_code) {
2566 ++ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
2567 ++ intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2568 ++ }
2569 ++
2570 ++ if (vmx->rmode.vm86_active) {
2571 ++ int inc_eip = 0;
2572 ++ if (kvm_exception_is_soft(nr))
2573 ++ inc_eip = vcpu->arch.event_exit_inst_len;
2574 ++ kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
2575 ++ return;
2576 ++ }
2577 ++
2578 ++ WARN_ON_ONCE(vmx->emulation_required);
2579 ++