1 |
commit: ec095309f3e13173054c6b3f03749edd89ce5944 |
2 |
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org> |
3 |
AuthorDate: Thu Jan 4 15:10:05 2018 +0000 |
4 |
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org> |
5 |
CommitDate: Thu Jan 4 15:10:05 2018 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=ec095309 |
7 |
|
8 |
x86 page table isolation fixes |
9 |
|
10 |
0000_README | 14 +- |
11 |
1700_do_not_enable_PTI_on_AMD_processor.patch | 44 -- |
12 |
1700_x86-page-table-isolation-fixes.patch | 453 +++++++++++++++++++++ |
13 |
1701_make_sure_the_user_kernel_PTEs_match.patch | 56 --- |
14 |
...rnel_CR3_at_early_in_entry_SYSCALL_compat.patch | 68 ---- |
15 |
5 files changed, 456 insertions(+), 179 deletions(-) |
16 |
|
17 |
diff --git a/0000_README b/0000_README |
18 |
index d47f74d..c07cc2b 100644 |
19 |
--- a/0000_README |
20 |
+++ b/0000_README |
21 |
@@ -95,17 +95,9 @@ Patch: 1510_fs-enable-link-security-restrictions-by-default.patch |
22 |
From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/ |
23 |
Desc: Enable link security restrictions by default. |
24 |
|
25 |
-Patch: 1700_do_not_enable_PTI_on_AMD_processor.patch |
26 |
-From: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/patch/?id=694d99d40972f12e59a3696effee8a376b79d7c8 |
27 |
-Desc: x86/cpu, x86/pti: Do not enable PTI on AMD processors. |
28 |
- |
29 |
-Patch: 1701_make_sure_the_user_kernel_PTEs_match.patch |
30 |
-From: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/patch/?id=52994c256df36fda9a715697431cba9daecb6b11 |
31 |
-Desc: x86/pti: Make sure the user/kernel PTEs match |
32 |
- |
33 |
-Patch: 1702_switch_to_kernel_CR3_at_early_in_entry_SYSCALL_compat.patch |
34 |
-From: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=WIP.x86/pti&id=d7732ba55c4b6a2da339bb12589c515830cfac2c |
35 |
-Desc: Switch to kernel CR3 at early in entry_SYSCALL_compat() |
36 |
+Patch: 1700_x86-page-table-isolation-fixes.patch |
37 |
+From: https://github.com/torvalds/linux/commit/00a5ae218d57741088068799b810416ac249a9ce |
38 |
+Desc: x86 page table isolation fixes cumulative patch. |
39 |
|
40 |
Patch: 2100_bcache-data-corruption-fix-for-bi-partno.patch |
41 |
From: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=62530ed8b1d07a45dec94d46e521c0c6c2d476e6 |
42 |
|
43 |
diff --git a/1700_do_not_enable_PTI_on_AMD_processor.patch b/1700_do_not_enable_PTI_on_AMD_processor.patch |
44 |
deleted file mode 100644 |
45 |
index 3069c4c..0000000 |
46 |
--- a/1700_do_not_enable_PTI_on_AMD_processor.patch |
47 |
+++ /dev/null |
48 |
@@ -1,44 +0,0 @@ |
49 |
-From 694d99d40972f12e59a3696effee8a376b79d7c8 Mon Sep 17 00:00:00 2001 |
50 |
-From: Tom Lendacky <thomas.lendacky@×××.com> |
51 |
-Date: Tue, 26 Dec 2017 23:43:54 -0600 |
52 |
-Subject: x86/cpu, x86/pti: Do not enable PTI on AMD processors |
53 |
- |
54 |
-AMD processors are not subject to the types of attacks that the kernel |
55 |
-page table isolation feature protects against. The AMD microarchitecture |
56 |
-does not allow memory references, including speculative references, that |
57 |
-access higher privileged data when running in a lesser privileged mode |
58 |
-when that access would result in a page fault. |
59 |
- |
60 |
-Disable page table isolation by default on AMD processors by not setting |
61 |
-the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI |
62 |
-is set. |
63 |
- |
64 |
-Signed-off-by: Tom Lendacky <thomas.lendacky@×××.com> |
65 |
-Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
66 |
-Reviewed-by: Borislav Petkov <bp@××××.de> |
67 |
-Cc: Dave Hansen <dave.hansen@×××××××××××.com> |
68 |
-Cc: Andy Lutomirski <luto@××××××.org> |
69 |
-Cc: stable@×××××××××××.org |
70 |
-Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@×××××××××××××××××××××.net |
71 |
---- |
72 |
- arch/x86/kernel/cpu/common.c | 4 ++-- |
73 |
- 1 file changed, 2 insertions(+), 2 deletions(-) |
74 |
- |
75 |
-diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
76 |
-index f2a94df..b1be494 100644 |
77 |
---- a/arch/x86/kernel/cpu/common.c |
78 |
-+++ b/arch/x86/kernel/cpu/common.c |
79 |
-@@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
80 |
- |
81 |
- setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
82 |
- |
83 |
-- /* Assume for now that ALL x86 CPUs are insecure */ |
84 |
-- setup_force_cpu_bug(X86_BUG_CPU_INSECURE); |
85 |
-+ if (c->x86_vendor != X86_VENDOR_AMD) |
86 |
-+ setup_force_cpu_bug(X86_BUG_CPU_INSECURE); |
87 |
- |
88 |
- fpu__init_system(c); |
89 |
- |
90 |
--- |
91 |
-cgit v1.1 |
92 |
- |
93 |
|
94 |
diff --git a/1700_x86-page-table-isolation-fixes.patch b/1700_x86-page-table-isolation-fixes.patch |
95 |
new file mode 100644 |
96 |
index 0000000..6fcbf41 |
97 |
--- /dev/null |
98 |
+++ b/1700_x86-page-table-isolation-fixes.patch |
99 |
@@ -0,0 +1,453 @@ |
100 |
+From 87faa0d9b43b4755ff6963a22d1fd1bee1aa3b39 Mon Sep 17 00:00:00 2001 |
101 |
+From: Thomas Gleixner <tglx@××××××××××.de> |
102 |
+Date: Wed, 3 Jan 2018 15:18:44 +0100 |
103 |
+Subject: [PATCH 1/7] x86/pti: Enable PTI by default |
104 |
+ |
105 |
+This really wants to be enabled by default. Users who know what they are |
106 |
+doing can disable it either in the config or on the kernel command line. |
107 |
+ |
108 |
+Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
109 |
+Cc: stable@×××××××××××.org |
110 |
+--- |
111 |
+ security/Kconfig | 1 + |
112 |
+ 1 file changed, 1 insertion(+) |
113 |
+ |
114 |
+diff --git a/security/Kconfig b/security/Kconfig |
115 |
+index a623d13bf2884..3d4debd0257e2 100644 |
116 |
+--- a/security/Kconfig |
117 |
++++ b/security/Kconfig |
118 |
+@@ -56,6 +56,7 @@ config SECURITY_NETWORK |
119 |
+ |
120 |
+ config PAGE_TABLE_ISOLATION |
121 |
+ bool "Remove the kernel mapping in user mode" |
122 |
++ default y |
123 |
+ depends on X86_64 && !UML |
124 |
+ help |
125 |
+ This feature reduces the number of hardware side channels by |
126 |
+ |
127 |
+From 694d99d40972f12e59a3696effee8a376b79d7c8 Mon Sep 17 00:00:00 2001 |
128 |
+From: Tom Lendacky <thomas.lendacky@×××.com> |
129 |
+Date: Tue, 26 Dec 2017 23:43:54 -0600 |
130 |
+Subject: [PATCH 2/7] x86/cpu, x86/pti: Do not enable PTI on AMD processors |
131 |
+ |
132 |
+AMD processors are not subject to the types of attacks that the kernel |
133 |
+page table isolation feature protects against. The AMD microarchitecture |
134 |
+does not allow memory references, including speculative references, that |
135 |
+access higher privileged data when running in a lesser privileged mode |
136 |
+when that access would result in a page fault. |
137 |
+ |
138 |
+Disable page table isolation by default on AMD processors by not setting |
139 |
+the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI |
140 |
+is set. |
141 |
+ |
142 |
+Signed-off-by: Tom Lendacky <thomas.lendacky@×××.com> |
143 |
+Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
144 |
+Reviewed-by: Borislav Petkov <bp@××××.de> |
145 |
+Cc: Dave Hansen <dave.hansen@×××××××××××.com> |
146 |
+Cc: Andy Lutomirski <luto@××××××.org> |
147 |
+Cc: stable@×××××××××××.org |
148 |
+Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@×××××××××××××××××××××.net |
149 |
+--- |
150 |
+ arch/x86/kernel/cpu/common.c | 4 ++-- |
151 |
+ 1 file changed, 2 insertions(+), 2 deletions(-) |
152 |
+ |
153 |
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
154 |
+index f2a94dfb434e9..b1be494ab4e8b 100644 |
155 |
+--- a/arch/x86/kernel/cpu/common.c |
156 |
++++ b/arch/x86/kernel/cpu/common.c |
157 |
+@@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
158 |
+ |
159 |
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
160 |
+ |
161 |
+- /* Assume for now that ALL x86 CPUs are insecure */ |
162 |
+- setup_force_cpu_bug(X86_BUG_CPU_INSECURE); |
163 |
++ if (c->x86_vendor != X86_VENDOR_AMD) |
164 |
++ setup_force_cpu_bug(X86_BUG_CPU_INSECURE); |
165 |
+ |
166 |
+ fpu__init_system(c); |
167 |
+ |
168 |
+ |
169 |
+From 52994c256df36fda9a715697431cba9daecb6b11 Mon Sep 17 00:00:00 2001 |
170 |
+From: Thomas Gleixner <tglx@××××××××××.de> |
171 |
+Date: Wed, 3 Jan 2018 15:57:59 +0100 |
172 |
+Subject: [PATCH 3/7] x86/pti: Make sure the user/kernel PTEs match |
173 |
+ |
174 |
+Meelis reported that his K8 Athlon64 emits MCE warnings when PTI is |
175 |
+enabled: |
176 |
+ |
177 |
+[Hardware Error]: Error Addr: 0x0000ffff81e000e0 |
178 |
+[Hardware Error]: MC1 Error: L1 TLB multimatch. |
179 |
+[Hardware Error]: cache level: L1, tx: INSN |
180 |
+ |
181 |
+The address is in the entry area, which is mapped into kernel _AND_ user |
182 |
+space. That's special because we switch CR3 while we are executing |
183 |
+there. |
184 |
+ |
185 |
+User mapping: |
186 |
+0xffffffff81e00000-0xffffffff82000000 2M ro PSE GLB x pmd |
187 |
+ |
188 |
+Kernel mapping: |
189 |
+0xffffffff81000000-0xffffffff82000000 16M ro PSE x pmd |
190 |
+ |
191 |
+So the K8 is complaining that the TLB entries differ. They differ in the |
192 |
+GLB bit. |
193 |
+ |
194 |
+Drop the GLB bit when installing the user shared mapping. |
195 |
+ |
196 |
+Fixes: 6dc72c3cbca0 ("x86/mm/pti: Share entry text PMD") |
197 |
+Reported-by: Meelis Roos <mroos@×××××.ee> |
198 |
+Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
199 |
+Tested-by: Meelis Roos <mroos@×××××.ee> |
200 |
+Cc: Borislav Petkov <bp@××××××.de> |
201 |
+Cc: Tom Lendacky <thomas.lendacky@×××.com> |
202 |
+Cc: stable@×××××××××××.org |
203 |
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031407180.1957@nanos |
204 |
+--- |
205 |
+ arch/x86/mm/pti.c | 3 ++- |
206 |
+ 1 file changed, 2 insertions(+), 1 deletion(-) |
207 |
+ |
208 |
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c |
209 |
+index bce8aea656062..2da28ba975082 100644 |
210 |
+--- a/arch/x86/mm/pti.c |
211 |
++++ b/arch/x86/mm/pti.c |
212 |
+@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void) |
213 |
+ static void __init pti_clone_entry_text(void) |
214 |
+ { |
215 |
+ pti_clone_pmds((unsigned long) __entry_text_start, |
216 |
+- (unsigned long) __irqentry_text_end, _PAGE_RW); |
217 |
++ (unsigned long) __irqentry_text_end, |
218 |
++ _PAGE_RW | _PAGE_GLOBAL); |
219 |
+ } |
220 |
+ |
221 |
+ /* |
222 |
+ |
223 |
+From a9cdbe72c4e8bf3b38781c317a79326e2e1a230d Mon Sep 17 00:00:00 2001 |
224 |
+From: Josh Poimboeuf <jpoimboe@××××××.com> |
225 |
+Date: Sun, 31 Dec 2017 10:18:06 -0600 |
226 |
+Subject: [PATCH 4/7] x86/dumpstack: Fix partial register dumps |
227 |
+MIME-Version: 1.0 |
228 |
+Content-Type: text/plain; charset=UTF-8 |
229 |
+Content-Transfer-Encoding: 8bit |
230 |
+ |
231 |
+The show_regs_safe() logic is wrong. When there's an iret stack frame, |
232 |
+it prints the entire pt_regs -- most of which is random stack data -- |
233 |
+instead of just the five registers at the end. |
234 |
+ |
235 |
+show_regs_safe() is also poorly named: the on_stack() checks aren't for |
236 |
+safety. Rename the function to show_regs_if_on_stack() and add a |
237 |
+comment to explain why the checks are needed. |
238 |
+ |
239 |
+These issues were introduced with the "partial register dump" feature of |
240 |
+the following commit: |
241 |
+ |
242 |
+ b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully") |
243 |
+ |
244 |
+That patch had gone through a few iterations of development, and the |
245 |
+above issues were artifacts from a previous iteration of the patch where |
246 |
+'regs' pointed directly to the iret frame rather than to the (partially |
247 |
+empty) pt_regs. |
248 |
+ |
249 |
+Tested-by: Alexander Tsoy <alexander@××××.me> |
250 |
+Signed-off-by: Josh Poimboeuf <jpoimboe@××××××.com> |
251 |
+Cc: Andy Lutomirski <luto@××××××.org> |
252 |
+Cc: Linus Torvalds <torvalds@××××××××××××××××.org> |
253 |
+Cc: Peter Zijlstra <peterz@×××××××××.org> |
254 |
+Cc: Thomas Gleixner <tglx@××××××××××.de> |
255 |
+Cc: Toralf Förster <toralf.foerster@×××.de> |
256 |
+Cc: stable@×××××××××××.org |
257 |
+Fixes: b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully") |
258 |
+Link: http://lkml.kernel.org/r/5b05b8b344f59db2d3d50dbdeba92d60f2304c54.1514736742.git.jpoimboe@××××××.com |
259 |
+Signed-off-by: Ingo Molnar <mingo@××××××.org> |
260 |
+--- |
261 |
+ arch/x86/include/asm/unwind.h | 17 +++++++++++++---- |
262 |
+ arch/x86/kernel/dumpstack.c | 28 ++++++++++++++++++++-------- |
263 |
+ arch/x86/kernel/stacktrace.c | 2 +- |
264 |
+ 3 files changed, 34 insertions(+), 13 deletions(-) |
265 |
+ |
266 |
+diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h |
267 |
+index c1688c2d0a128..1f86e1b0a5cdc 100644 |
268 |
+--- a/arch/x86/include/asm/unwind.h |
269 |
++++ b/arch/x86/include/asm/unwind.h |
270 |
+@@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, |
271 |
+ |
272 |
+ #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) |
273 |
+ /* |
274 |
+- * WARNING: The entire pt_regs may not be safe to dereference. In some cases, |
275 |
+- * only the iret frame registers are accessible. Use with caution! |
276 |
++ * If 'partial' returns true, only the iret frame registers are valid. |
277 |
+ */ |
278 |
+-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) |
279 |
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, |
280 |
++ bool *partial) |
281 |
+ { |
282 |
+ if (unwind_done(state)) |
283 |
+ return NULL; |
284 |
+ |
285 |
++ if (partial) { |
286 |
++#ifdef CONFIG_UNWINDER_ORC |
287 |
++ *partial = !state->full_regs; |
288 |
++#else |
289 |
++ *partial = false; |
290 |
++#endif |
291 |
++ } |
292 |
++ |
293 |
+ return state->regs; |
294 |
+ } |
295 |
+ #else |
296 |
+-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) |
297 |
++static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, |
298 |
++ bool *partial) |
299 |
+ { |
300 |
+ return NULL; |
301 |
+ } |
302 |
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c |
303 |
+index 5fa110699ed27..d0bb176a7261a 100644 |
304 |
+--- a/arch/x86/kernel/dumpstack.c |
305 |
++++ b/arch/x86/kernel/dumpstack.c |
306 |
+@@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs) |
307 |
+ regs->sp, regs->flags); |
308 |
+ } |
309 |
+ |
310 |
+-static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) |
311 |
++static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, |
312 |
++ bool partial) |
313 |
+ { |
314 |
+- if (on_stack(info, regs, sizeof(*regs))) |
315 |
++ /* |
316 |
++ * These on_stack() checks aren't strictly necessary: the unwind code |
317 |
++ * has already validated the 'regs' pointer. The checks are done for |
318 |
++ * ordering reasons: if the registers are on the next stack, we don't |
319 |
++ * want to print them out yet. Otherwise they'll be shown as part of |
320 |
++ * the wrong stack. Later, when show_trace_log_lvl() switches to the |
321 |
++ * next stack, this function will be called again with the same regs so |
322 |
++ * they can be printed in the right context. |
323 |
++ */ |
324 |
++ if (!partial && on_stack(info, regs, sizeof(*regs))) { |
325 |
+ __show_regs(regs, 0); |
326 |
+- else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, |
327 |
+- IRET_FRAME_SIZE)) { |
328 |
++ |
329 |
++ } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, |
330 |
++ IRET_FRAME_SIZE)) { |
331 |
+ /* |
332 |
+ * When an interrupt or exception occurs in entry code, the |
333 |
+ * full pt_regs might not have been saved yet. In that case |
334 |
+@@ -98,6 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
335 |
+ struct stack_info stack_info = {0}; |
336 |
+ unsigned long visit_mask = 0; |
337 |
+ int graph_idx = 0; |
338 |
++ bool partial; |
339 |
+ |
340 |
+ printk("%sCall Trace:\n", log_lvl); |
341 |
+ |
342 |
+@@ -140,7 +152,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
343 |
+ printk("%s <%s>\n", log_lvl, stack_name); |
344 |
+ |
345 |
+ if (regs) |
346 |
+- show_regs_safe(&stack_info, regs); |
347 |
++ show_regs_if_on_stack(&stack_info, regs, partial); |
348 |
+ |
349 |
+ /* |
350 |
+ * Scan the stack, printing any text addresses we find. At the |
351 |
+@@ -164,7 +176,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
352 |
+ |
353 |
+ /* |
354 |
+ * Don't print regs->ip again if it was already printed |
355 |
+- * by show_regs_safe() below. |
356 |
++ * by show_regs_if_on_stack(). |
357 |
+ */ |
358 |
+ if (regs && stack == ®s->ip) |
359 |
+ goto next; |
360 |
+@@ -199,9 +211,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
361 |
+ unwind_next_frame(&state); |
362 |
+ |
363 |
+ /* if the frame has entry regs, print them */ |
364 |
+- regs = unwind_get_entry_regs(&state); |
365 |
++ regs = unwind_get_entry_regs(&state, &partial); |
366 |
+ if (regs) |
367 |
+- show_regs_safe(&stack_info, regs); |
368 |
++ show_regs_if_on_stack(&stack_info, regs, partial); |
369 |
+ } |
370 |
+ |
371 |
+ if (stack_name) |
372 |
+diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c |
373 |
+index 8dabd7bf16730..60244bfaf88f6 100644 |
374 |
+--- a/arch/x86/kernel/stacktrace.c |
375 |
++++ b/arch/x86/kernel/stacktrace.c |
376 |
+@@ -98,7 +98,7 @@ static int __save_stack_trace_reliable(struct stack_trace *trace, |
377 |
+ for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); |
378 |
+ unwind_next_frame(&state)) { |
379 |
+ |
380 |
+- regs = unwind_get_entry_regs(&state); |
381 |
++ regs = unwind_get_entry_regs(&state, NULL); |
382 |
+ if (regs) { |
383 |
+ /* |
384 |
+ * Kernel mode registers on the stack indicate an |
385 |
+ |
386 |
+From 3ffdeb1a02be3086f1411a15c5b9c481fa28e21f Mon Sep 17 00:00:00 2001 |
387 |
+From: Josh Poimboeuf <jpoimboe@××××××.com> |
388 |
+Date: Sun, 31 Dec 2017 10:18:07 -0600 |
389 |
+Subject: [PATCH 5/7] x86/dumpstack: Print registers for first stack frame |
390 |
+MIME-Version: 1.0 |
391 |
+Content-Type: text/plain; charset=UTF-8 |
392 |
+Content-Transfer-Encoding: 8bit |
393 |
+ |
394 |
+In the stack dump code, if the frame after the starting pt_regs is also |
395 |
+a regs frame, the registers don't get printed. Fix that. |
396 |
+ |
397 |
+Reported-by: Andy Lutomirski <luto@××××××××××.net> |
398 |
+Tested-by: Alexander Tsoy <alexander@××××.me> |
399 |
+Signed-off-by: Josh Poimboeuf <jpoimboe@××××××.com> |
400 |
+Cc: Andy Lutomirski <luto@××××××.org> |
401 |
+Cc: Linus Torvalds <torvalds@××××××××××××××××.org> |
402 |
+Cc: Peter Zijlstra <peterz@×××××××××.org> |
403 |
+Cc: Thomas Gleixner <tglx@××××××××××.de> |
404 |
+Cc: Toralf Förster <toralf.foerster@×××.de> |
405 |
+Cc: stable@×××××××××××.org |
406 |
+Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") |
407 |
+Link: http://lkml.kernel.org/r/396f84491d2f0ef64eda4217a2165f5712f6a115.1514736742.git.jpoimboe@××××××.com |
408 |
+Signed-off-by: Ingo Molnar <mingo@××××××.org> |
409 |
+--- |
410 |
+ arch/x86/kernel/dumpstack.c | 3 ++- |
411 |
+ 1 file changed, 2 insertions(+), 1 deletion(-) |
412 |
+ |
413 |
+diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c |
414 |
+index d0bb176a7261a..afbecff161d16 100644 |
415 |
+--- a/arch/x86/kernel/dumpstack.c |
416 |
++++ b/arch/x86/kernel/dumpstack.c |
417 |
+@@ -115,6 +115,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
418 |
+ |
419 |
+ unwind_start(&state, task, regs, stack); |
420 |
+ stack = stack ? : get_stack_pointer(task, regs); |
421 |
++ regs = unwind_get_entry_regs(&state, &partial); |
422 |
+ |
423 |
+ /* |
424 |
+ * Iterate through the stacks, starting with the current stack pointer. |
425 |
+@@ -132,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
426 |
+ * - hardirq stack |
427 |
+ * - entry stack |
428 |
+ */ |
429 |
+- for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { |
430 |
++ for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { |
431 |
+ const char *stack_name; |
432 |
+ |
433 |
+ if (get_stack_info(stack, task, &stack_info, &visit_mask)) { |
434 |
+ |
435 |
+From d7732ba55c4b6a2da339bb12589c515830cfac2c Mon Sep 17 00:00:00 2001 |
436 |
+From: Thomas Gleixner <tglx@××××××××××.de> |
437 |
+Date: Wed, 3 Jan 2018 19:52:04 +0100 |
438 |
+Subject: [PATCH 6/7] x86/pti: Switch to kernel CR3 at early in |
439 |
+ entry_SYSCALL_compat() |
440 |
+ |
441 |
+The preparation for PTI which added CR3 switching to the entry code |
442 |
+misplaced the CR3 switch in entry_SYSCALL_compat(). |
443 |
+ |
444 |
+With PTI enabled the entry code tries to access a per cpu variable after |
445 |
+switching to kernel GS. This fails because that variable is not mapped to |
446 |
+user space. This results in a double fault and in the worst case a kernel |
447 |
+crash. |
448 |
+ |
449 |
+Move the switch ahead of the access and clobber RSP which has been saved |
450 |
+already. |
451 |
+ |
452 |
+Fixes: 8a09317b895f ("x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching") |
453 |
+Reported-by: Lars Wendler <wendler.lars@×××.de> |
454 |
+Reported-by: Laura Abbott <labbott@××××××.com> |
455 |
+Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
456 |
+Cc: Borislav Petkov <bp@××××××.de> |
457 |
+Cc: Andy Lutomirski <luto@××××××.org>, |
458 |
+Cc: Dave Hansen <dave.hansen@×××××××××××.com>, |
459 |
+Cc: Peter Zijlstra <peterz@×××××××××.org>, |
460 |
+Cc: Greg KH <gregkh@×××××××××××××××.org>, , |
461 |
+Cc: Boris Ostrovsky <boris.ostrovsky@××××××.com>, |
462 |
+Cc: Juergen Gross <jgross@××××.com> |
463 |
+Cc: stable@×××××××××××.org |
464 |
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031949200.1957@nanos |
465 |
+--- |
466 |
+ arch/x86/entry/entry_64_compat.S | 13 ++++++------- |
467 |
+ 1 file changed, 6 insertions(+), 7 deletions(-) |
468 |
+ |
469 |
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S |
470 |
+index 40f17009ec20c..98d5358e4041a 100644 |
471 |
+--- a/arch/x86/entry/entry_64_compat.S |
472 |
++++ b/arch/x86/entry/entry_64_compat.S |
473 |
+@@ -190,8 +190,13 @@ ENTRY(entry_SYSCALL_compat) |
474 |
+ /* Interrupts are off on entry. */ |
475 |
+ swapgs |
476 |
+ |
477 |
+- /* Stash user ESP and switch to the kernel stack. */ |
478 |
++ /* Stash user ESP */ |
479 |
+ movl %esp, %r8d |
480 |
++ |
481 |
++ /* Use %rsp as scratch reg. User ESP is stashed in r8 */ |
482 |
++ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp |
483 |
++ |
484 |
++ /* Switch to the kernel stack */ |
485 |
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
486 |
+ |
487 |
+ /* Construct struct pt_regs on stack */ |
488 |
+@@ -219,12 +224,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) |
489 |
+ pushq $0 /* pt_regs->r14 = 0 */ |
490 |
+ pushq $0 /* pt_regs->r15 = 0 */ |
491 |
+ |
492 |
+- /* |
493 |
+- * We just saved %rdi so it is safe to clobber. It is not |
494 |
+- * preserved during the C calls inside TRACE_IRQS_OFF anyway. |
495 |
+- */ |
496 |
+- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi |
497 |
+- |
498 |
+ /* |
499 |
+ * User mode is traced as though IRQs are on, and SYSENTER |
500 |
+ * turned them off. |
501 |
+ |
502 |
+From 2fd9c41aea47f4ad071accf94b94f94f2c4d31eb Mon Sep 17 00:00:00 2001 |
503 |
+From: Nick Desaulniers <ndesaulniers@××××××.com> |
504 |
+Date: Wed, 3 Jan 2018 12:39:52 -0800 |
505 |
+Subject: [PATCH 7/7] x86/process: Define cpu_tss_rw in same section as |
506 |
+ declaration |
507 |
+ |
508 |
+cpu_tss_rw is declared with DECLARE_PER_CPU_PAGE_ALIGNED |
509 |
+but then defined with DEFINE_PER_CPU_SHARED_ALIGNED |
510 |
+leading to section mismatch warnings. |
511 |
+ |
512 |
+Use DEFINE_PER_CPU_PAGE_ALIGNED consistently. This is necessary because |
513 |
+it's mapped to the cpu entry area and must be page aligned. |
514 |
+ |
515 |
+[ tglx: Massaged changelog a bit ] |
516 |
+ |
517 |
+Fixes: 1a935bc3d4ea ("x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct") |
518 |
+Suggested-by: Thomas Gleixner <tglx@××××××××××.de> |
519 |
+Signed-off-by: Nick Desaulniers <ndesaulniers@××××××.com> |
520 |
+Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
521 |
+Cc: thomas.lendacky@×××.com |
522 |
+Cc: Borislav Petkov <bpetkov@××××.de> |
523 |
+Cc: tklauser@×××××××.ch |
524 |
+Cc: minipli@××××××××××.com |
525 |
+Cc: me@××××××××.com |
526 |
+Cc: namit@××××××.com |
527 |
+Cc: luto@××××××.org |
528 |
+Cc: jpoimboe@××××××.com |
529 |
+Cc: tj@××××××.org |
530 |
+Cc: cl@×××××.com |
531 |
+Cc: bp@××××.de |
532 |
+Cc: thgarnie@××××××.com |
533 |
+Cc: kirill.shutemov@×××××××××××.com |
534 |
+Cc: stable@×××××××××××.org |
535 |
+Link: https://lkml.kernel.org/r/20180103203954.183360-1-ndesaulniers@××××××.com |
536 |
+--- |
537 |
+ arch/x86/kernel/process.c | 2 +- |
538 |
+ 1 file changed, 1 insertion(+), 1 deletion(-) |
539 |
+ |
540 |
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c |
541 |
+index 5174159784093..3cb2486c47e48 100644 |
542 |
+--- a/arch/x86/kernel/process.c |
543 |
++++ b/arch/x86/kernel/process.c |
544 |
+@@ -47,7 +47,7 @@ |
545 |
+ * section. Since TSS's are completely CPU-local, we want them |
546 |
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong. |
547 |
+ */ |
548 |
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { |
549 |
++__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { |
550 |
+ .x86_tss = { |
551 |
+ /* |
552 |
+ * .sp0 is only used when entering ring 0 from a lower |
553 |
|
554 |
diff --git a/1701_make_sure_the_user_kernel_PTEs_match.patch b/1701_make_sure_the_user_kernel_PTEs_match.patch |
555 |
deleted file mode 100644 |
556 |
index 601940b..0000000 |
557 |
--- a/1701_make_sure_the_user_kernel_PTEs_match.patch |
558 |
+++ /dev/null |
559 |
@@ -1,56 +0,0 @@ |
560 |
-From 52994c256df36fda9a715697431cba9daecb6b11 Mon Sep 17 00:00:00 2001 |
561 |
-From: Thomas Gleixner <tglx@××××××××××.de> |
562 |
-Date: Wed, 3 Jan 2018 15:57:59 +0100 |
563 |
-Subject: x86/pti: Make sure the user/kernel PTEs match |
564 |
- |
565 |
-Meelis reported that his K8 Athlon64 emits MCE warnings when PTI is |
566 |
-enabled: |
567 |
- |
568 |
-[Hardware Error]: Error Addr: 0x0000ffff81e000e0 |
569 |
-[Hardware Error]: MC1 Error: L1 TLB multimatch. |
570 |
-[Hardware Error]: cache level: L1, tx: INSN |
571 |
- |
572 |
-The address is in the entry area, which is mapped into kernel _AND_ user |
573 |
-space. That's special because we switch CR3 while we are executing |
574 |
-there. |
575 |
- |
576 |
-User mapping: |
577 |
-0xffffffff81e00000-0xffffffff82000000 2M ro PSE GLB x pmd |
578 |
- |
579 |
-Kernel mapping: |
580 |
-0xffffffff81000000-0xffffffff82000000 16M ro PSE x pmd |
581 |
- |
582 |
-So the K8 is complaining that the TLB entries differ. They differ in the |
583 |
-GLB bit. |
584 |
- |
585 |
-Drop the GLB bit when installing the user shared mapping. |
586 |
- |
587 |
-Fixes: 6dc72c3cbca0 ("x86/mm/pti: Share entry text PMD") |
588 |
-Reported-by: Meelis Roos <mroos@×××××.ee> |
589 |
-Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
590 |
-Tested-by: Meelis Roos <mroos@×××××.ee> |
591 |
-Cc: Borislav Petkov <bp@××××××.de> |
592 |
-Cc: Tom Lendacky <thomas.lendacky@×××.com> |
593 |
-Cc: stable@×××××××××××.org |
594 |
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031407180.1957@nanos |
595 |
---- |
596 |
- arch/x86/mm/pti.c | 3 ++- |
597 |
- 1 file changed, 2 insertions(+), 1 deletion(-) |
598 |
- |
599 |
-diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c |
600 |
-index bce8aea..2da28ba 100644 |
601 |
---- a/arch/x86/mm/pti.c |
602 |
-+++ b/arch/x86/mm/pti.c |
603 |
-@@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void) |
604 |
- static void __init pti_clone_entry_text(void) |
605 |
- { |
606 |
- pti_clone_pmds((unsigned long) __entry_text_start, |
607 |
-- (unsigned long) __irqentry_text_end, _PAGE_RW); |
608 |
-+ (unsigned long) __irqentry_text_end, |
609 |
-+ _PAGE_RW | _PAGE_GLOBAL); |
610 |
- } |
611 |
- |
612 |
- /* |
613 |
--- |
614 |
-cgit v1.1 |
615 |
- |
616 |
|
617 |
diff --git a/1702_switch_to_kernel_CR3_at_early_in_entry_SYSCALL_compat.patch b/1702_switch_to_kernel_CR3_at_early_in_entry_SYSCALL_compat.patch |
618 |
deleted file mode 100644 |
619 |
index 12d9555..0000000 |
620 |
--- a/1702_switch_to_kernel_CR3_at_early_in_entry_SYSCALL_compat.patch |
621 |
+++ /dev/null |
622 |
@@ -1,68 +0,0 @@ |
623 |
-From d7732ba55c4b6a2da339bb12589c515830cfac2c Mon Sep 17 00:00:00 2001 |
624 |
-From: Thomas Gleixner <tglx@××××××××××.de> |
625 |
-Date: Wed, 3 Jan 2018 19:52:04 +0100 |
626 |
-Subject: x86/pti: Switch to kernel CR3 at early in entry_SYSCALL_compat() |
627 |
- |
628 |
-The preparation for PTI which added CR3 switching to the entry code |
629 |
-misplaced the CR3 switch in entry_SYSCALL_compat(). |
630 |
- |
631 |
-With PTI enabled the entry code tries to access a per cpu variable after |
632 |
-switching to kernel GS. This fails because that variable is not mapped to |
633 |
-user space. This results in a double fault and in the worst case a kernel |
634 |
-crash. |
635 |
- |
636 |
-Move the switch ahead of the access and clobber RSP which has been saved |
637 |
-already. |
638 |
- |
639 |
-Fixes: 8a09317b895f ("x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching") |
640 |
-Reported-by: Lars Wendler <wendler.lars@×××.de> |
641 |
-Reported-by: Laura Abbott <labbott@××××××.com> |
642 |
-Signed-off-by: Thomas Gleixner <tglx@××××××××××.de> |
643 |
-Cc: Borislav Betkov <bp@××××××.de> |
644 |
-Cc: Andy Lutomirski <luto@××××××.org>, |
645 |
-Cc: Dave Hansen <dave.hansen@×××××××××××.com>, |
646 |
-Cc: Peter Zijlstra <peterz@×××××××××.org>, |
647 |
-Cc: Greg KH <gregkh@×××××××××××××××.org>, , |
648 |
-Cc: Boris Ostrovsky <boris.ostrovsky@××××××.com>, |
649 |
-Cc: Juergen Gross <jgross@××××.com> |
650 |
-Cc: stable@×××××××××××.org |
651 |
-Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031949200.1957@nanos |
652 |
---- |
653 |
- arch/x86/entry/entry_64_compat.S | 13 ++++++------- |
654 |
- 1 file changed, 6 insertions(+), 7 deletions(-) |
655 |
- |
656 |
-diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S |
657 |
-index 40f1700..98d5358 100644 |
658 |
---- a/arch/x86/entry/entry_64_compat.S |
659 |
-+++ b/arch/x86/entry/entry_64_compat.S |
660 |
-@@ -190,8 +190,13 @@ ENTRY(entry_SYSCALL_compat) |
661 |
- /* Interrupts are off on entry. */ |
662 |
- swapgs |
663 |
- |
664 |
-- /* Stash user ESP and switch to the kernel stack. */ |
665 |
-+ /* Stash user ESP */ |
666 |
- movl %esp, %r8d |
667 |
-+ |
668 |
-+ /* Use %rsp as scratch reg. User ESP is stashed in r8 */ |
669 |
-+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp |
670 |
-+ |
671 |
-+ /* Switch to the kernel stack */ |
672 |
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
673 |
- |
674 |
- /* Construct struct pt_regs on stack */ |
675 |
-@@ -220,12 +225,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) |
676 |
- pushq $0 /* pt_regs->r15 = 0 */ |
677 |
- |
678 |
- /* |
679 |
-- * We just saved %rdi so it is safe to clobber. It is not |
680 |
-- * preserved during the C calls inside TRACE_IRQS_OFF anyway. |
681 |
-- */ |
682 |
-- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi |
683 |
-- |
684 |
-- /* |
685 |
- * User mode is traced as though IRQs are on, and SYSENTER |
686 |
- * turned them off. |
687 |
- */ |
688 |
--- |
689 |
-cgit v1.1 |
690 |
- |