Author: mpagano
Date: 2012-02-27 23:49:12 +0000 (Mon, 27 Feb 2012)
New Revision: 2097

Added:
genpatches-2.6/trunk/3.2/1007_linux-3.2.8.patch
Modified:
genpatches-2.6/trunk/3.2/0000_README
Log:
Linux patch 3.2.8

Modified: genpatches-2.6/trunk/3.2/0000_README
===================================================================
--- genpatches-2.6/trunk/3.2/0000_README 2012-02-21 17:01:23 UTC (rev 2096)
+++ genpatches-2.6/trunk/3.2/0000_README 2012-02-27 23:49:12 UTC (rev 2097)
@@ -68,6 +68,10 @@
From: http://www.kernel.org
Desc: Linux 3.2.7

+Patch: 1007_linux-3.2.8.patch
+From: http://www.kernel.org
+Desc: Linux 3.2.8
+
Patch: 2400_kcopy-patch-for-infiniband-driver.patch
From: Alexey Shvetsov <alexxy@g.o>
Desc: Zero copy for infiniband psm userspace driver

Added: genpatches-2.6/trunk/3.2/1007_linux-3.2.8.patch
===================================================================
--- genpatches-2.6/trunk/3.2/1007_linux-3.2.8.patch (rev 0)
+++ genpatches-2.6/trunk/3.2/1007_linux-3.2.8.patch 2012-02-27 23:49:12 UTC (rev 2097)
@@ -0,0 +1,666 @@
+diff --git a/Makefile b/Makefile
+index d1bdc90..7df8a84 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 3
+ PATCHLEVEL = 2
+-SUBLEVEL = 7
++SUBLEVEL = 8
+ EXTRAVERSION =
+ NAME = Saber-toothed Squirrel
+
+diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
+index c9e09ea..a850b4d 100644
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size;
+ extern void fpu_init(void);
+ extern void mxcsr_feature_mask_init(void);
+ extern int init_fpu(struct task_struct *child);
+-extern asmlinkage void math_state_restore(void);
+-extern void __math_state_restore(void);
++extern void __math_state_restore(struct task_struct *);
++extern void math_state_restore(void);
+ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+
+ extern user_regset_active_fn fpregs_active, xfpregs_active;
+@@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu)
+
+ #endif /* CONFIG_X86_64 */
+
+-/* We need a safe address that is cheap to find and that is already
+- in L1 during context switch. The best choices are unfortunately
+- different for UP and SMP */
+-#ifdef CONFIG_SMP
+-#define safe_address (__per_cpu_offset[0])
+-#else
+-#define safe_address (kstat_cpu(0).cpustat.user)
+-#endif
+-
+ /*
+- * These must be called with preempt disabled
++ * These must be called with preempt disabled. Returns
++ * 'true' if the FPU state is still intact.
+ */
+-static inline void fpu_save_init(struct fpu *fpu)
++static inline int fpu_save_init(struct fpu *fpu)
+ {
+ if (use_xsave()) {
+ fpu_xsave(fpu);
+@@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
+ * xsave header may indicate the init state of the FP.
+ */
+ if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
+- return;
++ return 1;
+ } else if (use_fxsr()) {
+ fpu_fxsave(fpu);
+ } else {
+ asm volatile("fnsave %[fx]; fwait"
+ : [fx] "=m" (fpu->state->fsave));
+- return;
++ return 0;
+ }
+
+- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
++ /*
++ * If exceptions are pending, we need to clear them so
++ * that we don't randomly get exceptions later.
++ *
++ * FIXME! Is this perhaps only true for the old-style
++ * irq13 case? Maybe we could leave the x87 state
++ * intact otherwise?
++ */
++ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
+ asm volatile("fnclex");
+-
+- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+- is pending. Clear the x87 state here by setting it to fixed
+- values. safe_address is a random variable that should be in L1 */
+- alternative_input(
+- ASM_NOP8 ASM_NOP2,
+- "emms\n\t" /* clear stack tags */
+- "fildl %P[addr]", /* set F?P to defined value */
+- X86_FEATURE_FXSAVE_LEAK,
+- [addr] "m" (safe_address));
++ return 0;
++ }
++ return 1;
+ }
+
+-static inline void __save_init_fpu(struct task_struct *tsk)
++static inline int __save_init_fpu(struct task_struct *tsk)
+ {
+- fpu_save_init(&tsk->thread.fpu);
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
++ return fpu_save_init(&tsk->thread.fpu);
+ }
+
+ static inline int fpu_fxrstor_checking(struct fpu *fpu)
+@@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
+ }
+
+ /*
+- * Signal frame handlers...
++ * Software FPU state helpers. Careful: these need to
++ * be preemption protection *and* they need to be
++ * properly paired with the CR0.TS changes!
+ */
+-extern int save_i387_xstate(void __user *buf);
+-extern int restore_i387_xstate(void __user *buf);
++static inline int __thread_has_fpu(struct task_struct *tsk)
++{
++ return tsk->thread.has_fpu;
++}
+
+-static inline void __unlazy_fpu(struct task_struct *tsk)
++/* Must be paired with an 'stts' after! */
++static inline void __thread_clear_has_fpu(struct task_struct *tsk)
+ {
+- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+- __save_init_fpu(tsk);
+- stts();
+- } else
+- tsk->fpu_counter = 0;
++ tsk->thread.has_fpu = 0;
++}
++
++/* Must be paired with a 'clts' before! */
++static inline void __thread_set_has_fpu(struct task_struct *tsk)
++{
++ tsk->thread.has_fpu = 1;
+ }
+
++/*
++ * Encapsulate the CR0.TS handling together with the
++ * software flag.
++ *
++ * These generally need preemption protection to work,
++ * do try to avoid using these on their own.
++ */
++static inline void __thread_fpu_end(struct task_struct *tsk)
++{
++ __thread_clear_has_fpu(tsk);
++ stts();
++}
++
++static inline void __thread_fpu_begin(struct task_struct *tsk)
++{
++ clts();
++ __thread_set_has_fpu(tsk);
++}
++
++/*
++ * FPU state switching for scheduling.
++ *
++ * This is a two-stage process:
++ *
++ * - switch_fpu_prepare() saves the old state and
++ * sets the new state of the CR0.TS bit. This is
++ * done within the context of the old process.
++ *
++ * - switch_fpu_finish() restores the new state as
++ * necessary.
++ */
++typedef struct { int preload; } fpu_switch_t;
++
++/*
++ * FIXME! We could do a totally lazy restore, but we need to
++ * add a per-cpu "this was the task that last touched the FPU
++ * on this CPU" variable, and the task needs to have a "I last
++ * touched the FPU on this CPU" and check them.
++ *
++ * We don't do that yet, so "fpu_lazy_restore()" always returns
++ * false, but some day..
++ */
++#define fpu_lazy_restore(tsk) (0)
++#define fpu_lazy_state_intact(tsk) do { } while (0)
++
++static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
++{
++ fpu_switch_t fpu;
++
++ fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
++ if (__thread_has_fpu(old)) {
++ if (__save_init_fpu(old))
++ fpu_lazy_state_intact(old);
++ __thread_clear_has_fpu(old);
++ old->fpu_counter++;
++
++ /* Don't change CR0.TS if we just switch! */
++ if (fpu.preload) {
++ __thread_set_has_fpu(new);
++ prefetch(new->thread.fpu.state);
++ } else
++ stts();
++ } else {
++ old->fpu_counter = 0;
++ if (fpu.preload) {
++ if (fpu_lazy_restore(new))
++ fpu.preload = 0;
++ else
++ prefetch(new->thread.fpu.state);
++ __thread_fpu_begin(new);
++ }
++ }
++ return fpu;
++}
++
++/*
++ * By the time this gets called, we've already cleared CR0.TS and
++ * given the process the FPU if we are going to preload the FPU
++ * state - all we need to do is to conditionally restore the register
++ * state itself.
++ */
++static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
++{
++ if (fpu.preload)
++ __math_state_restore(new);
++}
++
++/*
++ * Signal frame handlers...
++ */
++extern int save_i387_xstate(void __user *buf);
++extern int restore_i387_xstate(void __user *buf);
++
+ static inline void __clear_fpu(struct task_struct *tsk)
+ {
+- if (task_thread_info(tsk)->status & TS_USEDFPU) {
++ if (__thread_has_fpu(tsk)) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+- stts();
++ __thread_fpu_end(tsk);
+ }
+ }
+
++/*
++ * Were we in an interrupt that interrupted kernel mode?
++ *
++ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
++ * pair does nothing at all: the thread must not have fpu (so
++ * that we don't try to save the FPU state), and TS must
++ * be set (so that the clts/stts pair does nothing that is
++ * visible in the interrupted kernel thread).
++ */
++static inline bool interrupted_kernel_fpu_idle(void)
++{
++ return !__thread_has_fpu(current) &&
++ (read_cr0() & X86_CR0_TS);
++}
++
++/*
++ * Were we in user mode (or vm86 mode) when we were
++ * interrupted?
++ *
++ * Doing kernel_fpu_begin/end() is ok if we are running
++ * in an interrupt context from user mode - we'll just
++ * save the FPU state as required.
++ */
++static inline bool interrupted_user_mode(void)
++{
++ struct pt_regs *regs = get_irq_regs();
++ return regs && user_mode_vm(regs);
++}
++
++/*
++ * Can we use the FPU in kernel mode with the
++ * whole "kernel_fpu_begin/end()" sequence?
++ *
++ * It's always ok in process context (ie "not interrupt")
++ * but it is sometimes ok even from an irq.
++ */
++static inline bool irq_fpu_usable(void)
++{
++ return !in_interrupt() ||
++ interrupted_user_mode() ||
++ interrupted_kernel_fpu_idle();
++}
++
+ static inline void kernel_fpu_begin(void)
+ {
+- struct thread_info *me = current_thread_info();
++ struct task_struct *me = current;
++
++ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+- if (me->status & TS_USEDFPU)
+- __save_init_fpu(me->task);
+- else
++ if (__thread_has_fpu(me)) {
++ __save_init_fpu(me);
++ __thread_clear_has_fpu(me);
++ /* We do 'stts()' in kernel_fpu_end() */
++ } else
+ clts();
+ }
+
+@@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void)
+ preempt_enable();
+ }
+
+-static inline bool irq_fpu_usable(void)
+-{
+- struct pt_regs *regs;
+-
+- return !in_interrupt() || !(regs = get_irq_regs()) || \
+- user_mode(regs) || (read_cr0() & X86_CR0_TS);
+-}
+-
+ /*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+@@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state)
+ }
+
+ /*
++ * The question "does this thread have fpu access?"
++ * is slightly racy, since preemption could come in
++ * and revoke it immediately after the test.
++ *
++ * However, even in that very unlikely scenario,
++ * we can just assume we have FPU access - typically
++ * to save the FP state - we'll just take a #NM
++ * fault and get the FPU access back.
++ *
++ * The actual user_fpu_begin/end() functions
++ * need to be preemption-safe, though.
++ *
++ * NOTE! user_fpu_end() must be used only after you
++ * have saved the FP state, and user_fpu_begin() must
++ * be used only immediately before restoring it.
++ * These functions do not do any save/restore on
++ * their own.
++ */
++static inline int user_has_fpu(void)
++{
++ return __thread_has_fpu(current);
++}
++
++static inline void user_fpu_end(void)
++{
++ preempt_disable();
++ __thread_fpu_end(current);
++ preempt_enable();
++}
++
++static inline void user_fpu_begin(void)
++{
++ preempt_disable();
++ if (!user_has_fpu())
++ __thread_fpu_begin(current);
++ preempt_enable();
++}
++
++/*
+ * These disable preemption on their own and are safe
+ */
+ static inline void save_init_fpu(struct task_struct *tsk)
+ {
++ WARN_ON_ONCE(!__thread_has_fpu(tsk));
+ preempt_disable();
+ __save_init_fpu(tsk);
+- stts();
++ __thread_fpu_end(tsk);
+ preempt_enable();
+ }
+
+ static inline void unlazy_fpu(struct task_struct *tsk)
+ {
+ preempt_disable();
+- __unlazy_fpu(tsk);
++ if (__thread_has_fpu(tsk)) {
++ __save_init_fpu(tsk);
++ __thread_fpu_end(tsk);
++ } else
++ tsk->fpu_counter = 0;
+ preempt_enable();
+ }
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index b650435..bb3ee36 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -456,6 +456,7 @@ struct thread_struct {
+ unsigned long trap_no;
+ unsigned long error_code;
+ /* floating point and extended processor state */
++ unsigned long has_fpu;
+ struct fpu fpu;
+ #ifdef CONFIG_X86_32
+ /* Virtual 86 mode info */
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index a1fe5c1..d7ef849 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void)
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+-#define TS_USEDFPU 0x0001 /* FPU was used by this task
+- this quantum (SMP) */
+ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+ #define TS_POLLING 0x0004 /* idle task polling need_resched,
+ skip sending interrupt */
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 795b79f..8598296 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -297,22 +297,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ *next = &next_p->thread;
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+- bool preload_fpu;
++ fpu_switch_t fpu;
+
+ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+
+- /*
+- * If the task has used fpu the last 5 timeslices, just do a full
+- * restore of the math state immediately to avoid the trap; the
+- * chances of needing FPU soon are obviously high now
+- */
+- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+-
+- __unlazy_fpu(prev_p);
+-
+- /* we're going to use this soon, after a few expensive things */
+- if (preload_fpu)
+- prefetch(next->fpu.state);
++ fpu = switch_fpu_prepare(prev_p, next_p);
+
+ /*
+ * Reload esp0.
+@@ -352,11 +341,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
+ __switch_to_xtra(prev_p, next_p, tss);
+
+- /* If we're going to preload the fpu context, make sure clts
+- is run while we're batching the cpu state updates. */
+- if (preload_fpu)
+- clts();
+-
+ /*
+ * Leave lazy mode, flushing any hypercalls made here.
+ * This must be done before restoring TLS segments so
+@@ -366,15 +350,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ */
+ arch_end_context_switch(next_p);
+
+- if (preload_fpu)
+- __math_state_restore();
+-
+ /*
+ * Restore %gs if needed (which is common)
+ */
+ if (prev->gs | next->gs)
+ lazy_load_gs(next->gs);
+
++ switch_fpu_finish(next_p, fpu);
++
+ percpu_write(current_task, next_p);
+
+ return prev_p;
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 3bd7e6e..6a364a6 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -381,18 +381,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ unsigned fsindex, gsindex;
+- bool preload_fpu;
++ fpu_switch_t fpu;
+
+- /*
+- * If the task has used fpu the last 5 timeslices, just do a full
+- * restore of the math state immediately to avoid the trap; the
+- * chances of needing FPU soon are obviously high now
+- */
+- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+-
+- /* we're going to use this soon, after a few expensive things */
+- if (preload_fpu)
+- prefetch(next->fpu.state);
++ fpu = switch_fpu_prepare(prev_p, next_p);
+
+ /*
+ * Reload esp0, LDT and the page table pointer:
+@@ -422,13 +413,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+
+ load_TLS(next, cpu);
+
+- /* Must be after DS reload */
+- __unlazy_fpu(prev_p);
+-
+- /* Make sure cpu is ready for new context */
+- if (preload_fpu)
+- clts();
+-
+ /*
+ * Leave lazy mode, flushing any hypercalls made here.
+ * This must be done before restoring TLS segments so
+@@ -469,6 +453,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
+ prev->gsindex = gsindex;
+
++ switch_fpu_finish(next_p, fpu);
++
+ /*
+ * Switch the PDA and FPU contexts.
+ */
+@@ -487,13 +473,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
+ __switch_to_xtra(prev_p, next_p, tss);
+
+- /*
+- * Preload the FPU context, now that we've determined that the
+- * task is likely to be using it.
+- */
+- if (preload_fpu)
+- __math_state_restore();
+-
+ return prev_p;
+ }
+
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index a8e3eb8..31d9d0f 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -562,25 +562,34 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
+ }
+
+ /*
+- * __math_state_restore assumes that cr0.TS is already clear and the
+- * fpu state is all ready for use. Used during context switch.
++ * This gets called with the process already owning the
++ * FPU state, and with CR0.TS cleared. It just needs to
++ * restore the FPU register state.
+ */
+-void __math_state_restore(void)
++void __math_state_restore(struct task_struct *tsk)
+ {
+- struct thread_info *thread = current_thread_info();
+- struct task_struct *tsk = thread->task;
++ /* We need a safe address that is cheap to find and that is already
++ in L1. We've just brought in "tsk->thread.has_fpu", so use that */
++#define safe_address (tsk->thread.has_fpu)
++
++ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
++ is pending. Clear the x87 state here by setting it to fixed
++ values. safe_address is a random variable that should be in L1 */
++ alternative_input(
++ ASM_NOP8 ASM_NOP2,
++ "emms\n\t" /* clear stack tags */
++ "fildl %P[addr]", /* set F?P to defined value */
++ X86_FEATURE_FXSAVE_LEAK,
++ [addr] "m" (safe_address));
+
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+- stts();
++ __thread_fpu_end(tsk);
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+-
+- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+- tsk->fpu_counter++;
+ }
+
+ /*
+@@ -590,13 +599,12 @@ void __math_state_restore(void)
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ *
+- * Must be called with kernel preemption disabled (in this case,
+- * local interrupts are disabled at the call-site in entry.S).
++ * Must be called with kernel preemption disabled (eg with local
++ * local interrupts as in the case of do_device_not_available).
+ */
+-asmlinkage void math_state_restore(void)
++void math_state_restore(void)
+ {
+- struct thread_info *thread = current_thread_info();
+- struct task_struct *tsk = thread->task;
++ struct task_struct *tsk = current;
+
+ if (!tsk_used_math(tsk)) {
+ local_irq_enable();
+@@ -613,9 +621,10 @@ asmlinkage void math_state_restore(void)
+ local_irq_disable();
+ }
+
+- clts(); /* Allow maths ops (or we recurse) */
++ __thread_fpu_begin(tsk);
++ __math_state_restore(tsk);
+
+- __math_state_restore();
++ tsk->fpu_counter++;
+ }
+ EXPORT_SYMBOL_GPL(math_state_restore);
+
+diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
+index a391134..7110911 100644
+--- a/arch/x86/kernel/xsave.c
++++ b/arch/x86/kernel/xsave.c
+@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
+ if (!fx)
+ return;
+
+- BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
++ BUG_ON(__thread_has_fpu(tsk));
+
+ xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
+
+@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
+ if (!used_math())
+ return 0;
+
+- if (task_thread_info(tsk)->status & TS_USEDFPU) {
++ if (user_has_fpu()) {
+ if (use_xsave())
+ err = xsave_user(buf);
+ else
+@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)
+
+ if (err)
+ return err;
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+- stts();
++ user_fpu_end();
+ } else {
+ sanitize_i387_state(tsk);
+ if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
+@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
+ return err;
+ }
+
+- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+- clts();
+- task_thread_info(current)->status |= TS_USEDFPU;
+- }
++ user_fpu_begin();
+ if (use_xsave())
+ err = restore_user_xstate(buf);
+ else
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 579a0b5..4ea7678 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1456,7 +1456,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
+ #ifdef CONFIG_X86_64
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ #endif
+- if (current_thread_info()->status & TS_USEDFPU)
++ if (__thread_has_fpu(current))
+ clts();
+ load_gdt(&__get_cpu_var(host_gdt));
+ }