1 |
commit: d4db41fa4a2549eb2cbd7f12423e34ced91cf9aa |
2 |
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org> |
3 |
AuthorDate: Mon Jun 29 17:40:10 2020 +0000 |
4 |
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org> |
5 |
CommitDate: Mon Jun 29 17:40:10 2020 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=d4db41fa |
7 |
|
8 |
Kernel patch enables gcc >= v10.1 optimizations for additional CPUs. |
9 |
|
10 |
Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org> |
11 |
|
12 |
0000_README | 4 + |
13 |
5013_enable-cpu-optimizations-for-gcc10.patch | 671 ++++++++++++++++++++++++++ |
14 |
2 files changed, 675 insertions(+) |
15 |
|
16 |
diff --git a/0000_README b/0000_README |
17 |
index ca703c6..92c9456 100644 |
18 |
--- a/0000_README |
19 |
+++ b/0000_README |
20 |
@@ -270,3 +270,7 @@ Desc: Kernel patch for >= gccv8 enables kernel >= v4.13 optimizations for addi |
21 |
Patch: 5012_enable-cpu-optimizations-for-gcc91.patch |
22 |
From: https://github.com/graysky2/kernel_gcc_patch/ |
23 |
Desc: Kernel patch enables gcc >= v9.1 optimizations for additional CPUs. |
24 |
+ |
25 |
+Patch: 5013_enable-cpu-optimizations-for-gcc10.patch |
26 |
+From: https://github.com/graysky2/kernel_gcc_patch/ |
27 |
+Desc: Kernel patch enables gcc >= v10.1 optimizations for additional CPUs. |
28 |
|
29 |
diff --git a/5013_enable-cpu-optimizations-for-gcc10.patch b/5013_enable-cpu-optimizations-for-gcc10.patch |
30 |
new file mode 100644 |
31 |
index 0000000..2a1e27d |
32 |
--- /dev/null |
33 |
+++ b/5013_enable-cpu-optimizations-for-gcc10.patch |
34 |
@@ -0,0 +1,671 @@ |
35 |
+WARNING |
36 |
+This patch works with gcc versions 10.1+ and with kernel versions 4.19-5.4 and should |
37 |
+NOT be applied when compiling on older versions of gcc due to key name changes |
38 |
+of the march flags introduced with the version 4.9 release of gcc.[1] |
39 |
+ |
40 |
+Use the older version of this patch hosted on the same github for older |
41 |
+versions of gcc. |
42 |
+ |
43 |
+FEATURES |
44 |
+This patch adds additional CPU options to the Linux kernel accessible under: |
45 |
+ Processor type and features ---> |
46 |
+ Processor family ---> |
47 |
+ |
48 |
+The expanded microarchitectures include: |
49 |
+* AMD Improved K8-family |
50 |
+* AMD K10-family |
51 |
+* AMD Family 10h (Barcelona) |
52 |
+* AMD Family 14h (Bobcat) |
53 |
+* AMD Family 16h (Jaguar) |
54 |
+* AMD Family 15h (Bulldozer) |
55 |
+* AMD Family 15h (Piledriver) |
56 |
+* AMD Family 15h (Steamroller) |
57 |
+* AMD Family 15h (Excavator) |
58 |
+* AMD Family 17h (Zen) |
59 |
+* AMD Family 17h (Zen 2) |
60 |
+* Intel Silvermont low-power processors |
61 |
+* Intel Goldmont low-power processors (Apollo Lake and Denverton) |
62 |
+* Intel Goldmont Plus low-power processors (Gemini Lake) |
63 |
+* Intel 1st Gen Core i3/i5/i7 (Nehalem) |
64 |
+* Intel 1.5 Gen Core i3/i5/i7 (Westmere) |
65 |
+* Intel 2nd Gen Core i3/i5/i7 (Sandybridge) |
66 |
+* Intel 3rd Gen Core i3/i5/i7 (Ivybridge) |
67 |
+* Intel 4th Gen Core i3/i5/i7 (Haswell) |
68 |
+* Intel 5th Gen Core i3/i5/i7 (Broadwell) |
69 |
+* Intel 6th Gen Core i3/i5/i7 (Skylake) |
70 |
+* Intel 6th Gen Core i7/i9 (Skylake X) |
71 |
+* Intel 8th Gen Core i3/i5/i7 (Cannon Lake) |
72 |
+* Intel 10th Gen Core i7/i9 (Ice Lake) |
73 |
+* Intel Xeon (Cascade Lake) |
74 |
+* Intel Xeon (Cooper Lake) |
75 |
+* Intel 3rd Gen 10nm++ i3/i5/i7/i9-family (Tiger Lake) |
76 |
+ |
77 |
+It also offers to compile passing the 'native' option which, "selects the CPU |
78 |
+to generate code for at compilation time by determining the processor type of |
79 |
+the compiling machine. Using -march=native enables all instruction subsets |
80 |
+supported by the local machine and will produce code optimized for the local |
81 |
+machine under the constraints of the selected instruction set."[2] |
82 |
+ |
83 |
+Do NOT try using the 'native' option on AMD Piledriver, Steamroller, or |
84 |
+Excavator CPUs (-march=bdver{2,3,4} flag). The build will error out due to the |
85 |
+kernel's objtool issue with these.[3a,b] |
86 |
+ |
87 |
+MINOR NOTES |
88 |
+This patch also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 |
89 |
+changes. Note that upstream is using the deprecated 'march=atom' flags when I |
90 |
+believe it should use the newer 'march=bonnell' flag for atom processors.[4] |
91 |
+ |
92 |
+It is not recommended to compile on Atom-CPUs with the 'native' option.[5] The |
93 |
+recommendation is to use the 'atom' option instead. |
94 |
+ |
95 |
+BENEFITS |
96 |
+Small but real speed increases are measurable using a make endpoint comparing |
97 |
+a generic kernel to one built with one of the respective microarchs. |
98 |
+ |
99 |
+See the following experimental evidence supporting this statement: |
100 |
+https://github.com/graysky2/kernel_gcc_patch |
101 |
+ |
102 |
+REQUIREMENTS |
103 |
+linux version 4.19-lts and 5.4-lts |
104 |
+gcc version >=10.1 |
105 |
+ |
106 |
+ACKNOWLEDGMENTS |
107 |
+This patch builds on the seminal work by Jeroen.[6] |
108 |
+ |
109 |
+REFERENCES |
110 |
+1. https://gcc.gnu.org/gcc-4.9/changes.html |
111 |
+2. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html |
112 |
+3a. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95671#c11 |
113 |
+3b. https://github.com/graysky2/kernel_gcc_patch/issues/55 |
114 |
+4. https://bugzilla.kernel.org/show_bug.cgi?id=77461 |
115 |
+5. https://github.com/graysky2/kernel_gcc_patch/issues/15 |
116 |
+6. http://www.linuxforge.net/docs/linux/linux-gcc.php |
117 |
+ |
118 |
+--- a/arch/x86/include/asm/module.h 2020-06-10 15:35:02.000000000 -0400 |
119 |
++++ b/arch/x86/include/asm/module.h 2020-06-15 10:31:48.627486708 -0400 |
120 |
+@@ -25,6 +25,40 @@ struct mod_arch_specific { |
121 |
+ #define MODULE_PROC_FAMILY "586MMX " |
122 |
+ #elif defined CONFIG_MCORE2 |
123 |
+ #define MODULE_PROC_FAMILY "CORE2 " |
124 |
++#elif defined CONFIG_MNATIVE |
125 |
++#define MODULE_PROC_FAMILY "NATIVE " |
126 |
++#elif defined CONFIG_MNEHALEM |
127 |
++#define MODULE_PROC_FAMILY "NEHALEM " |
128 |
++#elif defined CONFIG_MWESTMERE |
129 |
++#define MODULE_PROC_FAMILY "WESTMERE " |
130 |
++#elif defined CONFIG_MSILVERMONT |
131 |
++#define MODULE_PROC_FAMILY "SILVERMONT " |
132 |
++#elif defined CONFIG_MGOLDMONT |
133 |
++#define MODULE_PROC_FAMILY "GOLDMONT " |
134 |
++#elif defined CONFIG_MGOLDMONTPLUS |
135 |
++#define MODULE_PROC_FAMILY "GOLDMONTPLUS " |
136 |
++#elif defined CONFIG_MSANDYBRIDGE |
137 |
++#define MODULE_PROC_FAMILY "SANDYBRIDGE " |
138 |
++#elif defined CONFIG_MIVYBRIDGE |
139 |
++#define MODULE_PROC_FAMILY "IVYBRIDGE " |
140 |
++#elif defined CONFIG_MHASWELL |
141 |
++#define MODULE_PROC_FAMILY "HASWELL " |
142 |
++#elif defined CONFIG_MBROADWELL |
143 |
++#define MODULE_PROC_FAMILY "BROADWELL " |
144 |
++#elif defined CONFIG_MSKYLAKE |
145 |
++#define MODULE_PROC_FAMILY "SKYLAKE " |
146 |
++#elif defined CONFIG_MSKYLAKEX |
147 |
++#define MODULE_PROC_FAMILY "SKYLAKEX " |
148 |
++#elif defined CONFIG_MCANNONLAKE |
149 |
++#define MODULE_PROC_FAMILY "CANNONLAKE " |
150 |
++#elif defined CONFIG_MICELAKE |
151 |
++#define MODULE_PROC_FAMILY "ICELAKE " |
152 |
++#elif defined CONFIG_MCASCADELAKE |
153 |
++#define MODULE_PROC_FAMILY "CASCADELAKE " |
154 |
++#elif defined CONFIG_MCOOPERLAKE |
155 |
++#define MODULE_PROC_FAMILY "COOPERLAKE " |
156 |
++#elif defined CONFIG_MTIGERLAKE |
157 |
++#define MODULE_PROC_FAMILY "TIGERLAKE " |
158 |
+ #elif defined CONFIG_MATOM |
159 |
+ #define MODULE_PROC_FAMILY "ATOM " |
160 |
+ #elif defined CONFIG_M686 |
161 |
+@@ -43,6 +77,28 @@ struct mod_arch_specific { |
162 |
+ #define MODULE_PROC_FAMILY "K7 " |
163 |
+ #elif defined CONFIG_MK8 |
164 |
+ #define MODULE_PROC_FAMILY "K8 " |
165 |
++#elif defined CONFIG_MK8SSE3 |
166 |
++#define MODULE_PROC_FAMILY "K8SSE3 " |
167 |
++#elif defined CONFIG_MK10 |
168 |
++#define MODULE_PROC_FAMILY "K10 " |
169 |
++#elif defined CONFIG_MBARCELONA |
170 |
++#define MODULE_PROC_FAMILY "BARCELONA " |
171 |
++#elif defined CONFIG_MBOBCAT |
172 |
++#define MODULE_PROC_FAMILY "BOBCAT " |
173 |
++#elif defined CONFIG_MBULLDOZER |
174 |
++#define MODULE_PROC_FAMILY "BULLDOZER " |
175 |
++#elif defined CONFIG_MPILEDRIVER |
176 |
++#define MODULE_PROC_FAMILY "PILEDRIVER " |
177 |
++#elif defined CONFIG_MSTEAMROLLER |
178 |
++#define MODULE_PROC_FAMILY "STEAMROLLER " |
179 |
++#elif defined CONFIG_MJAGUAR |
180 |
++#define MODULE_PROC_FAMILY "JAGUAR " |
181 |
++#elif defined CONFIG_MEXCAVATOR |
182 |
++#define MODULE_PROC_FAMILY "EXCAVATOR " |
183 |
++#elif defined CONFIG_MZEN |
184 |
++#define MODULE_PROC_FAMILY "ZEN " |
185 |
++#elif defined CONFIG_MZEN2 |
186 |
++#define MODULE_PROC_FAMILY "ZEN2 " |
187 |
+ #elif defined CONFIG_MELAN |
188 |
+ #define MODULE_PROC_FAMILY "ELAN " |
189 |
+ #elif defined CONFIG_MCRUSOE |
190 |
+--- a/arch/x86/Kconfig.cpu 2020-06-10 15:35:02.000000000 -0400 |
191 |
++++ b/arch/x86/Kconfig.cpu 2020-06-15 10:31:48.627486708 -0400 |
192 |
+@@ -116,6 +116,7 @@ config MPENTIUMM |
193 |
+ config MPENTIUM4 |
194 |
+ bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" |
195 |
+ depends on X86_32 |
196 |
++ select X86_P6_NOP |
197 |
+ ---help--- |
198 |
+ Select this for Intel Pentium 4 chips. This includes the |
199 |
+ Pentium 4, Pentium D, P4-based Celeron and Xeon, and |
200 |
+@@ -148,9 +149,8 @@ config MPENTIUM4 |
201 |
+ -Paxville |
202 |
+ -Dempsey |
203 |
+ |
204 |
+- |
205 |
+ config MK6 |
206 |
+- bool "K6/K6-II/K6-III" |
207 |
++ bool "AMD K6/K6-II/K6-III" |
208 |
+ depends on X86_32 |
209 |
+ ---help--- |
210 |
+ Select this for an AMD K6-family processor. Enables use of |
211 |
+@@ -158,7 +158,7 @@ config MK6 |
212 |
+ flags to GCC. |
213 |
+ |
214 |
+ config MK7 |
215 |
+- bool "Athlon/Duron/K7" |
216 |
++ bool "AMD Athlon/Duron/K7" |
217 |
+ depends on X86_32 |
218 |
+ ---help--- |
219 |
+ Select this for an AMD Athlon K7-family processor. Enables use of |
220 |
+@@ -166,12 +166,90 @@ config MK7 |
221 |
+ flags to GCC. |
222 |
+ |
223 |
+ config MK8 |
224 |
+- bool "Opteron/Athlon64/Hammer/K8" |
225 |
++ bool "AMD Opteron/Athlon64/Hammer/K8" |
226 |
+ ---help--- |
227 |
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor. |
228 |
+ Enables use of some extended instructions, and passes appropriate |
229 |
+ optimization flags to GCC. |
230 |
+ |
231 |
++config MK8SSE3 |
232 |
++ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" |
233 |
++ ---help--- |
234 |
++ Select this for improved AMD Opteron or Athlon64 Hammer-family processors. |
235 |
++ Enables use of some extended instructions, and passes appropriate |
236 |
++ optimization flags to GCC. |
237 |
++ |
238 |
++config MK10 |
239 |
++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" |
240 |
++ ---help--- |
241 |
++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, |
242 |
++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. |
243 |
++ Enables use of some extended instructions, and passes appropriate |
244 |
++ optimization flags to GCC. |
245 |
++ |
246 |
++config MBARCELONA |
247 |
++ bool "AMD Barcelona" |
248 |
++ ---help--- |
249 |
++ Select this for AMD Family 10h Barcelona processors. |
250 |
++ |
251 |
++ Enables -march=barcelona |
252 |
++ |
253 |
++config MBOBCAT |
254 |
++ bool "AMD Bobcat" |
255 |
++ ---help--- |
256 |
++ Select this for AMD Family 14h Bobcat processors. |
257 |
++ |
258 |
++ Enables -march=btver1 |
259 |
++ |
260 |
++config MJAGUAR |
261 |
++ bool "AMD Jaguar" |
262 |
++ ---help--- |
263 |
++ Select this for AMD Family 16h Jaguar processors. |
264 |
++ |
265 |
++ Enables -march=btver2 |
266 |
++ |
267 |
++config MBULLDOZER |
268 |
++ bool "AMD Bulldozer" |
269 |
++ ---help--- |
270 |
++ Select this for AMD Family 15h Bulldozer processors. |
271 |
++ |
272 |
++ Enables -march=bdver1 |
273 |
++ |
274 |
++config MPILEDRIVER |
275 |
++ bool "AMD Piledriver" |
276 |
++ ---help--- |
277 |
++ Select this for AMD Family 15h Piledriver processors. |
278 |
++ |
279 |
++ Enables -march=bdver2 |
280 |
++ |
281 |
++config MSTEAMROLLER |
282 |
++ bool "AMD Steamroller" |
283 |
++ ---help--- |
284 |
++ Select this for AMD Family 15h Steamroller processors. |
285 |
++ |
286 |
++ Enables -march=bdver3 |
287 |
++ |
288 |
++config MEXCAVATOR |
289 |
++ bool "AMD Excavator" |
290 |
++ ---help--- |
291 |
++ Select this for AMD Family 15h Excavator processors. |
292 |
++ |
293 |
++ Enables -march=bdver4 |
294 |
++ |
295 |
++config MZEN |
296 |
++ bool "AMD Zen" |
297 |
++ ---help--- |
298 |
++ Select this for AMD Family 17h Zen processors. |
299 |
++ |
300 |
++ Enables -march=znver1 |
301 |
++ |
302 |
++config MZEN2 |
303 |
++ bool "AMD Zen 2" |
304 |
++ ---help--- |
305 |
++ Select this for AMD Family 17h Zen 2 processors. |
306 |
++ |
307 |
++ Enables -march=znver2 |
308 |
++ |
309 |
+ config MCRUSOE |
310 |
+ bool "Crusoe" |
311 |
+ depends on X86_32 |
312 |
+@@ -253,6 +331,7 @@ config MVIAC7 |
313 |
+ |
314 |
+ config MPSC |
315 |
+ bool "Intel P4 / older Netburst based Xeon" |
316 |
++ select X86_P6_NOP |
317 |
+ depends on X86_64 |
318 |
+ ---help--- |
319 |
+ Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey |
320 |
+@@ -262,8 +341,19 @@ config MPSC |
321 |
+ using the cpu family field |
322 |
+ in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. |
323 |
+ |
324 |
++config MATOM |
325 |
++ bool "Intel Atom" |
326 |
++ select X86_P6_NOP |
327 |
++ ---help--- |
328 |
++ |
329 |
++ Select this for the Intel Atom platform. Intel Atom CPUs have an |
330 |
++ in-order pipelining architecture and thus can benefit from |
331 |
++ accordingly optimized code. Use a recent GCC with specific Atom |
332 |
++ support in order to fully benefit from selecting this option. |
333 |
++ |
334 |
+ config MCORE2 |
335 |
+- bool "Core 2/newer Xeon" |
336 |
++ bool "Intel Core 2" |
337 |
++ select X86_P6_NOP |
338 |
+ ---help--- |
339 |
+ |
340 |
+ Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and |
341 |
+@@ -271,14 +361,151 @@ config MCORE2 |
342 |
+ family in /proc/cpuinfo. Newer ones have 6 and older ones 15 |
343 |
+ (not a typo) |
344 |
+ |
345 |
+-config MATOM |
346 |
+- bool "Intel Atom" |
347 |
++ Enables -march=core2 |
348 |
++ |
349 |
++config MNEHALEM |
350 |
++ bool "Intel Nehalem" |
351 |
++ select X86_P6_NOP |
352 |
+ ---help--- |
353 |
+ |
354 |
+- Select this for the Intel Atom platform. Intel Atom CPUs have an |
355 |
+- in-order pipelining architecture and thus can benefit from |
356 |
+- accordingly optimized code. Use a recent GCC with specific Atom |
357 |
+- support in order to fully benefit from selecting this option. |
358 |
++ Select this for 1st Gen Core processors in the Nehalem family. |
359 |
++ |
360 |
++ Enables -march=nehalem |
361 |
++ |
362 |
++config MWESTMERE |
363 |
++ bool "Intel Westmere" |
364 |
++ select X86_P6_NOP |
365 |
++ ---help--- |
366 |
++ |
367 |
++ Select this for the Intel Westmere (formerly Nehalem-C) family. |
368 |
++ |
369 |
++ Enables -march=westmere |
370 |
++ |
371 |
++config MSILVERMONT |
372 |
++ bool "Intel Silvermont" |
373 |
++ select X86_P6_NOP |
374 |
++ ---help--- |
375 |
++ |
376 |
++ Select this for the Intel Silvermont platform. |
377 |
++ |
378 |
++ Enables -march=silvermont |
379 |
++ |
380 |
++config MGOLDMONT |
381 |
++ bool "Intel Goldmont" |
382 |
++ select X86_P6_NOP |
383 |
++ ---help--- |
384 |
++ |
385 |
++ Select this for the Intel Goldmont platform including Apollo Lake and Denverton. |
386 |
++ |
387 |
++ Enables -march=goldmont |
388 |
++ |
389 |
++config MGOLDMONTPLUS |
390 |
++ bool "Intel Goldmont Plus" |
391 |
++ select X86_P6_NOP |
392 |
++ ---help--- |
393 |
++ |
394 |
++ Select this for the Intel Goldmont Plus platform including Gemini Lake. |
395 |
++ |
396 |
++ Enables -march=goldmont-plus |
397 |
++ |
398 |
++config MSANDYBRIDGE |
399 |
++ bool "Intel Sandy Bridge" |
400 |
++ select X86_P6_NOP |
401 |
++ ---help--- |
402 |
++ |
403 |
++ Select this for 2nd Gen Core processors in the Sandy Bridge family. |
404 |
++ |
405 |
++ Enables -march=sandybridge |
406 |
++ |
407 |
++config MIVYBRIDGE |
408 |
++ bool "Intel Ivy Bridge" |
409 |
++ select X86_P6_NOP |
410 |
++ ---help--- |
411 |
++ |
412 |
++ Select this for 3rd Gen Core processors in the Ivy Bridge family. |
413 |
++ |
414 |
++ Enables -march=ivybridge |
415 |
++ |
416 |
++config MHASWELL |
417 |
++ bool "Intel Haswell" |
418 |
++ select X86_P6_NOP |
419 |
++ ---help--- |
420 |
++ |
421 |
++ Select this for 4th Gen Core processors in the Haswell family. |
422 |
++ |
423 |
++ Enables -march=haswell |
424 |
++ |
425 |
++config MBROADWELL |
426 |
++ bool "Intel Broadwell" |
427 |
++ select X86_P6_NOP |
428 |
++ ---help--- |
429 |
++ |
430 |
++ Select this for 5th Gen Core processors in the Broadwell family. |
431 |
++ |
432 |
++ Enables -march=broadwell |
433 |
++ |
434 |
++config MSKYLAKE |
435 |
++ bool "Intel Skylake" |
436 |
++ select X86_P6_NOP |
437 |
++ ---help--- |
438 |
++ |
439 |
++ Select this for 6th Gen Core processors in the Skylake family. |
440 |
++ |
441 |
++ Enables -march=skylake |
442 |
++ |
443 |
++config MSKYLAKEX |
444 |
++ bool "Intel Skylake X" |
445 |
++ select X86_P6_NOP |
446 |
++ ---help--- |
447 |
++ |
448 |
++ Select this for 6th Gen Core processors in the Skylake X family. |
449 |
++ |
450 |
++ Enables -march=skylake-avx512 |
451 |
++ |
452 |
++config MCANNONLAKE |
453 |
++ bool "Intel Cannon Lake" |
454 |
++ select X86_P6_NOP |
455 |
++ ---help--- |
456 |
++ |
457 |
++ Select this for 8th Gen Core processors in the Cannon Lake family. |
458 |
++ |
459 |
++ Enables -march=cannonlake |
460 |
++ |
461 |
++config MICELAKE |
462 |
++ bool "Intel Ice Lake" |
463 |
++ select X86_P6_NOP |
464 |
++ ---help--- |
465 |
++ |
466 |
++ Select this for 10th Gen Core processors in the Ice Lake family. |
467 |
++ |
468 |
++ Enables -march=icelake-client |
469 |
++ |
470 |
++config MCASCADELAKE |
471 |
++ bool "Intel Cascade Lake" |
472 |
++ select X86_P6_NOP |
473 |
++ ---help--- |
474 |
++ |
475 |
++ Select this for Xeon processors in the Cascade Lake family. |
476 |
++ |
477 |
++ Enables -march=cascadelake |
478 |
++ |
479 |
++config MCOOPERLAKE |
480 |
++ bool "Intel Cooper Lake" |
481 |
++ select X86_P6_NOP |
482 |
++ ---help--- |
483 |
++ |
484 |
++ Select this for Xeon processors in the Cooper Lake family. |
485 |
++ |
486 |
++ Enables -march=cooperlake |
487 |
++ |
488 |
++config MTIGERLAKE |
489 |
++ bool "Intel Tiger Lake" |
490 |
++ select X86_P6_NOP |
491 |
++ ---help--- |
492 |
++ |
493 |
++ Select this for third-generation 10 nm process processors in the Tiger Lake family. |
494 |
++ |
495 |
++ Enables -march=tigerlake |
496 |
+ |
497 |
+ config GENERIC_CPU |
498 |
+ bool "Generic-x86-64" |
499 |
+@@ -287,6 +514,19 @@ config GENERIC_CPU |
500 |
+ Generic x86-64 CPU. |
501 |
+ Run equally well on all x86-64 CPUs. |
502 |
+ |
503 |
++config MNATIVE |
504 |
++ bool "Native optimizations autodetected by GCC" |
505 |
++ ---help--- |
506 |
++ |
507 |
++ GCC 4.2 and above support -march=native, which automatically detects |
508 |
++ the optimum settings to use based on your processor. -march=native |
509 |
++ also detects and applies additional settings beyond -march specific |
510 |
++ to your CPU (e.g. -msse4). Unless you have a specific reason not to |
511 |
++ (e.g. distcc cross-compiling), you should probably be using |
512 |
++ -march=native rather than any other option in this list. |
513 |
++ |
514 |
++ Enables -march=native |
515 |
++ |
516 |
+ endchoice |
517 |
+ |
518 |
+ config X86_GENERIC |
519 |
+@@ -311,7 +551,7 @@ config X86_INTERNODE_CACHE_SHIFT |
520 |
+ config X86_L1_CACHE_SHIFT |
521 |
+ int |
522 |
+ default "7" if MPENTIUM4 || MPSC |
523 |
+- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU |
524 |
++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU |
525 |
+ default "4" if MELAN || M486 || MGEODEGX1 |
526 |
+ default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
527 |
+ |
528 |
+@@ -329,35 +569,36 @@ config X86_ALIGNMENT_16 |
529 |
+ |
530 |
+ config X86_INTEL_USERCOPY |
531 |
+ def_bool y |
532 |
+- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 |
533 |
++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE |
534 |
+ |
535 |
+ config X86_USE_PPRO_CHECKSUM |
536 |
+ def_bool y |
537 |
+- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM |
538 |
++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MATOM || MNATIVE |
539 |
+ |
540 |
+ config X86_USE_3DNOW |
541 |
+ def_bool y |
542 |
+ depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML |
543 |
+ |
544 |
+-# |
545 |
+-# P6_NOPs are a relatively minor optimization that require a family >= |
546 |
+-# 6 processor, except that it is broken on certain VIA chips. |
547 |
+-# Furthermore, AMD chips prefer a totally different sequence of NOPs |
548 |
+-# (which work on all CPUs). In addition, it looks like Virtual PC |
549 |
+-# does not understand them. |
550 |
+-# |
551 |
+-# As a result, disallow these if we're not compiling for X86_64 (these |
552 |
+-# NOPs do work on all x86-64 capable chips); the list of processors in |
553 |
+-# the right-hand clause are the cores that benefit from this optimization. |
554 |
+-# |
555 |
+ config X86_P6_NOP |
556 |
+- def_bool y |
557 |
+- depends on X86_64 |
558 |
+- depends on (MCORE2 || MPENTIUM4 || MPSC) |
559 |
++ default n |
560 |
++ bool "Support for P6_NOPs on Intel chips" |
561 |
++ depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE) |
562 |
++ ---help--- |
563 |
++ P6_NOPs are a relatively minor optimization that require a family >= |
564 |
++ 6 processor, except that it is broken on certain VIA chips. |
565 |
++ Furthermore, AMD chips prefer a totally different sequence of NOPs |
566 |
++ (which work on all CPUs). In addition, it looks like Virtual PC |
567 |
++ does not understand them. |
568 |
++ |
569 |
++ As a result, disallow these if we're not compiling for X86_64 (these |
570 |
++ NOPs do work on all x86-64 capable chips); the list of processors in |
571 |
++ the right-hand clause are the cores that benefit from this optimization. |
572 |
++ |
573 |
++ Say Y if you have an Intel CPU newer than Pentium Pro, N otherwise. |
574 |
+ |
575 |
+ config X86_TSC |
576 |
+ def_bool y |
577 |
+- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 |
578 |
++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MNATIVE || MATOM) || X86_64 |
579 |
+ |
580 |
+ config X86_CMPXCHG64 |
581 |
+ def_bool y |
582 |
+@@ -367,7 +608,7 @@ config X86_CMPXCHG64 |
583 |
+ # generates cmov. |
584 |
+ config X86_CMOV |
585 |
+ def_bool y |
586 |
+- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) |
587 |
++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) |
588 |
+ |
589 |
+ config X86_MINIMUM_CPU_FAMILY |
590 |
+ int |
591 |
+--- a/arch/x86/Makefile 2020-06-10 15:35:02.000000000 -0400 |
592 |
++++ b/arch/x86/Makefile 2020-06-15 10:32:39.508648036 -0400 |
593 |
+@@ -119,13 +119,60 @@ else |
594 |
+ KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) |
595 |
+ |
596 |
+ # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) |
597 |
++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) |
598 |
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) |
599 |
++ cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8) |
600 |
++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) |
601 |
++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) |
602 |
++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) |
603 |
++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) |
604 |
++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) |
605 |
++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) |
606 |
++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-mno-tbm) |
607 |
++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3) |
608 |
++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-mno-tbm) |
609 |
++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4) |
610 |
++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-mno-tbm) |
611 |
++ cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1) |
612 |
++ cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2) |
613 |
+ cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) |
614 |
+ |
615 |
+ cflags-$(CONFIG_MCORE2) += \ |
616 |
+- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) |
617 |
+- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ |
618 |
+- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) |
619 |
++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) |
620 |
++ cflags-$(CONFIG_MNEHALEM) += \ |
621 |
++ $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem)) |
622 |
++ cflags-$(CONFIG_MWESTMERE) += \ |
623 |
++ $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere)) |
624 |
++ cflags-$(CONFIG_MSILVERMONT) += \ |
625 |
++ $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont)) |
626 |
++ cflags-$(CONFIG_MGOLDMONT) += \ |
627 |
++ $(call cc-option,-march=goldmont,$(call cc-option,-mtune=goldmont)) |
628 |
++ cflags-$(CONFIG_MGOLDMONTPLUS) += \ |
629 |
++ $(call cc-option,-march=goldmont-plus,$(call cc-option,-mtune=goldmont-plus)) |
630 |
++ cflags-$(CONFIG_MSANDYBRIDGE) += \ |
631 |
++ $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge)) |
632 |
++ cflags-$(CONFIG_MIVYBRIDGE) += \ |
633 |
++ $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge)) |
634 |
++ cflags-$(CONFIG_MHASWELL) += \ |
635 |
++ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell)) |
636 |
++ cflags-$(CONFIG_MBROADWELL) += \ |
637 |
++ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell)) |
638 |
++ cflags-$(CONFIG_MSKYLAKE) += \ |
639 |
++ $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake)) |
640 |
++ cflags-$(CONFIG_MSKYLAKEX) += \ |
641 |
++ $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512)) |
642 |
++ cflags-$(CONFIG_MCANNONLAKE) += \ |
643 |
++ $(call cc-option,-march=cannonlake,$(call cc-option,-mtune=cannonlake)) |
644 |
++ cflags-$(CONFIG_MICELAKE) += \ |
645 |
++ $(call cc-option,-march=icelake-client,$(call cc-option,-mtune=icelake-client)) |
646 |
++ cflags-$(CONFIG_MCASCADELAKE) += \ |
647 |
++ $(call cc-option,-march=cascadelake,$(call cc-option,-mtune=cascadelake)) |
648 |
++ cflags-$(CONFIG_MCOOPERLAKE) += \ |
649 |
++ $(call cc-option,-march=cooperlake,$(call cc-option,-mtune=cooperlake)) |
650 |
++ cflags-$(CONFIG_MTIGERLAKE) += \ |
651 |
++ $(call cc-option,-march=tigerlake,$(call cc-option,-mtune=tigerlake)) |
652 |
++ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \ |
653 |
++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) |
654 |
+ cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) |
655 |
+ KBUILD_CFLAGS += $(cflags-y) |
656 |
+ |
657 |
+--- a/arch/x86/Makefile_32.cpu 2020-06-10 15:35:02.000000000 -0400 |
658 |
++++ b/arch/x86/Makefile_32.cpu 2020-06-15 10:31:48.627486708 -0400 |
659 |
+@@ -23,7 +23,19 @@ cflags-$(CONFIG_MK6) += -march=k6 |
660 |
+ # Please note, that patches that add -march=athlon-xp and friends are pointless. |
661 |
+ # They make zero difference whatsosever to performance at this time. |
662 |
+ cflags-$(CONFIG_MK7) += -march=athlon |
663 |
++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) |
664 |
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) |
665 |
++cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon) |
666 |
++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) |
667 |
++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) |
668 |
++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) |
669 |
++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) |
670 |
++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) |
671 |
++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) |
672 |
++cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon) |
673 |
++cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon) |
674 |
++cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon) |
675 |
++cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2,-march=athlon) |
676 |
+ cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 |
677 |
+ cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 |
678 |
+ cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) |
679 |
+@@ -32,8 +44,24 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc- |
680 |
+ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) |
681 |
+ cflags-$(CONFIG_MVIAC7) += -march=i686 |
682 |
+ cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) |
683 |
+-cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ |
684 |
+- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) |
685 |
++cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem) |
686 |
++cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere) |
687 |
++cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont) |
688 |
++cflags-$(CONFIG_MGOLDMONT) += -march=i686 $(call tune,goldmont) |
689 |
++cflags-$(CONFIG_MGOLDMONTPLUS) += -march=i686 $(call tune,goldmont-plus) |
690 |
++cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge) |
691 |
++cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge) |
692 |
++cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell) |
693 |
++cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell) |
694 |
++cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake) |
695 |
++cflags-$(CONFIG_MSKYLAKEX) += -march=i686 $(call tune,skylake-avx512) |
696 |
++cflags-$(CONFIG_MCANNONLAKE) += -march=i686 $(call tune,cannonlake) |
697 |
++cflags-$(CONFIG_MICELAKE) += -march=i686 $(call tune,icelake-client) |
698 |
++cflags-$(CONFIG_MCASCADELAKE) += -march=i686 $(call tune,cascadelake) |
699 |
++cflags-$(CONFIG_MCOOPERLAKE) += -march=i686 $(call tune,cooperlake) |
700 |
++cflags-$(CONFIG_MTIGERLAKE) += -march=i686 $(call tune,tigerlake) |
701 |
++cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \ |
702 |
++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) |
703 |
+ |
704 |
+ # AMD Elan support |
705 |
+ cflags-$(CONFIG_MELAN) += -march=i486 |