Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.1 commit in: /
Date: Fri, 10 May 2019 23:40:21
Message-Id: 1557531585.a19b0b75f0aac978ab7823ff4f3b2c031c7ba90c.mpagano@gentoo
1 commit: a19b0b75f0aac978ab7823ff4f3b2c031c7ba90c
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Fri May 10 23:39:45 2019 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Fri May 10 23:39:45 2019 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=a19b0b75
7
8 Add cpu optimization patches
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 8 +
13 ...-additional-cpu-optimizations-for-gcc-4.9.patch | 545 ++++++++++++++++++++
14 5011_enable-cpu-optimizations-for-gcc8.patch | 569 +++++++++++++++++++++
15 3 files changed, 1122 insertions(+)
16
17 diff --git a/0000_README b/0000_README
18 index 90e376f..cfba4e3 100644
19 --- a/0000_README
20 +++ b/0000_README
21 @@ -62,3 +62,11 @@ Desc: This hid-apple patch enables swapping of the FN and left Control keys an
22 Patch: 4567_distro-Gentoo-Kconfig.patch
23 From: Tom Wijsman <TomWij@g.o>
24 Desc: Add Gentoo Linux support config settings and defaults.
25 +
26 +Patch: 5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
27 +From: https://github.com/graysky2/kernel_gcc_patch/
28 +Desc: Kernel patch for gcc >= v4.9 enables kernel >= v4.13 optimizations for additional CPUs.
29 +
30 +Patch: 5011_enable-cpu-optimizations-for-gcc8.patch
31 +From: https://github.com/graysky2/kernel_gcc_patch/
32 +Desc: Kernel patch for gcc >= v8 enables kernel >= v4.13 optimizations for additional CPUs.
33
34 diff --git a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
35 new file mode 100644
36 index 0000000..a8aa759
37 --- /dev/null
38 +++ b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
39 @@ -0,0 +1,545 @@
40 +WARNING
41 +This patch works with gcc versions 4.9+ and with kernel version 4.13+ and should
42 +NOT be applied when compiling on older versions of gcc due to key name changes
43 +of the march flags introduced with the version 4.9 release of gcc.[1]
44 +
45 +Use the older version of this patch hosted on the same github for older
46 +versions of gcc.
47 +
48 +FEATURES
49 +This patch adds additional CPU options to the Linux kernel accessible under:
50 + Processor type and features --->
51 + Processor family --->
52 +
53 +The expanded microarchitectures include:
54 +* AMD Improved K8-family
55 +* AMD K10-family
56 +* AMD Family 10h (Barcelona)
57 +* AMD Family 14h (Bobcat)
58 +* AMD Family 16h (Jaguar)
59 +* AMD Family 15h (Bulldozer)
60 +* AMD Family 15h (Piledriver)
61 +* AMD Family 15h (Steamroller)
62 +* AMD Family 15h (Excavator)
63 +* AMD Family 17h (Zen)
64 +* Intel Silvermont low-power processors
65 +* Intel 1st Gen Core i3/i5/i7 (Nehalem)
66 +* Intel 1.5 Gen Core i3/i5/i7 (Westmere)
67 +* Intel 2nd Gen Core i3/i5/i7 (Sandybridge)
68 +* Intel 3rd Gen Core i3/i5/i7 (Ivybridge)
69 +* Intel 4th Gen Core i3/i5/i7 (Haswell)
70 +* Intel 5th Gen Core i3/i5/i7 (Broadwell)
71 +* Intel 6th Gen Core i3/i5/i7 (Skylake)
72 +* Intel 6th Gen Core i7/i9 (Skylake X)
73 +
74 +It also offers to compile passing the 'native' option which, "selects the CPU
75 +to generate code for at compilation time by determining the processor type of
76 +the compiling machine. Using -march=native enables all instruction subsets
77 +supported by the local machine and will produce code optimized for the local
78 +machine under the constraints of the selected instruction set."[3]
79 +
80 +MINOR NOTES
81 +This patch also changes 'atom' to 'bonnell' in accordance with the gcc v4.9
82 +changes. Note that upstream is using the deprecated 'march=atom' flags when I
83 +believe it should use the newer 'march=bonnell' flag for atom processors.[2]
84 +
85 +It is not recommended to compile on Atom-CPUs with the 'native' option.[4] The
86 +recommendation is to use the 'atom' option instead.
87 +
88 +BENEFITS
89 +Small but real speed increases are measurable using a make endpoint comparing
90 +a generic kernel to one built with one of the respective microarchs.
91 +
92 +See the following experimental evidence supporting this statement:
93 +https://github.com/graysky2/kernel_gcc_patch
94 +
95 +REQUIREMENTS
96 +linux version >=4.13
97 +gcc version >=4.9
98 +
99 +ACKNOWLEDGMENTS
100 +This patch builds on the seminal work by Jeroen.[5]
101 +
102 +REFERENCES
103 +1. https://gcc.gnu.org/gcc-4.9/changes.html
104 +2. https://bugzilla.kernel.org/show_bug.cgi?id=77461
105 +3. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
106 +4. https://github.com/graysky2/kernel_gcc_patch/issues/15
107 +5. http://www.linuxforge.net/docs/linux/linux-gcc.php
108 +
109 +--- a/arch/x86/include/asm/module.h 2018-01-28 16:20:33.000000000 -0500
110 ++++ b/arch/x86/include/asm/module.h 2018-03-10 06:42:38.688317317 -0500
111 +@@ -25,6 +25,26 @@ struct mod_arch_specific {
112 + #define MODULE_PROC_FAMILY "586MMX "
113 + #elif defined CONFIG_MCORE2
114 + #define MODULE_PROC_FAMILY "CORE2 "
115 ++#elif defined CONFIG_MNATIVE
116 ++#define MODULE_PROC_FAMILY "NATIVE "
117 ++#elif defined CONFIG_MNEHALEM
118 ++#define MODULE_PROC_FAMILY "NEHALEM "
119 ++#elif defined CONFIG_MWESTMERE
120 ++#define MODULE_PROC_FAMILY "WESTMERE "
121 ++#elif defined CONFIG_MSILVERMONT
122 ++#define MODULE_PROC_FAMILY "SILVERMONT "
123 ++#elif defined CONFIG_MSANDYBRIDGE
124 ++#define MODULE_PROC_FAMILY "SANDYBRIDGE "
125 ++#elif defined CONFIG_MIVYBRIDGE
126 ++#define MODULE_PROC_FAMILY "IVYBRIDGE "
127 ++#elif defined CONFIG_MHASWELL
128 ++#define MODULE_PROC_FAMILY "HASWELL "
129 ++#elif defined CONFIG_MBROADWELL
130 ++#define MODULE_PROC_FAMILY "BROADWELL "
131 ++#elif defined CONFIG_MSKYLAKE
132 ++#define MODULE_PROC_FAMILY "SKYLAKE "
133 ++#elif defined CONFIG_MSKYLAKEX
134 ++#define MODULE_PROC_FAMILY "SKYLAKEX "
135 + #elif defined CONFIG_MATOM
136 + #define MODULE_PROC_FAMILY "ATOM "
137 + #elif defined CONFIG_M686
138 +@@ -43,6 +63,26 @@ struct mod_arch_specific {
139 + #define MODULE_PROC_FAMILY "K7 "
140 + #elif defined CONFIG_MK8
141 + #define MODULE_PROC_FAMILY "K8 "
142 ++#elif defined CONFIG_MK8SSE3
143 ++#define MODULE_PROC_FAMILY "K8SSE3 "
144 ++#elif defined CONFIG_MK10
145 ++#define MODULE_PROC_FAMILY "K10 "
146 ++#elif defined CONFIG_MBARCELONA
147 ++#define MODULE_PROC_FAMILY "BARCELONA "
148 ++#elif defined CONFIG_MBOBCAT
149 ++#define MODULE_PROC_FAMILY "BOBCAT "
150 ++#elif defined CONFIG_MBULLDOZER
151 ++#define MODULE_PROC_FAMILY "BULLDOZER "
152 ++#elif defined CONFIG_MPILEDRIVER
153 ++#define MODULE_PROC_FAMILY "PILEDRIVER "
154 ++#elif defined CONFIG_MSTEAMROLLER
155 ++#define MODULE_PROC_FAMILY "STEAMROLLER "
156 ++#elif defined CONFIG_MJAGUAR
157 ++#define MODULE_PROC_FAMILY "JAGUAR "
158 ++#elif defined CONFIG_MEXCAVATOR
159 ++#define MODULE_PROC_FAMILY "EXCAVATOR "
160 ++#elif defined CONFIG_MZEN
161 ++#define MODULE_PROC_FAMILY "ZEN "
162 + #elif defined CONFIG_MELAN
163 + #define MODULE_PROC_FAMILY "ELAN "
164 + #elif defined CONFIG_MCRUSOE
165 +--- a/arch/x86/Kconfig.cpu 2018-01-28 16:20:33.000000000 -0500
166 ++++ b/arch/x86/Kconfig.cpu 2018-03-10 06:45:50.244371799 -0500
167 +@@ -116,6 +116,7 @@ config MPENTIUMM
168 + config MPENTIUM4
169 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
170 + depends on X86_32
171 ++ select X86_P6_NOP
172 + ---help---
173 + Select this for Intel Pentium 4 chips. This includes the
174 + Pentium 4, Pentium D, P4-based Celeron and Xeon, and
175 +@@ -148,9 +149,8 @@ config MPENTIUM4
176 + -Paxville
177 + -Dempsey
178 +
179 +-
180 + config MK6
181 +- bool "K6/K6-II/K6-III"
182 ++ bool "AMD K6/K6-II/K6-III"
183 + depends on X86_32
184 + ---help---
185 + Select this for an AMD K6-family processor. Enables use of
186 +@@ -158,7 +158,7 @@ config MK6
187 + flags to GCC.
188 +
189 + config MK7
190 +- bool "Athlon/Duron/K7"
191 ++ bool "AMD Athlon/Duron/K7"
192 + depends on X86_32
193 + ---help---
194 + Select this for an AMD Athlon K7-family processor. Enables use of
195 +@@ -166,12 +166,83 @@ config MK7
196 + flags to GCC.
197 +
198 + config MK8
199 +- bool "Opteron/Athlon64/Hammer/K8"
200 ++ bool "AMD Opteron/Athlon64/Hammer/K8"
201 + ---help---
202 + Select this for an AMD Opteron or Athlon64 Hammer-family processor.
203 + Enables use of some extended instructions, and passes appropriate
204 + optimization flags to GCC.
205 +
206 ++config MK8SSE3
207 ++ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
208 ++ ---help---
209 ++ Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
210 ++ Enables use of some extended instructions, and passes appropriate
211 ++ optimization flags to GCC.
212 ++
213 ++config MK10
214 ++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
215 ++ ---help---
216 ++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
217 ++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
218 ++ Enables use of some extended instructions, and passes appropriate
219 ++ optimization flags to GCC.
220 ++
221 ++config MBARCELONA
222 ++ bool "AMD Barcelona"
223 ++ ---help---
224 ++ Select this for AMD Family 10h Barcelona processors.
225 ++
226 ++ Enables -march=barcelona
227 ++
228 ++config MBOBCAT
229 ++ bool "AMD Bobcat"
230 ++ ---help---
231 ++ Select this for AMD Family 14h Bobcat processors.
232 ++
233 ++ Enables -march=btver1
234 ++
235 ++config MJAGUAR
236 ++ bool "AMD Jaguar"
237 ++ ---help---
238 ++ Select this for AMD Family 16h Jaguar processors.
239 ++
240 ++ Enables -march=btver2
241 ++
242 ++config MBULLDOZER
243 ++ bool "AMD Bulldozer"
244 ++ ---help---
245 ++ Select this for AMD Family 15h Bulldozer processors.
246 ++
247 ++ Enables -march=bdver1
248 ++
249 ++config MPILEDRIVER
250 ++ bool "AMD Piledriver"
251 ++ ---help---
252 ++ Select this for AMD Family 15h Piledriver processors.
253 ++
254 ++ Enables -march=bdver2
255 ++
256 ++config MSTEAMROLLER
257 ++ bool "AMD Steamroller"
258 ++ ---help---
259 ++ Select this for AMD Family 15h Steamroller processors.
260 ++
261 ++ Enables -march=bdver3
262 ++
263 ++config MEXCAVATOR
264 ++ bool "AMD Excavator"
265 ++ ---help---
266 ++ Select this for AMD Family 15h Excavator processors.
267 ++
268 ++ Enables -march=bdver4
269 ++
270 ++config MZEN
271 ++ bool "AMD Zen"
272 ++ ---help---
273 ++ Select this for AMD Family 17h Zen processors.
274 ++
275 ++ Enables -march=znver1
276 ++
277 + config MCRUSOE
278 + bool "Crusoe"
279 + depends on X86_32
280 +@@ -253,6 +324,7 @@ config MVIAC7
281 +
282 + config MPSC
283 + bool "Intel P4 / older Netburst based Xeon"
284 ++ select X86_P6_NOP
285 + depends on X86_64
286 + ---help---
287 + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
288 +@@ -262,8 +334,19 @@ config MPSC
289 + using the cpu family field
290 + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
291 +
292 ++config MATOM
293 ++ bool "Intel Atom"
294 ++ select X86_P6_NOP
295 ++ ---help---
296 ++
297 ++ Select this for the Intel Atom platform. Intel Atom CPUs have an
298 ++ in-order pipelining architecture and thus can benefit from
299 ++ accordingly optimized code. Use a recent GCC with specific Atom
300 ++ support in order to fully benefit from selecting this option.
301 ++
302 + config MCORE2
303 +- bool "Core 2/newer Xeon"
304 ++ bool "Intel Core 2"
305 ++ select X86_P6_NOP
306 + ---help---
307 +
308 + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
309 +@@ -271,14 +354,88 @@ config MCORE2
310 + family in /proc/cpuinfo. Newer ones have 6 and older ones 15
311 + (not a typo)
312 +
313 +-config MATOM
314 +- bool "Intel Atom"
315 ++ Enables -march=core2
316 ++
317 ++config MNEHALEM
318 ++ bool "Intel Nehalem"
319 ++ select X86_P6_NOP
320 + ---help---
321 +
322 +- Select this for the Intel Atom platform. Intel Atom CPUs have an
323 +- in-order pipelining architecture and thus can benefit from
324 +- accordingly optimized code. Use a recent GCC with specific Atom
325 +- support in order to fully benefit from selecting this option.
326 ++ Select this for 1st Gen Core processors in the Nehalem family.
327 ++
328 ++ Enables -march=nehalem
329 ++
330 ++config MWESTMERE
331 ++ bool "Intel Westmere"
332 ++ select X86_P6_NOP
333 ++ ---help---
334 ++
335 ++ Select this for the Intel Westmere formerly Nehalem-C family.
336 ++
337 ++ Enables -march=westmere
338 ++
339 ++config MSILVERMONT
340 ++ bool "Intel Silvermont"
341 ++ select X86_P6_NOP
342 ++ ---help---
343 ++
344 ++ Select this for the Intel Silvermont platform.
345 ++
346 ++ Enables -march=silvermont
347 ++
348 ++config MSANDYBRIDGE
349 ++ bool "Intel Sandy Bridge"
350 ++ select X86_P6_NOP
351 ++ ---help---
352 ++
353 ++ Select this for 2nd Gen Core processors in the Sandy Bridge family.
354 ++
355 ++ Enables -march=sandybridge
356 ++
357 ++config MIVYBRIDGE
358 ++ bool "Intel Ivy Bridge"
359 ++ select X86_P6_NOP
360 ++ ---help---
361 ++
362 ++ Select this for 3rd Gen Core processors in the Ivy Bridge family.
363 ++
364 ++ Enables -march=ivybridge
365 ++
366 ++config MHASWELL
367 ++ bool "Intel Haswell"
368 ++ select X86_P6_NOP
369 ++ ---help---
370 ++
371 ++ Select this for 4th Gen Core processors in the Haswell family.
372 ++
373 ++ Enables -march=haswell
374 ++
375 ++config MBROADWELL
376 ++ bool "Intel Broadwell"
377 ++ select X86_P6_NOP
378 ++ ---help---
379 ++
380 ++ Select this for 5th Gen Core processors in the Broadwell family.
381 ++
382 ++ Enables -march=broadwell
383 ++
384 ++config MSKYLAKE
385 ++ bool "Intel Skylake"
386 ++ select X86_P6_NOP
387 ++ ---help---
388 ++
389 ++ Select this for 6th Gen Core processors in the Skylake family.
390 ++
391 ++ Enables -march=skylake
392 ++
393 ++config MSKYLAKEX
394 ++ bool "Intel Skylake X"
395 ++ select X86_P6_NOP
396 ++ ---help---
397 ++
398 ++ Select this for 6th Gen Core processors in the Skylake X family.
399 ++
400 ++ Enables -march=skylake-avx512
401 +
402 + config GENERIC_CPU
403 + bool "Generic-x86-64"
404 +@@ -287,6 +444,19 @@ config GENERIC_CPU
405 + Generic x86-64 CPU.
406 + Run equally well on all x86-64 CPUs.
407 +
408 ++config MNATIVE
409 ++ bool "Native optimizations autodetected by GCC"
410 ++ ---help---
411 ++
412 ++ GCC 4.2 and above support -march=native, which automatically detects
413 ++ the optimum settings to use based on your processor. -march=native
414 ++ also detects and applies additional settings beyond -march specific
415 ++ to your CPU, (eg. -msse4). Unless you have a specific reason not to
416 ++ (e.g. distcc cross-compiling), you should probably be using
417 ++ -march=native rather than anything listed below.
418 ++
419 ++ Enables -march=native
420 ++
421 + endchoice
422 +
423 + config X86_GENERIC
424 +@@ -311,7 +481,7 @@ config X86_INTERNODE_CACHE_SHIFT
425 + config X86_L1_CACHE_SHIFT
426 + int
427 + default "7" if MPENTIUM4 || MPSC
428 +- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
429 ++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
430 + default "4" if MELAN || M486 || MGEODEGX1
431 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
432 +
433 +@@ -342,35 +512,36 @@ config X86_ALIGNMENT_16
434 +
435 + config X86_INTEL_USERCOPY
436 + def_bool y
437 +- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
438 ++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE
439 +
440 + config X86_USE_PPRO_CHECKSUM
441 + def_bool y
442 +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
443 ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MATOM || MNATIVE
444 +
445 + config X86_USE_3DNOW
446 + def_bool y
447 + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
448 +
449 +-#
450 +-# P6_NOPs are a relatively minor optimization that require a family >=
451 +-# 6 processor, except that it is broken on certain VIA chips.
452 +-# Furthermore, AMD chips prefer a totally different sequence of NOPs
453 +-# (which work on all CPUs). In addition, it looks like Virtual PC
454 +-# does not understand them.
455 +-#
456 +-# As a result, disallow these if we're not compiling for X86_64 (these
457 +-# NOPs do work on all x86-64 capable chips); the list of processors in
458 +-# the right-hand clause are the cores that benefit from this optimization.
459 +-#
460 + config X86_P6_NOP
461 +- def_bool y
462 +- depends on X86_64
463 +- depends on (MCORE2 || MPENTIUM4 || MPSC)
464 ++ default n
465 ++ bool "Support for P6_NOPs on Intel chips"
466 ++ depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE)
467 ++ ---help---
468 ++ P6_NOPs are a relatively minor optimization that require a family >=
469 ++ 6 processor, except that it is broken on certain VIA chips.
470 ++ Furthermore, AMD chips prefer a totally different sequence of NOPs
471 ++ (which work on all CPUs). In addition, it looks like Virtual PC
472 ++ does not understand them.
473 ++
474 ++ As a result, disallow these if we're not compiling for X86_64 (these
475 ++ NOPs do work on all x86-64 capable chips); the list of processors in
476 ++ the right-hand clause are the cores that benefit from this optimization.
477 ++
478 ++ Say Y if you have Intel CPU newer than Pentium Pro, N otherwise.
479 +
480 + config X86_TSC
481 + def_bool y
482 +- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
483 ++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MNATIVE || MATOM) || X86_64
484 +
485 + config X86_CMPXCHG64
486 + def_bool y
487 +@@ -380,7 +551,7 @@ config X86_CMPXCHG64
488 + # generates cmov.
489 + config X86_CMOV
490 + def_bool y
491 +- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
492 ++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
493 +
494 + config X86_MINIMUM_CPU_FAMILY
495 + int
496 +--- a/arch/x86/Makefile 2018-01-28 16:20:33.000000000 -0500
497 ++++ b/arch/x86/Makefile 2018-03-10 06:47:00.284240139 -0500
498 +@@ -124,13 +124,42 @@ else
499 + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
500 +
501 + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
502 ++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
503 + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
504 ++ cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8)
505 ++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
506 ++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
507 ++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
508 ++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
509 ++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
510 ++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
511 ++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3)
512 ++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4)
513 ++ cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1)
514 + cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
515 +
516 + cflags-$(CONFIG_MCORE2) += \
517 +- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
518 +- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
519 +- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
520 ++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
521 ++ cflags-$(CONFIG_MNEHALEM) += \
522 ++ $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem))
523 ++ cflags-$(CONFIG_MWESTMERE) += \
524 ++ $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere))
525 ++ cflags-$(CONFIG_MSILVERMONT) += \
526 ++ $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont))
527 ++ cflags-$(CONFIG_MSANDYBRIDGE) += \
528 ++ $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge))
529 ++ cflags-$(CONFIG_MIVYBRIDGE) += \
530 ++ $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge))
531 ++ cflags-$(CONFIG_MHASWELL) += \
532 ++ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
533 ++ cflags-$(CONFIG_MBROADWELL) += \
534 ++ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
535 ++ cflags-$(CONFIG_MSKYLAKE) += \
536 ++ $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake))
537 ++ cflags-$(CONFIG_MSKYLAKEX) += \
538 ++ $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512))
539 ++ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
540 ++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
541 + cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
542 + KBUILD_CFLAGS += $(cflags-y)
543 +
544 +--- a/arch/x86/Makefile_32.cpu 2018-01-28 16:20:33.000000000 -0500
545 ++++ b/arch/x86/Makefile_32.cpu 2018-03-10 06:47:46.025992644 -0500
546 +@@ -23,7 +23,18 @@ cflags-$(CONFIG_MK6) += -march=k6
547 + # Please note, that patches that add -march=athlon-xp and friends are pointless.
548 + # They make zero difference whatsosever to performance at this time.
549 + cflags-$(CONFIG_MK7) += -march=athlon
550 ++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
551 + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
552 ++cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon)
553 ++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
554 ++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
555 ++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
556 ++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
557 ++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
558 ++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
559 ++cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon)
560 ++cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon)
561 ++cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon)
562 + cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0
563 + cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0
564 + cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
565 +@@ -32,8 +43,17 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
566 + cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
567 + cflags-$(CONFIG_MVIAC7) += -march=i686
568 + cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
569 +-cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
570 +- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
571 ++cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem)
572 ++cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere)
573 ++cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont)
574 ++cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge)
575 ++cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge)
576 ++cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell)
577 ++cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell)
578 ++cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake)
579 ++cflags-$(CONFIG_MSKYLAKEX) += -march=i686 $(call tune,skylake-avx512)
580 ++cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
581 ++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
582 +
583 + # AMD Elan support
584 + cflags-$(CONFIG_MELAN) += -march=i486
585
586 diff --git a/5011_enable-cpu-optimizations-for-gcc8.patch b/5011_enable-cpu-optimizations-for-gcc8.patch
587 new file mode 100644
588 index 0000000..bfd2065
589 --- /dev/null
590 +++ b/5011_enable-cpu-optimizations-for-gcc8.patch
591 @@ -0,0 +1,569 @@
592 +WARNING
593 +This patch works with gcc versions 8.1+ and with kernel version 4.13+ and should
594 +NOT be applied when compiling on older versions of gcc due to key name changes
595 +of the march flags introduced with the version 4.9 release of gcc.[1]
596 +
597 +Use the older version of this patch hosted on the same github for older
598 +versions of gcc.
599 +
600 +FEATURES
601 +This patch adds additional CPU options to the Linux kernel accessible under:
602 + Processor type and features --->
603 + Processor family --->
604 +
605 +The expanded microarchitectures include:
606 +* AMD Improved K8-family
607 +* AMD K10-family
608 +* AMD Family 10h (Barcelona)
609 +* AMD Family 14h (Bobcat)
610 +* AMD Family 16h (Jaguar)
611 +* AMD Family 15h (Bulldozer)
612 +* AMD Family 15h (Piledriver)
613 +* AMD Family 15h (Steamroller)
614 +* AMD Family 15h (Excavator)
615 +* AMD Family 17h (Zen)
616 +* Intel Silvermont low-power processors
617 +* Intel 1st Gen Core i3/i5/i7 (Nehalem)
618 +* Intel 1.5 Gen Core i3/i5/i7 (Westmere)
619 +* Intel 2nd Gen Core i3/i5/i7 (Sandybridge)
620 +* Intel 3rd Gen Core i3/i5/i7 (Ivybridge)
621 +* Intel 4th Gen Core i3/i5/i7 (Haswell)
622 +* Intel 5th Gen Core i3/i5/i7 (Broadwell)
623 +* Intel 6th Gen Core i3/i5/i7 (Skylake)
624 +* Intel 6th Gen Core i7/i9 (Skylake X)
625 +* Intel 8th Gen Core i3/i5/i7 (Cannon Lake)
626 +* Intel 8th Gen Core i7/i9 (Ice Lake)
627 +
628 +It also offers to compile passing the 'native' option which, "selects the CPU
629 +to generate code for at compilation time by determining the processor type of
630 +the compiling machine. Using -march=native enables all instruction subsets
631 +supported by the local machine and will produce code optimized for the local
632 +machine under the constraints of the selected instruction set."[3]
633 +
634 +MINOR NOTES
635 +This patch also changes 'atom' to 'bonnell' in accordance with the gcc v4.9
636 +changes. Note that upstream is using the deprecated 'march=atom' flags when I
637 +believe it should use the newer 'march=bonnell' flag for atom processors.[2]
638 +
639 +It is not recommended to compile on Atom-CPUs with the 'native' option.[4] The
640 +recommendation is to use the 'atom' option instead.
641 +
642 +BENEFITS
643 +Small but real speed increases are measurable using a make endpoint comparing
644 +a generic kernel to one built with one of the respective microarchs.
645 +
646 +See the following experimental evidence supporting this statement:
647 +https://github.com/graysky2/kernel_gcc_patch
648 +
649 +REQUIREMENTS
650 +linux version >=4.20
651 +gcc version >=8.1
652 +
653 +ACKNOWLEDGMENTS
654 +This patch builds on the seminal work by Jeroen.[5]
655 +
656 +REFERENCES
657 +1. https://gcc.gnu.org/gcc-4.9/changes.html
658 +2. https://bugzilla.kernel.org/show_bug.cgi?id=77461
659 +3. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
660 +4. https://github.com/graysky2/kernel_gcc_patch/issues/15
661 +5. http://www.linuxforge.net/docs/linux/linux-gcc.php
662 +
663 +--- a/arch/x86/Makefile_32.cpu 2019-02-22 09:22:03.426937735 -0500
664 ++++ b/arch/x86/Makefile_32.cpu 2019-02-22 09:37:58.680968580 -0500
665 +@@ -23,7 +23,18 @@ cflags-$(CONFIG_MK6) += -march=k6
666 + # Please note, that patches that add -march=athlon-xp and friends are pointless.
667 + # They make zero difference whatsosever to performance at this time.
668 + cflags-$(CONFIG_MK7) += -march=athlon
669 ++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
670 + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
671 ++cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon)
672 ++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
673 ++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
674 ++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
675 ++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
676 ++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
677 ++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
678 ++cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon)
679 ++cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon)
680 ++cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon)
681 + cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0
682 + cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0
683 + cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
684 +@@ -32,9 +43,20 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
685 + cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
686 + cflags-$(CONFIG_MVIAC7) += -march=i686
687 + cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
688 +-cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
689 +- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
690 +-
691 ++cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem)
692 ++cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere)
693 ++cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont)
694 ++cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge)
695 ++cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge)
696 ++cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell)
697 ++cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell)
698 ++cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake)
699 ++cflags-$(CONFIG_MSKYLAKEX) += -march=i686 $(call tune,skylake-avx512)
700 ++cflags-$(CONFIG_MCANNONLAKE) += -march=i686 $(call tune,cannonlake)
701 ++cflags-$(CONFIG_MICELAKE) += -march=i686 $(call tune,icelake)
702 ++cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
703 ++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
704 ++
705 + # AMD Elan support
706 + cflags-$(CONFIG_MELAN) += -march=i486
707 +
708 +--- a/arch/x86/Kconfig.cpu 2019-02-22 09:22:11.576958595 -0500
709 ++++ b/arch/x86/Kconfig.cpu 2019-02-22 09:34:16.490003911 -0500
710 +@@ -116,6 +116,7 @@ config MPENTIUMM
711 + config MPENTIUM4
712 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
713 + depends on X86_32
714 ++ select X86_P6_NOP
715 + ---help---
716 + Select this for Intel Pentium 4 chips. This includes the
717 + Pentium 4, Pentium D, P4-based Celeron and Xeon, and
718 +@@ -150,7 +151,7 @@ config MPENTIUM4
719 +
720 +
721 + config MK6
722 +- bool "K6/K6-II/K6-III"
723 ++ bool "AMD K6/K6-II/K6-III"
724 + depends on X86_32
725 + ---help---
726 + Select this for an AMD K6-family processor. Enables use of
727 +@@ -158,7 +159,7 @@ config MK6
728 + flags to GCC.
729 +
730 + config MK7
731 +- bool "Athlon/Duron/K7"
732 ++ bool "AMD Athlon/Duron/K7"
733 + depends on X86_32
734 + ---help---
735 + Select this for an AMD Athlon K7-family processor. Enables use of
736 +@@ -166,11 +167,81 @@ config MK7
737 + flags to GCC.
738 +
739 + config MK8
740 +- bool "Opteron/Athlon64/Hammer/K8"
741 ++ bool "AMD Opteron/Athlon64/Hammer/K8"
742 + ---help---
743 + Select this for an AMD Opteron or Athlon64 Hammer-family processor.
744 + Enables use of some extended instructions, and passes appropriate
745 + optimization flags to GCC.
746 ++config MK8SSE3
747 ++ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
748 ++ ---help---
749 ++ Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
750 ++ Enables use of some extended instructions, and passes appropriate
751 ++ optimization flags to GCC.
752 ++
753 ++config MK10
754 ++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
755 ++ ---help---
756 ++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
757 ++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
758 ++ Enables use of some extended instructions, and passes appropriate
759 ++ optimization flags to GCC.
760 ++
761 ++config MBARCELONA
762 ++ bool "AMD Barcelona"
763 ++ ---help---
764 ++ Select this for AMD Family 10h Barcelona processors.
765 ++
766 ++ Enables -march=barcelona
767 ++
768 ++config MBOBCAT
769 ++ bool "AMD Bobcat"
770 ++ ---help---
771 ++ Select this for AMD Family 14h Bobcat processors.
772 ++
773 ++ Enables -march=btver1
774 ++
775 ++config MJAGUAR
776 ++ bool "AMD Jaguar"
777 ++ ---help---
778 ++ Select this for AMD Family 16h Jaguar processors.
779 ++
780 ++ Enables -march=btver2
781 ++
782 ++config MBULLDOZER
783 ++ bool "AMD Bulldozer"
784 ++ ---help---
785 ++ Select this for AMD Family 15h Bulldozer processors.
786 ++
787 ++ Enables -march=bdver1
788 ++
789 ++config MPILEDRIVER
790 ++ bool "AMD Piledriver"
791 ++ ---help---
792 ++ Select this for AMD Family 15h Piledriver processors.
793 ++
794 ++ Enables -march=bdver2
795 ++
796 ++config MSTEAMROLLER
797 ++ bool "AMD Steamroller"
798 ++ ---help---
799 ++ Select this for AMD Family 15h Steamroller processors.
800 ++
801 ++ Enables -march=bdver3
802 ++
803 ++config MEXCAVATOR
804 ++ bool "AMD Excavator"
805 ++ ---help---
806 ++ Select this for AMD Family 15h Excavator processors.
807 ++
808 ++ Enables -march=bdver4
809 ++
810 ++config MZEN
811 ++ bool "AMD Zen"
812 ++ ---help---
813 ++ Select this for AMD Family 17h Zen processors.
814 ++
815 ++ Enables -march=znver1
816 +
817 + config MCRUSOE
818 + bool "Crusoe"
819 +@@ -253,6 +324,7 @@ config MVIAC7
820 +
821 + config MPSC
822 + bool "Intel P4 / older Netburst based Xeon"
823 ++ select X86_P6_NOP
824 + depends on X86_64
825 + ---help---
826 + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
827 +@@ -262,23 +334,126 @@ config MPSC
828 + using the cpu family field
829 + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
830 +
831 ++config MATOM
832 ++ bool "Intel Atom"
833 ++ select X86_P6_NOP
834 ++ ---help---
835 ++
836 ++ Select this for the Intel Atom platform. Intel Atom CPUs have an
837 ++ in-order pipelining architecture and thus can benefit from
838 ++ accordingly optimized code. Use a recent GCC with specific Atom
839 ++ support in order to fully benefit from selecting this option.
840 ++
841 + config MCORE2
842 +- bool "Core 2/newer Xeon"
843 ++ bool "Intel Core 2"
844 ++ select X86_P6_NOP
845 ++
846 + ---help---
847 +
848 + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
849 + 53xx) CPUs. You can distinguish newer from older Xeons by the CPU
850 + family in /proc/cpuinfo. Newer ones have 6 and older ones 15
851 + (not a typo)
852 ++ Enables -march=core2
853 +
854 +-config MATOM
855 +- bool "Intel Atom"
856 ++config MNEHALEM
857 ++ bool "Intel Nehalem"
858 ++ select X86_P6_NOP
859 + ---help---
860 +
861 +- Select this for the Intel Atom platform. Intel Atom CPUs have an
862 +- in-order pipelining architecture and thus can benefit from
863 +- accordingly optimized code. Use a recent GCC with specific Atom
864 +- support in order to fully benefit from selecting this option.
865 ++ Select this for 1st Gen Core processors in the Nehalem family.
866 ++
867 ++ Enables -march=nehalem
868 ++
869 ++config MWESTMERE
870 ++ bool "Intel Westmere"
871 ++ select X86_P6_NOP
872 ++ ---help---
873 ++
874 ++ Select this for the Intel Westmere (formerly Nehalem-C) family.
875 ++
876 ++ Enables -march=westmere
877 ++
878 ++config MSILVERMONT
879 ++ bool "Intel Silvermont"
880 ++ select X86_P6_NOP
881 ++ ---help---
882 ++
883 ++ Select this for the Intel Silvermont platform.
884 ++
885 ++ Enables -march=silvermont
886 ++
887 ++config MSANDYBRIDGE
888 ++ bool "Intel Sandy Bridge"
889 ++ select X86_P6_NOP
890 ++ ---help---
891 ++
892 ++ Select this for 2nd Gen Core processors in the Sandy Bridge family.
893 ++
894 ++ Enables -march=sandybridge
895 ++
896 ++config MIVYBRIDGE
897 ++ bool "Intel Ivy Bridge"
898 ++ select X86_P6_NOP
899 ++ ---help---
900 ++
901 ++ Select this for 3rd Gen Core processors in the Ivy Bridge family.
902 ++
903 ++ Enables -march=ivybridge
904 ++
905 ++config MHASWELL
906 ++ bool "Intel Haswell"
907 ++ select X86_P6_NOP
908 ++ ---help---
909 ++
910 ++ Select this for 4th Gen Core processors in the Haswell family.
911 ++
912 ++ Enables -march=haswell
913 ++
914 ++config MBROADWELL
915 ++ bool "Intel Broadwell"
916 ++ select X86_P6_NOP
917 ++ ---help---
918 ++
919 ++ Select this for 5th Gen Core processors in the Broadwell family.
920 ++
921 ++ Enables -march=broadwell
922 ++
923 ++config MSKYLAKE
924 ++ bool "Intel Skylake"
925 ++ select X86_P6_NOP
926 ++ ---help---
927 ++
928 ++ Select this for 6th Gen Core processors in the Skylake family.
929 ++
930 ++ Enables -march=skylake
931 ++
932 ++config MSKYLAKEX
933 ++ bool "Intel Skylake X"
934 ++ select X86_P6_NOP
935 ++ ---help---
936 ++
937 ++ Select this for 6th Gen Core processors in the Skylake X family.
938 ++
939 ++ Enables -march=skylake-avx512
940 ++
941 ++config MCANNONLAKE
942 ++ bool "Intel Cannon Lake"
943 ++ select X86_P6_NOP
944 ++ ---help---
945 ++
946 ++ Select this for 8th Gen Core processors in the Cannon Lake family.
947 ++
948 ++ Enables -march=cannonlake
949 ++
950 ++config MICELAKE
951 ++ bool "Intel Ice Lake"
952 ++ select X86_P6_NOP
953 ++ ---help---
954 ++
955 ++ Select this for 10th Gen Core processors in the Ice Lake family.
956 ++
957 ++ Enables -march=icelake
958 +
959 + config GENERIC_CPU
960 + bool "Generic-x86-64"
961 +@@ -287,6 +462,19 @@ config GENERIC_CPU
962 + Generic x86-64 CPU.
963 + Run equally well on all x86-64 CPUs.
964 +
965 ++config MNATIVE
966 ++ bool "Native optimizations autodetected by GCC"
967 ++ ---help---
968 ++
969 ++ GCC 4.2 and above support -march=native, which automatically detects
970 ++ the optimum settings to use based on your processor. -march=native
971 ++ also detects and applies additional settings beyond -march specific
972 ++ to your CPU (e.g. -msse4). Unless you have a specific reason not to
973 ++ (e.g. distcc cross-compiling), you should probably be using
974 ++ -march=native rather than anything listed below.
975 ++
976 ++ Enables -march=native
977 ++
978 + endchoice
979 +
980 + config X86_GENERIC
981 +@@ -311,7 +499,7 @@ config X86_INTERNODE_CACHE_SHIFT
982 + config X86_L1_CACHE_SHIFT
983 + int
984 + default "7" if MPENTIUM4 || MPSC
985 +- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
986 ++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
987 + default "4" if MELAN || M486 || MGEODEGX1
988 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
989 +
990 +@@ -329,39 +517,40 @@ config X86_ALIGNMENT_16
991 +
992 + config X86_INTEL_USERCOPY
993 + def_bool y
994 +- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
995 ++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE
996 +
997 + config X86_USE_PPRO_CHECKSUM
998 + def_bool y
999 +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
1000 ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MATOM || MNATIVE
1001 +
1002 + config X86_USE_3DNOW
1003 + def_bool y
1004 + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
1005 +
1006 +-#
1007 +-# P6_NOPs are a relatively minor optimization that require a family >=
1008 +-# 6 processor, except that it is broken on certain VIA chips.
1009 +-# Furthermore, AMD chips prefer a totally different sequence of NOPs
1010 +-# (which work on all CPUs). In addition, it looks like Virtual PC
1011 +-# does not understand them.
1012 +-#
1013 +-# As a result, disallow these if we're not compiling for X86_64 (these
1014 +-# NOPs do work on all x86-64 capable chips); the list of processors in
1015 +-# the right-hand clause are the cores that benefit from this optimization.
1016 +-#
1017 + config X86_P6_NOP
1018 +- def_bool y
1019 +- depends on X86_64
1020 +- depends on (MCORE2 || MPENTIUM4 || MPSC)
1021 ++ default n
1022 ++ bool "Support for P6_NOPs on Intel chips"
1023 ++ depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE)
1024 ++ ---help---
1025 ++ P6_NOPs are a relatively minor optimization that require a family >=
1026 ++ 6 processor, except that it is broken on certain VIA chips.
1027 ++ Furthermore, AMD chips prefer a totally different sequence of NOPs
1028 ++ (which work on all CPUs). In addition, it looks like Virtual PC
1029 ++ does not understand them.
1030 ++
1031 ++ As a result, disallow these if we're not compiling for X86_64 (these
1032 ++ NOPs do work on all x86-64 capable chips); the list of processors in
1033 ++ the right-hand clause are the cores that benefit from this optimization.
1034 +
1035 ++ Say Y if you have an Intel CPU newer than Pentium Pro, N otherwise.
1036 ++
1037 + config X86_TSC
1038 + def_bool y
1039 +- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
1040 ++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MNATIVE || MATOM) || X86_64
1041 +
1042 + config X86_CMPXCHG64
1043 + def_bool y
1044 +- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
1045 ++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
1046 +
1047 + # this should be set for all -march=.. options where the compiler
1048 + # generates cmov.
1049 +--- a/arch/x86/Makefile 2019-02-22 09:21:58.196924367 -0500
1050 ++++ b/arch/x86/Makefile 2019-02-22 09:36:27.310577832 -0500
1051 +@@ -118,13 +118,46 @@ else
1052 + KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
1053 +
1054 + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
1055 ++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
1056 + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
1057 ++ cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8)
1058 ++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
1059 ++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
1060 ++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
1061 ++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
1062 ++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
1063 ++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
1064 ++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3)
1065 ++ cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4)
1066 ++ cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1)
1067 + cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
1068 +
1069 + cflags-$(CONFIG_MCORE2) += \
1070 +- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
1071 +- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
1072 +- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
1073 ++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
1074 ++ cflags-$(CONFIG_MNEHALEM) += \
1075 ++ $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem))
1076 ++ cflags-$(CONFIG_MWESTMERE) += \
1077 ++ $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere))
1078 ++ cflags-$(CONFIG_MSILVERMONT) += \
1079 ++ $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont))
1080 ++ cflags-$(CONFIG_MSANDYBRIDGE) += \
1081 ++ $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge))
1082 ++ cflags-$(CONFIG_MIVYBRIDGE) += \
1083 ++ $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge))
1084 ++ cflags-$(CONFIG_MHASWELL) += \
1085 ++ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
1086 ++ cflags-$(CONFIG_MBROADWELL) += \
1087 ++ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
1088 ++ cflags-$(CONFIG_MSKYLAKE) += \
1089 ++ $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake))
1090 ++ cflags-$(CONFIG_MSKYLAKEX) += \
1091 ++ $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512))
1092 ++ cflags-$(CONFIG_MCANNONLAKE) += \
1093 ++ $(call cc-option,-march=cannonlake,$(call cc-option,-mtune=cannonlake))
1094 ++ cflags-$(CONFIG_MICELAKE) += \
1095 ++ $(call cc-option,-march=icelake,$(call cc-option,-mtune=icelake))
1096 ++ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
1097 ++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
1098 + cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
1099 + KBUILD_CFLAGS += $(cflags-y)
1100 +
1101 +--- a/arch/x86/include/asm/module.h 2019-02-22 09:22:26.726997480 -0500
1102 ++++ b/arch/x86/include/asm/module.h 2019-02-22 09:40:04.231493392 -0500
1103 +@@ -25,6 +25,30 @@ struct mod_arch_specific {
1104 + #define MODULE_PROC_FAMILY "586MMX "
1105 + #elif defined CONFIG_MCORE2
1106 + #define MODULE_PROC_FAMILY "CORE2 "
1107 ++#elif defined CONFIG_MNATIVE
1108 ++#define MODULE_PROC_FAMILY "NATIVE "
1109 ++#elif defined CONFIG_MNEHALEM
1110 ++#define MODULE_PROC_FAMILY "NEHALEM "
1111 ++#elif defined CONFIG_MWESTMERE
1112 ++#define MODULE_PROC_FAMILY "WESTMERE "
1113 ++#elif defined CONFIG_MSILVERMONT
1114 ++#define MODULE_PROC_FAMILY "SILVERMONT "
1115 ++#elif defined CONFIG_MSANDYBRIDGE
1116 ++#define MODULE_PROC_FAMILY "SANDYBRIDGE "
1117 ++#elif defined CONFIG_MIVYBRIDGE
1118 ++#define MODULE_PROC_FAMILY "IVYBRIDGE "
1119 ++#elif defined CONFIG_MHASWELL
1120 ++#define MODULE_PROC_FAMILY "HASWELL "
1121 ++#elif defined CONFIG_MBROADWELL
1122 ++#define MODULE_PROC_FAMILY "BROADWELL "
1123 ++#elif defined CONFIG_MSKYLAKE
1124 ++#define MODULE_PROC_FAMILY "SKYLAKE "
1125 ++#elif defined CONFIG_MSKYLAKEX
1126 ++#define MODULE_PROC_FAMILY "SKYLAKEX "
1127 ++#elif defined CONFIG_MCANNONLAKE
1128 ++#define MODULE_PROC_FAMILY "CANNONLAKE "
1129 ++#elif defined CONFIG_MICELAKE
1130 ++#define MODULE_PROC_FAMILY "ICELAKE "
1131 + #elif defined CONFIG_MATOM
1132 + #define MODULE_PROC_FAMILY "ATOM "
1133 + #elif defined CONFIG_M686
1134 +@@ -43,6 +67,26 @@ struct mod_arch_specific {
1135 + #define MODULE_PROC_FAMILY "K7 "
1136 + #elif defined CONFIG_MK8
1137 + #define MODULE_PROC_FAMILY "K8 "
1138 ++#elif defined CONFIG_MK8SSE3
1139 ++#define MODULE_PROC_FAMILY "K8SSE3 "
1140 ++#elif defined CONFIG_MK10
1141 ++#define MODULE_PROC_FAMILY "K10 "
1142 ++#elif defined CONFIG_MBARCELONA
1143 ++#define MODULE_PROC_FAMILY "BARCELONA "
1144 ++#elif defined CONFIG_MBOBCAT
1145 ++#define MODULE_PROC_FAMILY "BOBCAT "
1146 ++#elif defined CONFIG_MBULLDOZER
1147 ++#define MODULE_PROC_FAMILY "BULLDOZER "
1148 ++#elif defined CONFIG_MPILEDRIVER
1149 ++#define MODULE_PROC_FAMILY "PILEDRIVER "
1150 ++#elif defined CONFIG_MSTEAMROLLER
1151 ++#define MODULE_PROC_FAMILY "STEAMROLLER "
1152 ++#elif defined CONFIG_MJAGUAR
1153 ++#define MODULE_PROC_FAMILY "JAGUAR "
1154 ++#elif defined CONFIG_MEXCAVATOR
1155 ++#define MODULE_PROC_FAMILY "EXCAVATOR "
1156 ++#elif defined CONFIG_MZEN
1157 ++#define MODULE_PROC_FAMILY "ZEN "
1158 + #elif defined CONFIG_MELAN
1159 + #define MODULE_PROC_FAMILY "ELAN "
1160 + #elif defined CONFIG_MCRUSOE