Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:3.15 commit in: /
Date: Mon, 23 Jun 2014 16:53:31
Message-Id: 1403542392.8c5cd709043492aca480863b56254137aa71f8e9.mpagano@gentoo
1 commit: 8c5cd709043492aca480863b56254137aa71f8e9
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Mon Jun 23 16:53:12 2014 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Mon Jun 23 16:53:12 2014 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/linux-patches.git;a=commit;h=8c5cd709
7
8 Adding cpu optimization patch
9
10 ---
11 0000_README | 4 +
12 ...able-additional-cpu-optimizations-for-gcc.patch | 327 +++++++++++++++++++++
13 2 files changed, 331 insertions(+)
14
15 diff --git a/0000_README b/0000_README
16 index 5737a82..019dbd2 100644
17 --- a/0000_README
18 +++ b/0000_README
19 @@ -79,6 +79,10 @@ Patch: 4567_distro-Gentoo-Kconfig.patch
20 From: Tom Wijsman <TomWij@g.o>
21 Desc: Add Gentoo Linux support config settings and defaults.
22
23 +Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
24 +From: https://github.com/graysky2/kernel_gcc_patch/
25 +Desc: Kernel patch enables gcc optimizations for additional CPUs.
26 +
27 Patch: 5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r4-3.15.patch
28 From: http://algo.ing.unimo.it/people/paolo/disk_sched/
29 Desc: BFQ v7r2 patch 1 for 3.14: Build, cgroups and kconfig bits
30
31 diff --git a/5000_enable-additional-cpu-optimizations-for-gcc.patch b/5000_enable-additional-cpu-optimizations-for-gcc.patch
32 new file mode 100644
33 index 0000000..f7ab6f0
34 --- /dev/null
35 +++ b/5000_enable-additional-cpu-optimizations-for-gcc.patch
36 @@ -0,0 +1,327 @@
37 +This patch has been tested on, and is known to work with, kernel versions from
38 +3.2 up to the latest git version (pulled on 12/14/2013).
39 +
40 +This patch will expand the number of microarchitectures to include new
41 +processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
42 +14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
43 +Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core
44 +i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th
45 +Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag.
46 +
47 +Small but real speed increases are measurable using a 'make' (compile-time)
48 +benchmark as the endpoint, comparing a generic kernel to one built with one of the respective microarchs.
49 +
50 +See the following experimental evidence supporting this statement:
51 +https://github.com/graysky2/kernel_gcc_patch
52 +
53 +REQUIREMENTS
54 +linux version >=3.15
55 +gcc version <4.9
56 +
57 +---
58 +diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
59 +--- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500
60 ++++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500
61 +@@ -15,6 +15,16 @@
62 + #define MODULE_PROC_FAMILY "586MMX "
63 + #elif defined CONFIG_MCORE2
64 + #define MODULE_PROC_FAMILY "CORE2 "
65 ++#elif defined CONFIG_MNATIVE
66 ++#define MODULE_PROC_FAMILY "NATIVE "
67 ++#elif defined CONFIG_MCOREI7
68 ++#define MODULE_PROC_FAMILY "COREI7 "
69 ++#elif defined CONFIG_MCOREI7AVX
70 ++#define MODULE_PROC_FAMILY "COREI7AVX "
71 ++#elif defined CONFIG_MCOREAVXI
72 ++#define MODULE_PROC_FAMILY "COREAVXI "
73 ++#elif defined CONFIG_MCOREAVX2
74 ++#define MODULE_PROC_FAMILY "COREAVX2 "
75 + #elif defined CONFIG_MATOM
76 + #define MODULE_PROC_FAMILY "ATOM "
77 + #elif defined CONFIG_M686
78 +@@ -33,6 +43,18 @@
79 + #define MODULE_PROC_FAMILY "K7 "
80 + #elif defined CONFIG_MK8
81 + #define MODULE_PROC_FAMILY "K8 "
82 ++#elif defined CONFIG_MK10
83 ++#define MODULE_PROC_FAMILY "K10 "
84 ++#elif defined CONFIG_MBARCELONA
85 ++#define MODULE_PROC_FAMILY "BARCELONA "
86 ++#elif defined CONFIG_MBOBCAT
87 ++#define MODULE_PROC_FAMILY "BOBCAT "
88 ++#elif defined CONFIG_MBULLDOZER
89 ++#define MODULE_PROC_FAMILY "BULLDOZER "
90 ++#elif defined CONFIG_MPILEDRIVER
91 ++#define MODULE_PROC_FAMILY "PILEDRIVER "
92 ++#elif defined CONFIG_MJAGUAR
93 ++#define MODULE_PROC_FAMILY "JAGUAR "
94 + #elif defined CONFIG_MELAN
95 + #define MODULE_PROC_FAMILY "ELAN "
96 + #elif defined CONFIG_MCRUSOE
97 +diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
98 +--- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500
99 ++++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500
100 +@@ -139,7 +139,7 @@ config MPENTIUM4
101 +
102 +
103 + config MK6
104 +- bool "K6/K6-II/K6-III"
105 ++ bool "AMD K6/K6-II/K6-III"
106 + depends on X86_32
107 + ---help---
108 + Select this for an AMD K6-family processor. Enables use of
109 +@@ -147,7 +147,7 @@ config MK6
110 + flags to GCC.
111 +
112 + config MK7
113 +- bool "Athlon/Duron/K7"
114 ++ bool "AMD Athlon/Duron/K7"
115 + depends on X86_32
116 + ---help---
117 + Select this for an AMD Athlon K7-family processor. Enables use of
118 +@@ -155,12 +155,55 @@ config MK7
119 + flags to GCC.
120 +
121 + config MK8
122 +- bool "Opteron/Athlon64/Hammer/K8"
123 ++ bool "AMD Opteron/Athlon64/Hammer/K8"
124 + ---help---
125 + Select this for an AMD Opteron or Athlon64 Hammer-family processor.
126 + Enables use of some extended instructions, and passes appropriate
127 + optimization flags to GCC.
128 +
129 ++config MK10
130 ++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
131 ++ ---help---
132 ++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
133 ++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
134 ++ Enables use of some extended instructions, and passes appropriate
135 ++ optimization flags to GCC.
136 ++
137 ++config MBARCELONA
138 ++ bool "AMD Barcelona"
139 ++ ---help---
140 ++ Select this for AMD Barcelona and newer processors.
141 ++
142 ++ Enables -march=barcelona
143 ++
144 ++config MBOBCAT
145 ++ bool "AMD Bobcat"
146 ++ ---help---
147 ++ Select this for AMD Bobcat processors.
148 ++
149 ++ Enables -march=btver1
150 ++
151 ++config MBULLDOZER
152 ++ bool "AMD Bulldozer"
153 ++ ---help---
154 ++ Select this for AMD Bulldozer processors.
155 ++
156 ++ Enables -march=bdver1
157 ++
158 ++config MPILEDRIVER
159 ++ bool "AMD Piledriver"
160 ++ ---help---
161 ++ Select this for AMD Piledriver processors.
162 ++
163 ++ Enables -march=bdver2
164 ++
165 ++config MJAGUAR
166 ++ bool "AMD Jaguar"
167 ++ ---help---
168 ++ Select this for AMD Jaguar processors.
169 ++
170 ++ Enables -march=btver2
171 ++
172 + config MCRUSOE
173 + bool "Crusoe"
174 + depends on X86_32
175 +@@ -251,8 +294,17 @@ config MPSC
176 + using the cpu family field
177 + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
178 +
179 ++config MATOM
180 ++ bool "Intel Atom"
181 ++ ---help---
182 ++
183 ++ Select this for the Intel Atom platform. Intel Atom CPUs have an
184 ++ in-order pipelining architecture and thus can benefit from
185 ++ accordingly optimized code. Use a recent GCC with specific Atom
186 ++ support in order to fully benefit from selecting this option.
187 ++
188 + config MCORE2
189 +- bool "Core 2/newer Xeon"
190 ++ bool "Intel Core 2"
191 + ---help---
192 +
193 + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
194 +@@ -260,14 +312,40 @@ config MCORE2
195 + family in /proc/cpuinfo. Newer ones have 6 and older ones 15
196 + (not a typo)
197 +
198 +-config MATOM
199 +- bool "Intel Atom"
200 ++ Enables -march=core2
201 ++
202 ++config MCOREI7
203 ++ bool "Intel Core i7"
204 + ---help---
205 +
206 +- Select this for the Intel Atom platform. Intel Atom CPUs have an
207 +- in-order pipelining architecture and thus can benefit from
208 +- accordingly optimized code. Use a recent GCC with specific Atom
209 +- support in order to fully benefit from selecting this option.
210 ++ Select this for the Intel Nehalem platform. Intel Nehalem processors
211 ++ include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
212 ++
213 ++ Enables -march=corei7
214 ++
215 ++config MCOREI7AVX
216 ++ bool "Intel Core 2nd Gen AVX"
217 ++ ---help---
218 ++
219 ++ Select this for 2nd Gen Core processors including Sandy Bridge.
220 ++
221 ++ Enables -march=corei7-avx
222 ++
223 ++config MCOREAVXI
224 ++ bool "Intel Core 3rd Gen AVX"
225 ++ ---help---
226 ++
227 ++ Select this for 3rd Gen Core processors including Ivy Bridge.
228 ++
229 ++ Enables -march=core-avx-i
230 ++
231 ++config MCOREAVX2
232 ++ bool "Intel Core AVX2"
233 ++ ---help---
234 ++
235 ++ Select this for AVX2 enabled processors including Haswell.
236 ++
237 ++ Enables -march=core-avx2
238 +
239 + config GENERIC_CPU
240 + bool "Generic-x86-64"
241 +@@ -276,6 +354,19 @@ config GENERIC_CPU
242 + Generic x86-64 CPU.
243 + Run equally well on all x86-64 CPUs.
244 +
245 ++config MNATIVE
246 ++ bool "Native optimizations autodetected by GCC"
247 ++ ---help---
248 ++
249 ++ GCC 4.2 and above support -march=native, which automatically detects
250 ++ the optimum settings to use based on your processor. -march=native
251 ++ also detects and applies additional settings beyond -march specific
252 ++ to your CPU (e.g. -msse4). Unless you have a specific reason not to
253 ++ (e.g. distcc cross-compiling), you should probably be using
254 ++ -march=native rather than any other option in this list.
255 ++
256 ++ Enables -march=native
257 ++
258 + endchoice
259 +
260 + config X86_GENERIC
261 +@@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT
262 + config X86_L1_CACHE_SHIFT
263 + int
264 + default "7" if MPENTIUM4 || MPSC
265 +- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
266 ++ default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU
267 + default "4" if MELAN || M486 || MGEODEGX1
268 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
269 +
270 +@@ -331,11 +422,11 @@ config X86_ALIGNMENT_16
271 +
272 + config X86_INTEL_USERCOPY
273 + def_bool y
274 +- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
275 ++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2
276 +
277 + config X86_USE_PPRO_CHECKSUM
278 + def_bool y
279 +- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
280 ++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE
281 +
282 + config X86_USE_3DNOW
283 + def_bool y
284 +@@ -363,17 +454,17 @@ config X86_P6_NOP
285 +
286 + config X86_TSC
287 + def_bool y
288 +- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
289 ++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM) || X86_64 || MNATIVE
290 +
291 + config X86_CMPXCHG64
292 + def_bool y
293 +- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
294 ++ depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
295 +
296 + # this should be set for all -march=.. options where the compiler
297 + # generates cmov.
298 + config X86_CMOV
299 + def_bool y
300 +- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
301 ++ depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
302 +
303 + config X86_MINIMUM_CPU_FAMILY
304 + int
305 +diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile
306 +--- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500
307 ++++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500
308 +@@ -61,11 +61,26 @@ else
309 + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
310 +
311 + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
312 ++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
313 + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
314 ++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
315 ++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
316 ++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
317 ++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
318 ++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
319 ++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
320 + cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
321 +
322 + cflags-$(CONFIG_MCORE2) += \
323 +- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
324 ++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
325 ++ cflags-$(CONFIG_MCOREI7) += \
326 ++ $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7))
327 ++ cflags-$(CONFIG_MCOREI7AVX) += \
328 ++ $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx))
329 ++ cflags-$(CONFIG_MCOREAVXI) += \
330 ++ $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i))
331 ++ cflags-$(CONFIG_MCOREAVX2) += \
332 ++ $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2))
333 + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
334 + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
335 + cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
336 +diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
337 +--- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500
338 ++++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500
339 +@@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6
340 + # Please note, that patches that add -march=athlon-xp and friends are pointless.
341 + # They make zero difference whatsosever to performance at this time.
342 + cflags-$(CONFIG_MK7) += -march=athlon
343 ++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
344 + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
345 ++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
346 ++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
347 ++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
348 ++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
349 ++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
350 ++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
351 + cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
352 + cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
353 + cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
354 +@@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
355 + cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
356 + cflags-$(CONFIG_MVIAC7) += -march=i686
357 + cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
358 ++cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7)
359 ++cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx)
360 ++cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i)
361 ++cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2)
362 + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
363 + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))