commit: bd3abb7ea65a7c0c7a1c12f1dc536c62f65f6840
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Aug 19 13:16:00 2022 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Aug 19 13:16:00 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=bd3abb7e

Fixes for BMQ, thanks to TK-Glitch

Source: https://github.com/Frogging-Family/linux-tkg

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README | 4 +-
 ...Q-and-PDS-io-scheduler-v5.19-r0-linux-tkg.patch | 318 +++++++++++++++++++++
 2 files changed, 320 insertions(+), 2 deletions(-)

diff --git a/0000_README b/0000_README
index 8f7da639..d4f51c59 100644
--- a/0000_README
+++ b/0000_README
@@ -87,8 +87,8 @@ Patch: 5010_enable-cpu-optimizations-universal.patch
From: https://github.com/graysky2/kernel_compiler_patch
Desc: Kernel >= 5.15 patch enables gcc = v11.1+ optimizations for additional CPUs.

-Patch: 5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch
-From: https://gitlab.com/alfredchen/linux-prjc
+Patch: 5020_BMQ-and-PDS-io-scheduler-v5.19-r0-linux-tkg.patch
+From: https://github.com/Frogging-Family/linux-tkg
Desc: BMQ(BitMap Queue) Scheduler. A new CPU scheduler developed from PDS(incld). Inspired by the scheduler in zircon.

Patch: 5021_BMQ-and-PDS-gentoo-defaults.patch

diff --git a/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch b/5020_BMQ-and-PDS-io-scheduler-v5.19-r0-linux-tkg.patch
similarity index 96%
rename from 5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch
rename to 5020_BMQ-and-PDS-io-scheduler-v5.19-r0-linux-tkg.patch
index 610cfe83..25c71a6c 100644
--- a/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch
+++ b/5020_BMQ-and-PDS-io-scheduler-v5.19-r0-linux-tkg.patch
@@ -9954,3 +9954,321 @@ index a2d301f58ced..2ccdede8585c 100644
};
struct wakeup_test_data *x = data;

+From 3728c383c5031dce5ae0f5ea53fc47afba71270f Mon Sep 17 00:00:00 2001
+From: Juuso Alasuutari <juuso.alasuutari@×××××.com>
+Date: Sun, 14 Aug 2022 18:19:09 +0300
+Subject: [PATCH 01/10] sched/alt: [Sync] sched/core: Always flush pending
+ blk_plug
+
+---
+ kernel/sched/alt_core.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
+index 588c7b983e3ba..8a6aa5b7279d3 100644
+--- a/kernel/sched/alt_core.c
++++ b/kernel/sched/alt_core.c
+@@ -4663,8 +4663,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
+ io_wq_worker_sleeping(tsk);
+ }
+
+- if (tsk_is_pi_blocked(tsk))
+- return;
++ /*
++ * spinlock and rwlock must not flush block requests. This will
++ * deadlock if the callback attempts to acquire a lock which is
++ * already acquired.
++ */
++ SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT);
+
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+
+From 379df22366dfa47d021a6bfe149c10a02d39a59e Mon Sep 17 00:00:00 2001
+From: Juuso Alasuutari <juuso.alasuutari@×××××.com>
+Date: Sun, 14 Aug 2022 18:19:09 +0300
+Subject: [PATCH 02/10] sched/alt: [Sync] io_uring: move to separate directory
+
+---
+ kernel/sched/alt_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
+index 8a6aa5b7279d3..200d12b0ba6a9 100644
+--- a/kernel/sched/alt_core.c
++++ b/kernel/sched/alt_core.c
+@@ -43,7 +43,7 @@
+
+ #include "pelt.h"
+
+-#include "../../fs/io-wq.h"
++#include "../../io_uring/io-wq.h"
+ #include "../smpboot.h"
+
+ /*
+
+From 289d4f9619656155c2d467f9ea9fa5258b4aacd0 Mon Sep 17 00:00:00 2001
+From: Juuso Alasuutari <juuso.alasuutari@×××××.com>
+Date: Sun, 14 Aug 2022 18:19:09 +0300
+Subject: [PATCH 03/10] sched/alt: [Sync] sched, cpuset: Fix dl_cpu_busy()
+ panic due to empty cs->cpus_allowed
+
+---
+ kernel/sched/alt_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
+index 200d12b0ba6a9..1aeb7a225d9bd 100644
+--- a/kernel/sched/alt_core.c
++++ b/kernel/sched/alt_core.c
+@@ -6737,7 +6737,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur,
+ }
+
+ int task_can_attach(struct task_struct *p,
+- const struct cpumask *cs_cpus_allowed)
++ const struct cpumask *cs_effective_cpus)
+ {
+ int ret = 0;
+
+
+From 95e712f92034119e23b4157aba72e8ffb2d74fed Mon Sep 17 00:00:00 2001
+From: Tor Vic <torvic9@×××××××.org>
+Date: Wed, 17 Aug 2022 21:44:18 +0200
+Subject: [PATCH 05/10] sched/alt: Transpose the sched_rq_watermark array
+
+This is not my work.
+All credits go to Torge Matthies as in below link.
+
+Link: https://gitlab.com/alfredchen/linux-prjc/-/merge_requests/11
+---
+ kernel/sched/alt_core.c | 124 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 104 insertions(+), 20 deletions(-)
+
+diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
+index cf71defb0e0be..7929b810ba74f 100644
+--- a/kernel/sched/alt_core.c
++++ b/kernel/sched/alt_core.c
+@@ -147,7 +147,87 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+ #ifdef CONFIG_SCHED_SMT
+ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
+ #endif
+-static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++
++#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG
++typedef struct sched_bitmask {
++ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)];
++} sched_bitmask_t;
++static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp;
++
++#define x(p, set, mask) \
++ do { \
++ if (set) \
++ atomic_long_or((mask), (p)); \
++ else \
++ atomic_long_and(~(mask), (p)); \
++ } while (0)
++
++static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end,
++ unsigned int start, bool set)
++{
++ unsigned int start_idx, start_bit;
++ unsigned int end_idx, end_bit;
++ atomic_long_t *p;
++
++ if (end == start) {
++ return;
++ }
++
++ start_idx = start / BITS_PER_ATOMIC_LONG_T;
++ start_bit = start % BITS_PER_ATOMIC_LONG_T;
++ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T;
++ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T;
++ p = &sched_rq_watermark[cpu].bits[end_idx];
++
++ if (end_idx == start_idx) {
++ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit));
++ return;
++ }
++
++ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) {
++ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)));
++ p -= 1;
++ end_idx -= 1;
++ }
++
++ while (end_idx != start_idx) {
++ atomic_long_set(p, set ? ~0UL : 0);
++ p -= 1;
++ end_idx -= 1;
++ }
++
++ x(p, set, ~0UL << start_bit);
++}
++
++#undef x
++
++static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not)
++{
++ int cpu;
++ bool ret = false;
++ int idx = prio / BITS_PER_ATOMIC_LONG_T;
++ int bit = prio % BITS_PER_ATOMIC_LONG_T;
++
++ cpumask_clear(dstp);
++ for_each_cpu(cpu, cpus)
++ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) {
++ __cpumask_set_cpu(cpu, dstp);
++ ret = true;
++ }
++ return ret;
++}
++
++static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not)
++{
++ int cpu;
++ int idx = prio / BITS_PER_ATOMIC_LONG_T;
++ int bit = prio % BITS_PER_ATOMIC_LONG_T;
++
++ for_each_cpu(cpu, cpus)
++ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not)
++ return true;
++ return false;
++}
+
+ /* sched_queue related functions */
+ static inline void sched_queue_init(struct sched_queue *q)
+@@ -176,7 +256,6 @@ static inline void update_sched_rq_watermark(struct rq *rq)
+ {
+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
+ unsigned long last_wm = rq->watermark;
+- unsigned long i;
+ int cpu;
+
+ if (watermark == last_wm)
+@@ -185,28 +264,25 @@ static inline void update_sched_rq_watermark(struct rq *rq)
+ rq->watermark = watermark;
+ cpu = cpu_of(rq);
+ if (watermark < last_wm) {
+- for (i = last_wm; i > watermark; i--)
+- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false);
+ #ifdef CONFIG_SCHED_SMT
+ if (static_branch_likely(&sched_smt_present) &&
+- IDLE_TASK_SCHED_PRIO == last_wm)
++ unlikely(IDLE_TASK_SCHED_PRIO == last_wm))
+ cpumask_andnot(&sched_sg_idle_mask,
+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
+ #endif
+ return;
+ }
+ /* last_wm < watermark */
+- for (i = watermark; i > last_wm; i--)
+- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true);
+ #ifdef CONFIG_SCHED_SMT
+ if (static_branch_likely(&sched_smt_present) &&
+- IDLE_TASK_SCHED_PRIO == watermark) {
+- cpumask_t tmp;
++ unlikely(IDLE_TASK_SCHED_PRIO == watermark)) {
++ const cpumask_t *smt_mask = cpu_smt_mask(cpu);
+
+- cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark);
+- if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
++ if (!sched_rq_watermark_test(smt_mask, 0, true))
+ cpumask_or(&sched_sg_idle_mask,
+- &sched_sg_idle_mask, cpu_smt_mask(cpu));
++ &sched_sg_idle_mask, smt_mask);
+ }
+ #endif
+ }
+@@ -1903,9 +1979,9 @@ static inline int select_task_rq(struct task_struct *p)
+ #ifdef CONFIG_SCHED_SMT
+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
+ #endif
+- cpumask_and(&tmp, &chk_mask, sched_rq_watermark) ||
+- cpumask_and(&tmp, &chk_mask,
+- sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p)))
++ sched_rq_watermark_and(&tmp, &chk_mask, 0, false) ||
++ sched_rq_watermark_and(&tmp, &chk_mask,
++ SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false))
+ return best_mask_cpu(task_cpu(p), &tmp);
+
+ return best_mask_cpu(task_cpu(p), &chk_mask);
+@@ -3977,7 +4053,7 @@ static inline void sg_balance(struct rq *rq)
+ * find potential cpus which can migrate the current running task
+ */
+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
+- cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) &&
++ sched_rq_watermark_and(&chk, cpu_online_mask, 0, true) &&
+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
+ int i;
+
+@@ -4285,9 +4361,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
+ #ifdef ALT_SCHED_DEBUG
+ void alt_sched_debug(void)
+ {
+- printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n",
++ printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n",
+ sched_rq_pending_mask.bits[0],
+- sched_rq_watermark[0].bits[0],
+ sched_sg_idle_mask.bits[0]);
+ }
+ #else
+@@ -7285,8 +7360,17 @@ void __init sched_init(void)
+ wait_bit_init();
+
+ #ifdef CONFIG_SMP
+- for (i = 0; i < SCHED_QUEUE_BITS; i++)
+- cpumask_copy(sched_rq_watermark + i, cpu_present_mask);
++ for (i = 0; i < nr_cpu_ids; i++) {
++ long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0;
++ int j;
++ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
++ atomic_long_set(&sched_rq_watermark[i].bits[j], val);
++ }
++ for (i = nr_cpu_ids; i < NR_CPUS; i++) {
++ int j;
++ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
++ atomic_long_set(&sched_rq_watermark[i].bits[j], 0);
++ }
+ #endif
+
+ #ifdef CONFIG_CGROUP_SCHED
+
+From 5b3b4b3d14c234196c807568905ee2e013565508 Mon Sep 17 00:00:00 2001
+From: Torge Matthies <openglfreak@××××××××××.com>
+Date: Tue, 15 Mar 2022 23:08:54 +0100
+Subject: [PATCH 06/10] sched/alt: Add memory barriers around atomics.
+
+---
+ kernel/sched/alt_core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
+index 7929b810ba74f..b0cb6b772d5fa 100644
+--- a/kernel/sched/alt_core.c
++++ b/kernel/sched/alt_core.c
+@@ -156,10 +156,12 @@ static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp;
+
+ #define x(p, set, mask) \
+ do { \
++ smp_mb__before_atomic(); \
+ if (set) \
+ atomic_long_or((mask), (p)); \
+ else \
+ atomic_long_and(~(mask), (p)); \
++ smp_mb__after_atomic(); \
+ } while (0)
+
+ static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end,
+@@ -191,7 +193,9 @@ static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned
+ }
+
+ while (end_idx != start_idx) {
++ smp_mb__before_atomic();
+ atomic_long_set(p, set ? ~0UL : 0);
++ smp_mb__after_atomic();
+ p -= 1;
+ end_idx -= 1;
+ }
+