1 |
Author: mpagano |
2 |
Date: 2013-09-02 23:10:55 +0000 (Mon, 02 Sep 2013) |
3 |
New Revision: 2508 |
4 |
|
5 |
Removed: |
6 |
genpatches-2.6/trunk/3.11/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.10.patch |
7 |
genpatches-2.6/trunk/3.11/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.10.patch1 |
8 |
genpatches-2.6/trunk/3.11/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.10.0.patch1 |
9 |
Modified: |
10 |
genpatches-2.6/trunk/3.11/0000_README |
11 |
Log: |
12 |
Remove BFQ patches, waiting on updated patchset |
13 |
|
14 |
Modified: genpatches-2.6/trunk/3.11/0000_README |
15 |
=================================================================== |
16 |
--- genpatches-2.6/trunk/3.11/0000_README 2013-09-02 23:07:59 UTC (rev 2507) |
17 |
+++ genpatches-2.6/trunk/3.11/0000_README 2013-09-02 23:10:55 UTC (rev 2508) |
18 |
@@ -47,22 +47,6 @@ |
19 |
From: https://bugs.gentoo.org/show_bug.cgi?id=449248 |
20 |
Desc: Enable mic mute led in thinkpads |
21 |
|
22 |
-Patch: 1800_memcg-OOM-revert-ZFS-deadlock.patch |
23 |
-From: https://bugs.gentoo.org/show_bug.cgi?id=462066 |
24 |
-Desc: Revert memcg patches that prevent OOM with too many dirty pages. |
25 |
- |
26 |
-Patch: 1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.10.patch |
27 |
-From: http://algo.ing.unimo.it/people/paolo/disk_sched/ |
28 |
-Desc: BFQ v6r2 patch 1 for 3.10: Build, cgroups and kconfig bits |
29 |
- |
30 |
-Patch: 1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.10.patch1 |
31 |
-From: http://algo.ing.unimo.it/people/paolo/disk_sched/ |
32 |
-Desc: BFQ v6r2 patch 2 for 3.10: BFQ Scheduler |
33 |
- |
34 |
-Patch: 1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.10.0.patch1 |
35 |
-From: http://algo.ing.unimo.it/people/paolo/disk_sched/ |
36 |
-Desc: BFQ v6r2 patch 3 for 3.10: Early Queue Merge (EQM) |
37 |
- |
38 |
Patch: 2400_kcopy-patch-for-infiniband-driver.patch |
39 |
From: Alexey Shvetsov <alexxy@g.o> |
40 |
Desc: Zero copy for infiniband psm userspace driver |
41 |
|
42 |
Deleted: genpatches-2.6/trunk/3.11/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.10.patch |
43 |
=================================================================== |
44 |
--- genpatches-2.6/trunk/3.11/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.10.patch 2013-09-02 23:07:59 UTC (rev 2507) |
45 |
+++ genpatches-2.6/trunk/3.11/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.10.patch 2013-09-02 23:10:55 UTC (rev 2508) |
46 |
@@ -1,97 +0,0 @@ |
47 |
-From 13fa5ddac2963e304e90c5beb4bc996e3557479d Mon Sep 17 00:00:00 2001 |
48 |
-From: Matteo Bernardini <matteo.bernardini@×××××.com> |
49 |
-Date: Thu, 9 May 2013 18:58:50 +0200 |
50 |
-Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v6r2-3.10 |
51 |
- |
52 |
-Update Kconfig.iosched and do the related Makefile changes to include |
53 |
-kernel configuration options for BFQ. Also add the bfqio controller |
54 |
-to the cgroups subsystem. |
55 |
- |
56 |
-Signed-off-by: Paolo Valente <paolo.valente@×××××××.it> |
57 |
-Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com> |
58 |
-Signed-off-by: Matteo Bernardini <matteo.bernardini@×××××.com> |
59 |
---- |
60 |
- block/Kconfig.iosched | 25 +++++++++++++++++++++++++ |
61 |
- block/Makefile | 1 + |
62 |
- include/linux/cgroup_subsys.h | 6 ++++++ |
63 |
- 3 files changed, 32 insertions(+) |
64 |
- |
65 |
-diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched |
66 |
-index 421bef9..695e064 100644 |
67 |
---- a/block/Kconfig.iosched |
68 |
-+++ b/block/Kconfig.iosched |
69 |
-@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED |
70 |
- ---help--- |
71 |
- Enable group IO scheduling in CFQ. |
72 |
- |
73 |
-+config IOSCHED_BFQ |
74 |
-+ tristate "BFQ I/O scheduler" |
75 |
-+ default n |
76 |
-+ ---help--- |
77 |
-+ The BFQ I/O scheduler tries to distribute bandwidth among |
78 |
-+ all processes according to their weights. |
79 |
-+ It aims at distributing the bandwidth as desired, independently of |
80 |
-+ the disk parameters and with any workload. It also tries to |
81 |
-+ guarantee low latency to interactive and soft real-time |
82 |
-+ applications. If compiled built-in (saying Y here), BFQ can |
83 |
-+ be configured to support hierarchical scheduling. |
84 |
-+ |
85 |
-+config CGROUP_BFQIO |
86 |
-+ bool "BFQ hierarchical scheduling support" |
87 |
-+ depends on CGROUPS && IOSCHED_BFQ=y |
88 |
-+ default n |
89 |
-+ ---help--- |
90 |
-+ Enable hierarchical scheduling in BFQ, using the cgroups |
91 |
-+ filesystem interface. The name of the subsystem will be |
92 |
-+ bfqio. |
93 |
-+ |
94 |
- choice |
95 |
- prompt "Default I/O scheduler" |
96 |
- default DEFAULT_CFQ |
97 |
-@@ -52,6 +73,9 @@ choice |
98 |
- config DEFAULT_CFQ |
99 |
- bool "CFQ" if IOSCHED_CFQ=y |
100 |
- |
101 |
-+ config DEFAULT_BFQ |
102 |
-+ bool "BFQ" if IOSCHED_BFQ=y |
103 |
-+ |
104 |
- config DEFAULT_NOOP |
105 |
- bool "No-op" |
106 |
- |
107 |
-@@ -61,6 +85,7 @@ config DEFAULT_IOSCHED |
108 |
- string |
109 |
- default "deadline" if DEFAULT_DEADLINE |
110 |
- default "cfq" if DEFAULT_CFQ |
111 |
-+ default "bfq" if DEFAULT_BFQ |
112 |
- default "noop" if DEFAULT_NOOP |
113 |
- |
114 |
- endmenu |
115 |
-diff --git a/block/Makefile b/block/Makefile |
116 |
-index 39b76ba..c0d20fa 100644 |
117 |
---- a/block/Makefile |
118 |
-+++ b/block/Makefile |
119 |
-@@ -15,6 +15,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o |
120 |
- obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
121 |
- obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o |
122 |
- obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o |
123 |
-+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o |
124 |
- |
125 |
- obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o |
126 |
- obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o |
127 |
-diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h |
128 |
-index 6e7ec64..ffa1d1f 100644 |
129 |
---- a/include/linux/cgroup_subsys.h |
130 |
-+++ b/include/linux/cgroup_subsys.h |
131 |
-@@ -84,3 +84,9 @@ SUBSYS(bcache) |
132 |
- #endif |
133 |
- |
134 |
- /* */ |
135 |
-+ |
136 |
-+#ifdef CONFIG_CGROUP_BFQIO |
137 |
-+SUBSYS(bfqio) |
138 |
-+#endif |
139 |
-+ |
140 |
-+/* */ |
141 |
--- |
142 |
-1.8.1.4 |
143 |
- |
144 |
|
145 |
Deleted: genpatches-2.6/trunk/3.11/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.10.patch1 |
146 |
=================================================================== |
147 |
--- genpatches-2.6/trunk/3.11/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.10.patch1 2013-09-02 23:07:59 UTC (rev 2507) |
148 |
+++ genpatches-2.6/trunk/3.11/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.10.patch1 2013-09-02 23:10:55 UTC (rev 2508) |
149 |
@@ -1,5775 +0,0 @@ |
150 |
-From 2e949c3d4d8ba2af46dcedc80707ebba277d759f Mon Sep 17 00:00:00 2001 |
151 |
-From: Arianna Avanzini <avanzini.arianna@×××××.com> |
152 |
-Date: Thu, 9 May 2013 19:10:02 +0200 |
153 |
-Subject: [PATCH 2/3] block: introduce the BFQ-v6r2 I/O sched for 3.10 |
154 |
- |
155 |
-Add the BFQ-v6r2 I/O scheduler to 3.10. |
156 |
-The general structure is borrowed from CFQ, as is much of the code. A (bfq_)queue |
157 |
-is associated to each task doing I/O on a device, and each time a |
158 |
-scheduling decision has to be made a queue is selected and served until |
159 |
-it expires. |
160 |
- |
161 |
- - Slices are given in the service domain: tasks are assigned |
162 |
- budgets, measured in number of sectors. Once got the disk, a task |
163 |
- must however consume its assigned budget within a configurable |
164 |
- maximum time (by default, the maximum possible value of the |
165 |
- budgets is automatically computed to comply with this timeout). |
166 |
- This allows the desired latency vs "throughput boosting" tradeoff |
167 |
- to be set. |
168 |
- |
169 |
- - Budgets are scheduled according to a variant of WF2Q+, implemented |
170 |
- using an augmented rb-tree to take eligibility into account while |
171 |
- preserving an O(log N) overall complexity. |
172 |
- |
173 |
- - A low-latency tunable is provided; if enabled, both interactive |
174 |
- and soft real-time applications are guaranteed very low latency. |
175 |
- |
176 |
- - Latency guarantees are preserved also in presence of NCQ. |
177 |
- |
178 |
- - Also with flash-based devices, a high throughput is achieved while |
179 |
- still preserving latency guarantees. |
180 |
- |
181 |
- - Useful features borrowed from CFQ: cooperating-queues merging (with |
182 |
- some additional optimizations with respect to the original CFQ version), |
183 |
- static fallback queue for OOM. |
184 |
- |
185 |
- - BFQ supports full hierarchical scheduling, exporting a cgroups |
186 |
- interface. Each node has a full scheduler, so each group can |
187 |
- be assigned its own ioprio (mapped to a weight, see next point) |
188 |
- and an ioprio_class. |
189 |
- |
190 |
- - If the cgroups interface is used, weights can be explicitly |
191 |
- assigned, otherwise ioprio values are mapped to weights using the |
192 |
- relation weight = IOPRIO_BE_NR - ioprio. |
193 |
- |
194 |
- - ioprio classes are served in strict priority order, i.e., lower |
195 |
- priority queues are not served as long as there are higher |
196 |
- priority queues. Among queues in the same class the bandwidth is |
197 |
- distributed in proportion to the weight of each queue. A very |
198 |
- thin extra bandwidth is however guaranteed to the Idle class, to |
199 |
- prevent it from starving. |
200 |
- |
201 |
-Signed-off-by: Paolo Valente <paolo.valente@×××××××.it> |
202 |
-Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com> |
203 |
---- |
204 |
- block/bfq-cgroup.c | 881 ++++++++++++ |
205 |
- block/bfq-ioc.c | 36 + |
206 |
- block/bfq-iosched.c | 3070 +++++++++++++++++++++++++++++++++++++++++ |
207 |
- block/bfq-sched.c | 1072 ++++++++++++++ |
208 |
- block/bfq.h | 603 ++++++++ |
209 |
- include/linux/cgroup_subsys.h | 2 +- |
210 |
- 6 files changed, 5663 insertions(+), 1 deletion(-) |
211 |
- create mode 100644 block/bfq-cgroup.c |
212 |
- create mode 100644 block/bfq-ioc.c |
213 |
- create mode 100644 block/bfq-iosched.c |
214 |
- create mode 100644 block/bfq-sched.c |
215 |
- create mode 100644 block/bfq.h |
216 |
- |
217 |
-diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c |
218 |
-new file mode 100644 |
219 |
-index 0000000..6d57239 |
220 |
---- /dev/null |
221 |
-+++ b/block/bfq-cgroup.c |
222 |
-@@ -0,0 +1,881 @@ |
223 |
-+/* |
224 |
-+ * BFQ: CGROUPS support. |
225 |
-+ * |
226 |
-+ * Based on ideas and code from CFQ: |
227 |
-+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk> |
228 |
-+ * |
229 |
-+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it> |
230 |
-+ * Paolo Valente <paolo.valente@×××××××.it> |
231 |
-+ * |
232 |
-+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it> |
233 |
-+ * |
234 |
-+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. |
235 |
-+ */ |
236 |
-+ |
237 |
-+#ifdef CONFIG_CGROUP_BFQIO |
238 |
-+ |
239 |
-+static DEFINE_MUTEX(bfqio_mutex); |
240 |
-+ |
241 |
-+static bool bfqio_is_removed(struct cgroup *cgroup) |
242 |
-+{ |
243 |
-+ return test_bit(CGRP_REMOVED, &cgroup->flags); |
244 |
-+} |
245 |
-+ |
246 |
-+static struct bfqio_cgroup bfqio_root_cgroup = { |
247 |
-+ .weight = BFQ_DEFAULT_GRP_WEIGHT, |
248 |
-+ .ioprio = BFQ_DEFAULT_GRP_IOPRIO, |
249 |
-+ .ioprio_class = BFQ_DEFAULT_GRP_CLASS, |
250 |
-+}; |
251 |
-+ |
252 |
-+static inline void bfq_init_entity(struct bfq_entity *entity, |
253 |
-+ struct bfq_group *bfqg) |
254 |
-+{ |
255 |
-+ entity->weight = entity->new_weight; |
256 |
-+ entity->orig_weight = entity->new_weight; |
257 |
-+ entity->ioprio = entity->new_ioprio; |
258 |
-+ entity->ioprio_class = entity->new_ioprio_class; |
259 |
-+ entity->parent = bfqg->my_entity; |
260 |
-+ entity->sched_data = &bfqg->sched_data; |
261 |
-+} |
262 |
-+ |
263 |
-+static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) |
264 |
-+{ |
265 |
-+ return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), |
266 |
-+ struct bfqio_cgroup, css); |
267 |
-+} |
268 |
-+ |
269 |
-+/* |
270 |
-+ * Search the bfq_group for bfqd into the hash table (by now only a list) |
271 |
-+ * of bgrp. Must be called under rcu_read_lock(). |
272 |
-+ */ |
273 |
-+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, |
274 |
-+ struct bfq_data *bfqd) |
275 |
-+{ |
276 |
-+ struct bfq_group *bfqg; |
277 |
-+ void *key; |
278 |
-+ |
279 |
-+ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) { |
280 |
-+ key = rcu_dereference(bfqg->bfqd); |
281 |
-+ if (key == bfqd) |
282 |
-+ return bfqg; |
283 |
-+ } |
284 |
-+ |
285 |
-+ return NULL; |
286 |
-+} |
287 |
-+ |
288 |
-+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, |
289 |
-+ struct bfq_group *bfqg) |
290 |
-+{ |
291 |
-+ struct bfq_entity *entity = &bfqg->entity; |
292 |
-+ |
293 |
-+ /* |
294 |
-+ * If the weight of the entity has never been set via the sysfs |
295 |
-+ * interface, then bgrp->weight == 0. In this case we initialize |
296 |
-+ * the weight from the current ioprio value. Otherwise, the group |
297 |
-+ * weight, if set, has priority over the ioprio value. |
298 |
-+ */ |
299 |
-+ if (bgrp->weight == 0) { |
300 |
-+ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); |
301 |
-+ entity->new_ioprio = bgrp->ioprio; |
302 |
-+ } else { |
303 |
-+ entity->new_weight = bgrp->weight; |
304 |
-+ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); |
305 |
-+ } |
306 |
-+ entity->orig_weight = entity->weight = entity->new_weight; |
307 |
-+ entity->ioprio = entity->new_ioprio; |
308 |
-+ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; |
309 |
-+ entity->my_sched_data = &bfqg->sched_data; |
310 |
-+} |
311 |
-+ |
312 |
-+static inline void bfq_group_set_parent(struct bfq_group *bfqg, |
313 |
-+ struct bfq_group *parent) |
314 |
-+{ |
315 |
-+ struct bfq_entity *entity; |
316 |
-+ |
317 |
-+ BUG_ON(parent == NULL); |
318 |
-+ BUG_ON(bfqg == NULL); |
319 |
-+ |
320 |
-+ entity = &bfqg->entity; |
321 |
-+ entity->parent = parent->my_entity; |
322 |
-+ entity->sched_data = &parent->sched_data; |
323 |
-+} |
324 |
-+ |
325 |
-+/** |
326 |
-+ * bfq_group_chain_alloc - allocate a chain of groups. |
327 |
-+ * @bfqd: queue descriptor. |
328 |
-+ * @cgroup: the leaf cgroup this chain starts from. |
329 |
-+ * |
330 |
-+ * Allocate a chain of groups starting from the one belonging to |
331 |
-+ * @cgroup up to the root cgroup. Stop if a cgroup on the chain |
332 |
-+ * to the root has already an allocated group on @bfqd. |
333 |
-+ */ |
334 |
-+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, |
335 |
-+ struct cgroup *cgroup) |
336 |
-+{ |
337 |
-+ struct bfqio_cgroup *bgrp; |
338 |
-+ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; |
339 |
-+ |
340 |
-+ for (; cgroup != NULL; cgroup = cgroup->parent) { |
341 |
-+ bgrp = cgroup_to_bfqio(cgroup); |
342 |
-+ |
343 |
-+ bfqg = bfqio_lookup_group(bgrp, bfqd); |
344 |
-+ if (bfqg != NULL) { |
345 |
-+ /* |
346 |
-+ * All the cgroups in the path from there to the |
347 |
-+ * root must have a bfq_group for bfqd, so we don't |
348 |
-+ * need any more allocations. |
349 |
-+ */ |
350 |
-+ break; |
351 |
-+ } |
352 |
-+ |
353 |
-+ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); |
354 |
-+ if (bfqg == NULL) |
355 |
-+ goto cleanup; |
356 |
-+ |
357 |
-+ bfq_group_init_entity(bgrp, bfqg); |
358 |
-+ bfqg->my_entity = &bfqg->entity; |
359 |
-+ |
360 |
-+ if (leaf == NULL) { |
361 |
-+ leaf = bfqg; |
362 |
-+ prev = leaf; |
363 |
-+ } else { |
364 |
-+ bfq_group_set_parent(prev, bfqg); |
365 |
-+ /* |
366 |
-+ * Build a list of allocated nodes using the bfqd |
367 |
-+ * field, that is still unused and will be initialized |
368 |
-+ * only after the node will be connected. |
369 |
-+ */ |
370 |
-+ prev->bfqd = bfqg; |
371 |
-+ prev = bfqg; |
372 |
-+ } |
373 |
-+ } |
374 |
-+ |
375 |
-+ return leaf; |
376 |
-+ |
377 |
-+cleanup: |
378 |
-+ while (leaf != NULL) { |
379 |
-+ prev = leaf; |
380 |
-+ leaf = leaf->bfqd; |
381 |
-+ kfree(prev); |
382 |
-+ } |
383 |
-+ |
384 |
-+ return NULL; |
385 |
-+} |
386 |
-+ |
387 |
-+/** |
388 |
-+ * bfq_group_chain_link - link an allocated group chain to a cgroup hierarchy. |
389 |
-+ * @bfqd: the queue descriptor. |
390 |
-+ * @cgroup: the leaf cgroup to start from. |
391 |
-+ * @leaf: the leaf group (to be associated to @cgroup). |
392 |
-+ * |
393 |
-+ * Try to link a chain of groups to a cgroup hierarchy, connecting the |
394 |
-+ * nodes bottom-up, so we can be sure that when we find a cgroup in the |
395 |
-+ * hierarchy that already has a group associated to @bfqd all the nodes |
396 |
-+ * in the path to the root cgroup have one too. |
397 |
-+ * |
398 |
-+ * On locking: the queue lock protects the hierarchy (there is a hierarchy |
399 |
-+ * per device) while the bfqio_cgroup lock protects the list of groups |
400 |
-+ * belonging to the same cgroup. |
401 |
-+ */ |
402 |
-+static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, |
403 |
-+ struct bfq_group *leaf) |
404 |
-+{ |
405 |
-+ struct bfqio_cgroup *bgrp; |
406 |
-+ struct bfq_group *bfqg, *next, *prev = NULL; |
407 |
-+ unsigned long flags; |
408 |
-+ |
409 |
-+ assert_spin_locked(bfqd->queue->queue_lock); |
410 |
-+ |
411 |
-+ for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { |
412 |
-+ bgrp = cgroup_to_bfqio(cgroup); |
413 |
-+ next = leaf->bfqd; |
414 |
-+ |
415 |
-+ bfqg = bfqio_lookup_group(bgrp, bfqd); |
416 |
-+ BUG_ON(bfqg != NULL); |
417 |
-+ |
418 |
-+ spin_lock_irqsave(&bgrp->lock, flags); |
419 |
-+ |
420 |
-+ rcu_assign_pointer(leaf->bfqd, bfqd); |
421 |
-+ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); |
422 |
-+ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); |
423 |
-+ |
424 |
-+ spin_unlock_irqrestore(&bgrp->lock, flags); |
425 |
-+ |
426 |
-+ prev = leaf; |
427 |
-+ leaf = next; |
428 |
-+ } |
429 |
-+ |
430 |
-+ BUG_ON(cgroup == NULL && leaf != NULL); |
431 |
-+ if (cgroup != NULL && prev != NULL) { |
432 |
-+ bgrp = cgroup_to_bfqio(cgroup); |
433 |
-+ bfqg = bfqio_lookup_group(bgrp, bfqd); |
434 |
-+ bfq_group_set_parent(prev, bfqg); |
435 |
-+ } |
436 |
-+} |
437 |
-+ |
438 |
-+/** |
439 |
-+ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. |
440 |
-+ * @bfqd: queue descriptor. |
441 |
-+ * @cgroup: cgroup being searched for. |
442 |
-+ * |
443 |
-+ * Return a group associated to @bfqd in @cgroup, allocating one if |
444 |
-+ * necessary. When a group is returned all the cgroups in the path |
445 |
-+ * to the root have a group associated to @bfqd. |
446 |
-+ * |
447 |
-+ * If the allocation fails, return the root group: this breaks guarantees |
448 |
-+ * but is a safe fallback. If this loss becomes a problem it can be |
449 |
-+ * mitigated using the equivalent weight (given by the product of the |
450 |
-+ * weights of the groups in the path from @group to the root) in the |
451 |
-+ * root scheduler. |
452 |
-+ * |
453 |
-+ * We allocate all the missing nodes in the path from the leaf cgroup |
454 |
-+ * to the root and we connect the nodes only after all the allocations |
455 |
-+ * have been successful. |
456 |
-+ */ |
457 |
-+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, |
458 |
-+ struct cgroup *cgroup) |
459 |
-+{ |
460 |
-+ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); |
461 |
-+ struct bfq_group *bfqg; |
462 |
-+ |
463 |
-+ bfqg = bfqio_lookup_group(bgrp, bfqd); |
464 |
-+ if (bfqg != NULL) |
465 |
-+ return bfqg; |
466 |
-+ |
467 |
-+ bfqg = bfq_group_chain_alloc(bfqd, cgroup); |
468 |
-+ if (bfqg != NULL) |
469 |
-+ bfq_group_chain_link(bfqd, cgroup, bfqg); |
470 |
-+ else |
471 |
-+ bfqg = bfqd->root_group; |
472 |
-+ |
473 |
-+ return bfqg; |
474 |
-+} |
475 |
-+ |
476 |
-+/** |
477 |
-+ * bfq_bfqq_move - migrate @bfqq to @bfqg. |
478 |
-+ * @bfqd: queue descriptor. |
479 |
-+ * @bfqq: the queue to move. |
480 |
-+ * @entity: @bfqq's entity. |
481 |
-+ * @bfqg: the group to move to. |
482 |
-+ * |
483 |
-+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating |
484 |
-+ * it on the new one. Avoid putting the entity on the old group idle tree. |
485 |
-+ * |
486 |
-+ * Must be called under the queue lock; the cgroup owning @bfqg must |
487 |
-+ * not disappear (by now this just means that we are called under |
488 |
-+ * rcu_read_lock()). |
489 |
-+ */ |
490 |
-+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
491 |
-+ struct bfq_entity *entity, struct bfq_group *bfqg) |
492 |
-+{ |
493 |
-+ int busy, resume; |
494 |
-+ |
495 |
-+ busy = bfq_bfqq_busy(bfqq); |
496 |
-+ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); |
497 |
-+ |
498 |
-+ BUG_ON(resume && !entity->on_st); |
499 |
-+ BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); |
500 |
-+ |
501 |
-+ if (busy) { |
502 |
-+ BUG_ON(atomic_read(&bfqq->ref) < 2); |
503 |
-+ |
504 |
-+ if (!resume) |
505 |
-+ bfq_del_bfqq_busy(bfqd, bfqq, 0); |
506 |
-+ else |
507 |
-+ bfq_deactivate_bfqq(bfqd, bfqq, 0); |
508 |
-+ } else if (entity->on_st) |
509 |
-+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); |
510 |
-+ |
511 |
-+ /* |
512 |
-+ * Here we use a reference to bfqg. We don't need a refcounter |
513 |
-+ * as the cgroup reference will not be dropped, so that its |
514 |
-+ * destroy() callback will not be invoked. |
515 |
-+ */ |
516 |
-+ entity->parent = bfqg->my_entity; |
517 |
-+ entity->sched_data = &bfqg->sched_data; |
518 |
-+ |
519 |
-+ if (busy && resume) |
520 |
-+ bfq_activate_bfqq(bfqd, bfqq); |
521 |
-+ |
522 |
-+ if (bfqd->active_queue == NULL && !bfqd->rq_in_driver) |
523 |
-+ bfq_schedule_dispatch(bfqd); |
524 |
-+} |
525 |
-+ |
526 |
-+/** |
527 |
-+ * __bfq_bic_change_cgroup - move @bic to @cgroup. |
528 |
-+ * @bfqd: the queue descriptor. |
529 |
-+ * @bic: the bic to move. |
530 |
-+ * @cgroup: the cgroup to move to. |
531 |
-+ * |
532 |
-+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller |
533 |
-+ * has to make sure that the reference to cgroup is valid across the call. |
534 |
-+ * |
535 |
-+ * NOTE: an alternative approach might have been to store the current |
536 |
-+ * cgroup in bfqq and getting a reference to it, reducing the lookup |
537 |
-+ * time here, at the price of slightly more complex code. |
538 |
-+ */ |
539 |
-+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, |
540 |
-+ struct bfq_io_cq *bic, |
541 |
-+ struct cgroup *cgroup) |
542 |
-+{ |
543 |
-+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); |
544 |
-+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); |
545 |
-+ struct bfq_entity *entity; |
546 |
-+ struct bfq_group *bfqg; |
547 |
-+ struct bfqio_cgroup *bgrp; |
548 |
-+ |
549 |
-+ bgrp = cgroup_to_bfqio(cgroup); |
550 |
-+ |
551 |
-+ bfqg = bfq_find_alloc_group(bfqd, cgroup); |
552 |
-+ if (async_bfqq != NULL) { |
553 |
-+ entity = &async_bfqq->entity; |
554 |
-+ |
555 |
-+ if (entity->sched_data != &bfqg->sched_data) { |
556 |
-+ bic_set_bfqq(bic, NULL, 0); |
557 |
-+ bfq_log_bfqq(bfqd, async_bfqq, |
558 |
-+ "bic_change_group: %p %d", |
559 |
-+ async_bfqq, atomic_read(&async_bfqq->ref)); |
560 |
-+ bfq_put_queue(async_bfqq); |
561 |
-+ } |
562 |
-+ } |
563 |
-+ |
564 |
-+ if (sync_bfqq != NULL) { |
565 |
-+ entity = &sync_bfqq->entity; |
566 |
-+ if (entity->sched_data != &bfqg->sched_data) |
567 |
-+ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); |
568 |
-+ } |
569 |
-+ |
570 |
-+ return bfqg; |
571 |
-+} |
572 |
-+ |
573 |
-+/** |
574 |
-+ * bfq_bic_change_cgroup - move @bic to @cgroup. |
575 |
-+ * @bic: the bic being migrated. |
576 |
-+ * @cgroup: the destination cgroup. |
577 |
-+ * |
578 |
-+ * When the task owning @bic is moved to @cgroup, @bic is immediately |
579 |
-+ * moved into its new parent group. |
580 |
-+ */ |
581 |
-+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic, |
582 |
-+ struct cgroup *cgroup) |
583 |
-+{ |
584 |
-+ struct bfq_data *bfqd; |
585 |
-+ unsigned long uninitialized_var(flags); |
586 |
-+ |
587 |
-+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags); |
588 |
-+ if (bfqd != NULL) { |
589 |
-+ __bfq_bic_change_cgroup(bfqd, bic, cgroup); |
590 |
-+ bfq_put_bfqd_unlock(bfqd, &flags); |
591 |
-+ } |
592 |
-+} |
593 |
-+ |
594 |
-+/** |
595 |
-+ * bfq_bic_update_cgroup - update the cgroup of @bic. |
596 |
-+ * @bic: the @bic to update. |
597 |
-+ * |
598 |
-+ * Make sure that @bic is enqueued in the cgroup of the current task. |
599 |
-+ * We need this in addition to moving bics during the cgroup attach |
600 |
-+ * phase because the task owning @bic could be at its first disk |
601 |
-+ * access or we may end up in the root cgroup as the result of a |
602 |
-+ * memory allocation failure and here we try to move to the right |
603 |
-+ * group. |
604 |
-+ * |
605 |
-+ * Must be called under the queue lock. It is safe to use the returned |
606 |
-+ * value even after the rcu_read_unlock() as the migration/destruction |
607 |
-+ * paths act under the queue lock too. IOW it is impossible to race with |
608 |
-+ * group migration/destruction and end up with an invalid group as: |
609 |
-+ * a) here cgroup has not yet been destroyed, nor its destroy callback |
610 |
-+ * has started execution, as current holds a reference to it, |
611 |
-+ * b) if it is destroyed after rcu_read_unlock() [after current is |
612 |
-+ * migrated to a different cgroup] its attach() callback will have |
613 |
-+ * taken care of removing all the references to the old cgroup data. |
614 |
-+ */ |
615 |
-+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic) |
616 |
-+{ |
617 |
-+ struct bfq_data *bfqd = bic_to_bfqd(bic); |
618 |
-+ struct bfq_group *bfqg; |
619 |
-+ struct cgroup *cgroup; |
620 |
-+ |
621 |
-+ BUG_ON(bfqd == NULL); |
622 |
-+ |
623 |
-+ rcu_read_lock(); |
624 |
-+ cgroup = task_cgroup(current, bfqio_subsys_id); |
625 |
-+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup); |
626 |
-+ rcu_read_unlock(); |
627 |
-+ |
628 |
-+ return bfqg; |
629 |
-+} |
630 |
-+ |
631 |
-+/** |
632 |
-+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. |
633 |
-+ * @st: the service tree being flushed. |
634 |
-+ */ |
635 |
-+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) |
636 |
-+{ |
637 |
-+ struct bfq_entity *entity = st->first_idle; |
638 |
-+ |
639 |
-+ for (; entity != NULL; entity = st->first_idle) |
640 |
-+ __bfq_deactivate_entity(entity, 0); |
641 |
-+} |
642 |
-+ |
643 |
-+/** |
644 |
-+ * bfq_reparent_leaf_entity - move leaf entity to the root_group. |
645 |
-+ * @bfqd: the device data structure with the root group. |
646 |
-+ * @entity: the entity to move. |
647 |
-+ */ |
648 |
-+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, |
649 |
-+ struct bfq_entity *entity) |
650 |
-+{ |
651 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
652 |
-+ |
653 |
-+ BUG_ON(bfqq == NULL); |
654 |
-+ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); |
655 |
-+ return; |
656 |
-+} |
657 |
-+ |
658 |
-+/** |
659 |
-+ * bfq_reparent_active_entities - move to the root group all active entities. |
660 |
-+ * @bfqd: the device data structure with the root group. |
661 |
-+ * @bfqg: the group to move from. |
662 |
-+ * @st: the service tree with the entities. |
663 |
-+ * |
664 |
-+ * Needs queue_lock to be taken and reference to be valid over the call. |
665 |
-+ */ |
666 |
-+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, |
667 |
-+ struct bfq_group *bfqg, |
668 |
-+ struct bfq_service_tree *st) |
669 |
-+{ |
670 |
-+ struct rb_root *active = &st->active; |
671 |
-+ struct bfq_entity *entity = NULL; |
672 |
-+ |
673 |
-+ if (!RB_EMPTY_ROOT(&st->active)) |
674 |
-+ entity = bfq_entity_of(rb_first(active)); |
675 |
-+ |
676 |
-+ for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) |
677 |
-+ bfq_reparent_leaf_entity(bfqd, entity); |
678 |
-+ |
679 |
-+ if (bfqg->sched_data.active_entity != NULL) |
680 |
-+ bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); |
681 |
-+ |
682 |
-+ return; |
683 |
-+} |
684 |
-+ |
685 |
-+/** |
686 |
-+ * bfq_destroy_group - destroy @bfqg. |
687 |
-+ * @bgrp: the bfqio_cgroup containing @bfqg. |
688 |
-+ * @bfqg: the group being destroyed. |
689 |
-+ * |
690 |
-+ * Destroy @bfqg, making sure that it is not referenced from its parent. |
691 |
-+ */ |
692 |
-+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) |
693 |
-+{ |
694 |
-+ struct bfq_data *bfqd; |
695 |
-+ struct bfq_service_tree *st; |
696 |
-+ struct bfq_entity *entity = bfqg->my_entity; |
697 |
-+ unsigned long uninitialized_var(flags); |
698 |
-+ int i; |
699 |
-+ |
700 |
-+ hlist_del(&bfqg->group_node); |
701 |
-+ |
702 |
-+ /* |
703 |
-+ * Empty all service_trees belonging to this group before deactivating |
704 |
-+ * the group itself. |
705 |
-+ */ |
706 |
-+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { |
707 |
-+ st = bfqg->sched_data.service_tree + i; |
708 |
-+ |
709 |
-+ /* |
710 |
-+ * The idle tree may still contain bfq_queues belonging |
711 |
-+ * to exited task because they never migrated to a different |
712 |
-+ * cgroup from the one being destroyed now. No one else |
713 |
-+ * can access them so it's safe to act without any lock. |
714 |
-+ */ |
715 |
-+ bfq_flush_idle_tree(st); |
716 |
-+ |
717 |
-+ /* |
718 |
-+ * It may happen that some queues are still active |
719 |
-+ * (busy) upon group destruction (if the corresponding |
720 |
-+ * processes have been forced to terminate). We move |
721 |
-+ * all the leaf entities corresponding to these queues |
722 |
-+ * to the root_group. |
723 |
-+ * Also, it may happen that the group has an entity |
724 |
-+ * under service, which is disconnected from the active |
725 |
-+ * tree: it must be moved, too. |
726 |
-+ * There is no need to put the sync queues, as the |
727 |
-+ * scheduler has taken no reference. |
728 |
-+ */ |
729 |
-+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); |
730 |
-+ if (bfqd != NULL) { |
731 |
-+ bfq_reparent_active_entities(bfqd, bfqg, st); |
732 |
-+ bfq_put_bfqd_unlock(bfqd, &flags); |
733 |
-+ } |
734 |
-+ BUG_ON(!RB_EMPTY_ROOT(&st->active)); |
735 |
-+ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); |
736 |
-+ } |
737 |
-+ BUG_ON(bfqg->sched_data.next_active != NULL); |
738 |
-+ BUG_ON(bfqg->sched_data.active_entity != NULL); |
739 |
-+ |
740 |
-+ /* |
741 |
-+ * We may race with device destruction, take extra care when |
742 |
-+ * dereferencing bfqg->bfqd. |
743 |
-+ */ |
744 |
-+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); |
745 |
-+ if (bfqd != NULL) { |
746 |
-+ hlist_del(&bfqg->bfqd_node); |
747 |
-+ __bfq_deactivate_entity(entity, 0); |
748 |
-+ bfq_put_async_queues(bfqd, bfqg); |
749 |
-+ bfq_put_bfqd_unlock(bfqd, &flags); |
750 |
-+ } |
751 |
-+ BUG_ON(entity->tree != NULL); |
752 |
-+ |
753 |
-+ /* |
754 |
-+ * No need to defer the kfree() to the end of the RCU grace |
755 |
-+ * period: we are called from the destroy() callback of our |
756 |
-+ * cgroup, so we can be sure that no one is a) still using |
757 |
-+ * this cgroup or b) doing lookups in it. |
758 |
-+ */ |
759 |
-+ kfree(bfqg); |
760 |
-+} |
761 |
-+ |
762 |
-+static void bfq_end_raising_async(struct bfq_data *bfqd) |
763 |
-+{ |
764 |
-+ struct hlist_node *tmp; |
765 |
-+ struct bfq_group *bfqg; |
766 |
-+ |
767 |
-+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) |
768 |
-+ bfq_end_raising_async_queues(bfqd, bfqg); |
769 |
-+} |
770 |
-+ |
771 |
-+/** |
772 |
-+ * bfq_disconnect_groups - disconnect @bfqd from all its groups. |
773 |
-+ * @bfqd: the device descriptor being exited. |
774 |
-+ * |
775 |
-+ * When the device exits we just make sure that no lookup can return |
776 |
-+ * the now unused group structures. They will be deallocated on cgroup |
777 |
-+ * destruction. |
778 |
-+ */ |
779 |
-+static void bfq_disconnect_groups(struct bfq_data *bfqd) |
780 |
-+{ |
781 |
-+ struct hlist_node *tmp; |
782 |
-+ struct bfq_group *bfqg; |
783 |
-+ |
784 |
-+ bfq_log(bfqd, "disconnect_groups beginning") ; |
785 |
-+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) { |
786 |
-+ hlist_del(&bfqg->bfqd_node); |
787 |
-+ |
788 |
-+ __bfq_deactivate_entity(bfqg->my_entity, 0); |
789 |
-+ |
790 |
-+ /* |
791 |
-+ * Don't remove from the group hash, just set an |
792 |
-+ * invalid key. No lookups can race with the |
793 |
-+ * assignment as bfqd is being destroyed; this |
794 |
-+ * implies also that new elements cannot be added |
795 |
-+ * to the list. |
796 |
-+ */ |
797 |
-+ rcu_assign_pointer(bfqg->bfqd, NULL); |
798 |
-+ |
799 |
-+ bfq_log(bfqd, "disconnect_groups: put async for group %p", |
800 |
-+ bfqg) ; |
801 |
-+ bfq_put_async_queues(bfqd, bfqg); |
802 |
-+ } |
803 |
-+} |
804 |
-+ |
805 |
-+static inline void bfq_free_root_group(struct bfq_data *bfqd) |
806 |
-+{ |
807 |
-+ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; |
808 |
-+ struct bfq_group *bfqg = bfqd->root_group; |
809 |
-+ |
810 |
-+ bfq_put_async_queues(bfqd, bfqg); |
811 |
-+ |
812 |
-+ spin_lock_irq(&bgrp->lock); |
813 |
-+ hlist_del_rcu(&bfqg->group_node); |
814 |
-+ spin_unlock_irq(&bgrp->lock); |
815 |
-+ |
816 |
-+ /* |
817 |
-+ * No need to synchronize_rcu() here: since the device is gone |
818 |
-+ * there cannot be any read-side access to its root_group. |
819 |
-+ */ |
820 |
-+ kfree(bfqg); |
821 |
-+} |
822 |
-+ |
823 |
-+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) |
824 |
-+{ |
825 |
-+ struct bfq_group *bfqg; |
826 |
-+ struct bfqio_cgroup *bgrp; |
827 |
-+ int i; |
828 |
-+ |
829 |
-+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); |
830 |
-+ if (bfqg == NULL) |
831 |
-+ return NULL; |
832 |
-+ |
833 |
-+ bfqg->entity.parent = NULL; |
834 |
-+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) |
835 |
-+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; |
836 |
-+ |
837 |
-+ bgrp = &bfqio_root_cgroup; |
838 |
-+ spin_lock_irq(&bgrp->lock); |
839 |
-+ rcu_assign_pointer(bfqg->bfqd, bfqd); |
840 |
-+ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); |
841 |
-+ spin_unlock_irq(&bgrp->lock); |
842 |
-+ |
843 |
-+ return bfqg; |
844 |
-+} |
845 |
-+ |
846 |
-+#define SHOW_FUNCTION(__VAR) \ |
847 |
-+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ |
848 |
-+ struct cftype *cftype) \ |
849 |
-+{ \ |
850 |
-+ struct bfqio_cgroup *bgrp; \ |
851 |
-+ u64 ret = -ENODEV; \ |
852 |
-+ \ |
853 |
-+ mutex_lock(&bfqio_mutex); \ |
854 |
-+ if (bfqio_is_removed(cgroup)) \ |
855 |
-+ goto out_unlock; \ |
856 |
-+ \ |
857 |
-+ bgrp = cgroup_to_bfqio(cgroup); \ |
858 |
-+ spin_lock_irq(&bgrp->lock); \ |
859 |
-+ ret = bgrp->__VAR; \ |
860 |
-+ spin_unlock_irq(&bgrp->lock); \ |
861 |
-+ \ |
862 |
-+out_unlock: \ |
863 |
-+ mutex_unlock(&bfqio_mutex); \ |
864 |
-+ return ret; \ |
865 |
-+} |
866 |
-+ |
867 |
-+SHOW_FUNCTION(weight); |
868 |
-+SHOW_FUNCTION(ioprio); |
869 |
-+SHOW_FUNCTION(ioprio_class); |
870 |
-+#undef SHOW_FUNCTION |
871 |
-+ |
872 |
-+#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ |
873 |
-+static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ |
874 |
-+ struct cftype *cftype, \ |
875 |
-+ u64 val) \ |
876 |
-+{ \ |
877 |
-+ struct bfqio_cgroup *bgrp; \ |
878 |
-+ struct bfq_group *bfqg; \ |
879 |
-+ int ret = -EINVAL; \ |
880 |
-+ \ |
881 |
-+ if (val < (__MIN) || val > (__MAX)) \ |
882 |
-+ return ret; \ |
883 |
-+ \ |
884 |
-+ ret = -ENODEV; \ |
885 |
-+ mutex_lock(&bfqio_mutex); \ |
886 |
-+ if (bfqio_is_removed(cgroup)) \ |
887 |
-+ goto out_unlock; \ |
888 |
-+ ret = 0; \ |
889 |
-+ \ |
890 |
-+ bgrp = cgroup_to_bfqio(cgroup); \ |
891 |
-+ \ |
892 |
-+ spin_lock_irq(&bgrp->lock); \ |
893 |
-+ bgrp->__VAR = (unsigned short)val; \ |
894 |
-+ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \ |
895 |
-+ /* \ |
896 |
-+ * Setting the ioprio_changed flag of the entity \ |
897 |
-+ * to 1 with new_##__VAR == ##__VAR would re-set \ |
898 |
-+ * the value of the weight to its ioprio mapping. \ |
899 |
-+ * Set the flag only if necessary. \ |
900 |
-+ */ \ |
901 |
-+ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ |
902 |
-+ bfqg->entity.new_##__VAR = (unsigned short)val; \ |
903 |
-+ smp_wmb(); \ |
904 |
-+ bfqg->entity.ioprio_changed = 1; \ |
905 |
-+ } \ |
906 |
-+ } \ |
907 |
-+ spin_unlock_irq(&bgrp->lock); \ |
908 |
-+ \ |
909 |
-+out_unlock: \ |
910 |
-+ mutex_unlock(&bfqio_mutex); \ |
911 |
-+ return ret; \ |
912 |
-+} |
913 |
-+ |
914 |
-+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); |
915 |
-+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); |
916 |
-+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); |
917 |
-+#undef STORE_FUNCTION |
918 |
-+ |
919 |
-+static struct cftype bfqio_files[] = { |
920 |
-+ { |
921 |
-+ .name = "weight", |
922 |
-+ .read_u64 = bfqio_cgroup_weight_read, |
923 |
-+ .write_u64 = bfqio_cgroup_weight_write, |
924 |
-+ }, |
925 |
-+ { |
926 |
-+ .name = "ioprio", |
927 |
-+ .read_u64 = bfqio_cgroup_ioprio_read, |
928 |
-+ .write_u64 = bfqio_cgroup_ioprio_write, |
929 |
-+ }, |
930 |
-+ { |
931 |
-+ .name = "ioprio_class", |
932 |
-+ .read_u64 = bfqio_cgroup_ioprio_class_read, |
933 |
-+ .write_u64 = bfqio_cgroup_ioprio_class_write, |
934 |
-+ }, |
935 |
-+ { }, /* terminate */ |
936 |
-+}; |
937 |
-+ |
938 |
-+static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup) |
939 |
-+{ |
940 |
-+ struct bfqio_cgroup *bgrp; |
941 |
-+ |
942 |
-+ if (cgroup->parent != NULL) { |
943 |
-+ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); |
944 |
-+ if (bgrp == NULL) |
945 |
-+ return ERR_PTR(-ENOMEM); |
946 |
-+ } else |
947 |
-+ bgrp = &bfqio_root_cgroup; |
948 |
-+ |
949 |
-+ spin_lock_init(&bgrp->lock); |
950 |
-+ INIT_HLIST_HEAD(&bgrp->group_data); |
951 |
-+ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; |
952 |
-+ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; |
953 |
-+ |
954 |
-+ return &bgrp->css; |
955 |
-+} |
956 |
-+ |
957 |
-+/* |
958 |
-+ * We cannot support shared io contexts, as we have no means to support |
959 |
-+ * two tasks with the same ioc in two different groups without major rework |
960 |
-+ * of the main bic/bfqq data structures. By now we allow a task to change |
961 |
-+ * its cgroup only if it's the only owner of its ioc; the drawback of this |
962 |
-+ * behavior is that a group containing a task that forked using CLONE_IO |
963 |
-+ * will not be destroyed until the tasks sharing the ioc die. |
964 |
-+ */ |
965 |
-+static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) |
966 |
-+{ |
967 |
-+ struct task_struct *task; |
968 |
-+ struct io_context *ioc; |
969 |
-+ int ret = 0; |
970 |
-+ |
971 |
-+ cgroup_taskset_for_each(task, cgroup, tset) { |
972 |
-+ /* task_lock() is needed to avoid races with exit_io_context() */ |
973 |
-+ task_lock(task); |
974 |
-+ ioc = task->io_context; |
975 |
-+ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) |
976 |
-+ /* |
977 |
-+ * ioc == NULL means that the task is either too young or |
978 |
-+ * exiting: if it has still no ioc the ioc can't be shared, |
979 |
-+ * if the task is exiting the attach will fail anyway, no |
980 |
-+ * matter what we return here. |
981 |
-+ */ |
982 |
-+ ret = -EINVAL; |
983 |
-+ task_unlock(task); |
984 |
-+ if (ret) |
985 |
-+ break; |
986 |
-+ } |
987 |
-+ |
988 |
-+ return ret; |
989 |
-+} |
990 |
-+ |
991 |
-+static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) |
992 |
-+{ |
993 |
-+ struct task_struct *task; |
994 |
-+ struct io_context *ioc; |
995 |
-+ struct io_cq *icq; |
996 |
-+ |
997 |
-+ /* |
998 |
-+ * IMPORTANT NOTE: The move of more than one process at a time to a |
999 |
-+ * new group has not yet been tested. |
1000 |
-+ */ |
1001 |
-+ cgroup_taskset_for_each(task, cgroup, tset) { |
1002 |
-+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); |
1003 |
-+ if (ioc) { |
1004 |
-+ /* |
1005 |
-+ * Handle cgroup change here. |
1006 |
-+ */ |
1007 |
-+ rcu_read_lock(); |
1008 |
-+ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node) |
1009 |
-+ if (!strncmp(icq->q->elevator->type->elevator_name, |
1010 |
-+ "bfq", ELV_NAME_MAX)) |
1011 |
-+ bfq_bic_change_cgroup(icq_to_bic(icq), |
1012 |
-+ cgroup); |
1013 |
-+ rcu_read_unlock(); |
1014 |
-+ put_io_context(ioc); |
1015 |
-+ } |
1016 |
-+ } |
1017 |
-+} |
1018 |
-+ |
1019 |
-+static void bfqio_destroy(struct cgroup *cgroup) |
1020 |
-+{ |
1021 |
-+ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); |
1022 |
-+ struct hlist_node *tmp; |
1023 |
-+ struct bfq_group *bfqg; |
1024 |
-+ |
1025 |
-+ /* |
1026 |
-+ * Since we are destroying the cgroup, there are no more tasks |
1027 |
-+ * referencing it, and all the RCU grace periods that may have |
1028 |
-+ * referenced it are ended (as the destruction of the parent |
1029 |
-+ * cgroup is RCU-safe); bgrp->group_data will not be accessed by |
1030 |
-+ * anything else and we don't need any synchronization. |
1031 |
-+ */ |
1032 |
-+ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node) |
1033 |
-+ bfq_destroy_group(bgrp, bfqg); |
1034 |
-+ |
1035 |
-+ BUG_ON(!hlist_empty(&bgrp->group_data)); |
1036 |
-+ |
1037 |
-+ kfree(bgrp); |
1038 |
-+} |
1039 |
-+ |
1040 |
-+struct cgroup_subsys bfqio_subsys = { |
1041 |
-+ .name = "bfqio", |
1042 |
-+ .css_alloc = bfqio_create, |
1043 |
-+ .can_attach = bfqio_can_attach, |
1044 |
-+ .attach = bfqio_attach, |
1045 |
-+ .css_free = bfqio_destroy, |
1046 |
-+ .subsys_id = bfqio_subsys_id, |
1047 |
-+ .base_cftypes = bfqio_files, |
1048 |
-+}; |
1049 |
-+#else |
1050 |
-+static inline void bfq_init_entity(struct bfq_entity *entity, |
1051 |
-+ struct bfq_group *bfqg) |
1052 |
-+{ |
1053 |
-+ entity->weight = entity->new_weight; |
1054 |
-+ entity->orig_weight = entity->new_weight; |
1055 |
-+ entity->ioprio = entity->new_ioprio; |
1056 |
-+ entity->ioprio_class = entity->new_ioprio_class; |
1057 |
-+ entity->sched_data = &bfqg->sched_data; |
1058 |
-+} |
1059 |
-+ |
1060 |
-+static inline struct bfq_group * |
1061 |
-+bfq_bic_update_cgroup(struct bfq_io_cq *bic) |
1062 |
-+{ |
1063 |
-+ struct bfq_data *bfqd = bic_to_bfqd(bic); |
1064 |
-+ return bfqd->root_group; |
1065 |
-+} |
1066 |
-+ |
1067 |
-+static inline void bfq_bfqq_move(struct bfq_data *bfqd, |
1068 |
-+ struct bfq_queue *bfqq, |
1069 |
-+ struct bfq_entity *entity, |
1070 |
-+ struct bfq_group *bfqg) |
1071 |
-+{ |
1072 |
-+} |
1073 |
-+ |
1074 |
-+static void bfq_end_raising_async(struct bfq_data *bfqd) |
1075 |
-+{ |
1076 |
-+ bfq_end_raising_async_queues(bfqd, bfqd->root_group); |
1077 |
-+} |
1078 |
-+ |
1079 |
-+static inline void bfq_disconnect_groups(struct bfq_data *bfqd) |
1080 |
-+{ |
1081 |
-+ bfq_put_async_queues(bfqd, bfqd->root_group); |
1082 |
-+} |
1083 |
-+ |
1084 |
-+static inline void bfq_free_root_group(struct bfq_data *bfqd) |
1085 |
-+{ |
1086 |
-+ kfree(bfqd->root_group); |
1087 |
-+} |
1088 |
-+ |
1089 |
-+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) |
1090 |
-+{ |
1091 |
-+ struct bfq_group *bfqg; |
1092 |
-+ int i; |
1093 |
-+ |
1094 |
-+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); |
1095 |
-+ if (bfqg == NULL) |
1096 |
-+ return NULL; |
1097 |
-+ |
1098 |
-+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) |
1099 |
-+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; |
1100 |
-+ |
1101 |
-+ return bfqg; |
1102 |
-+} |
1103 |
-+#endif |
1104 |
-diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c |
1105 |
-new file mode 100644 |
1106 |
-index 0000000..326e3ec |
1107 |
---- /dev/null |
1108 |
-+++ b/block/bfq-ioc.c |
1109 |
-@@ -0,0 +1,36 @@ |
1110 |
-+/* |
1111 |
-+ * BFQ: I/O context handling. |
1112 |
-+ * |
1113 |
-+ * Based on ideas and code from CFQ: |
1114 |
-+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk> |
1115 |
-+ * |
1116 |
-+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it> |
1117 |
-+ * Paolo Valente <paolo.valente@×××××××.it> |
1118 |
-+ * |
1119 |
-+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it> |
1120 |
-+ */ |
1121 |
-+ |
1122 |
-+/** |
1123 |
-+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq. |
1124 |
-+ * @icq: the iocontext queue. |
1125 |
-+ */ |
1126 |
-+static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq) |
1127 |
-+{ |
1128 |
-+ /* bic->icq is the first member, %NULL will convert to %NULL */ |
1129 |
-+ return container_of(icq, struct bfq_io_cq, icq); |
1130 |
-+} |
1131 |
-+ |
1132 |
-+/** |
1133 |
-+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd. |
1134 |
-+ * @bfqd: the lookup key. |
1135 |
-+ * @ioc: the io_context of the process doing I/O. |
1136 |
-+ * |
1137 |
-+ * Queue lock must be held. |
1138 |
-+ */ |
1139 |
-+static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, |
1140 |
-+ struct io_context *ioc) |
1141 |
-+{ |
1142 |
-+ if(ioc) |
1143 |
-+ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue)); |
1144 |
-+ return NULL; |
1145 |
-+} |
1146 |
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c |
1147 |
-new file mode 100644 |
1148 |
-index 0000000..b230927 |
1149 |
---- /dev/null |
1150 |
-+++ b/block/bfq-iosched.c |
1151 |
-@@ -0,0 +1,3070 @@ |
1152 |
-+/* |
1153 |
-+ * BFQ, or Budget Fair Queueing, disk scheduler. |
1154 |
-+ * |
1155 |
-+ * Based on ideas and code from CFQ: |
1156 |
-+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk> |
1157 |
-+ * |
1158 |
-+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it> |
1159 |
-+ * Paolo Valente <paolo.valente@×××××××.it> |
1160 |
-+ * |
1161 |
-+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it> |
1162 |
-+ * |
1163 |
-+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. |
1164 |
-+ * |
1165 |
-+ * BFQ is a proportional share disk scheduling algorithm based on the |
1166 |
-+ * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, |
1167 |
-+ * measured in number of sectors, to tasks instead of time slices. |
1168 |
-+ * The disk is not granted to the active task for a given time slice, |
1169 |
-+ * but until it has exhausted its assigned budget. This change from |
1170 |
-+ * the time to the service domain allows BFQ to distribute the disk |
1171 |
-+ * bandwidth among tasks as desired, without any distortion due to |
1172 |
-+ * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc |
1173 |
-+ * internal scheduler, called B-WF2Q+, to schedule tasks according to |
1174 |
-+ * their budgets. Thanks to this accurate scheduler, BFQ can afford |
1175 |
-+ * to assign high budgets to disk-bound non-seeky tasks (to boost the |
1176 |
-+ * throughput), and yet guarantee low latencies to interactive and |
1177 |
-+ * soft real-time applications. |
1178 |
-+ * |
1179 |
-+ * BFQ has been introduced in [1], where the interested reader can |
1180 |
-+ * find an accurate description of the algorithm, the bandwidth |
1181 |
-+ * distribution and latency guarantees it provides, plus formal proofs |
1182 |
-+ * of all the properties. With respect to the algorithm presented in |
1183 |
-+ * the paper, this implementation adds several little heuristics, and |
1184 |
-+ * a hierarchical extension, based on H-WF2Q+. |
1185 |
-+ * |
1186 |
-+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with |
1187 |
-+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) |
1188 |
-+ * complexity derives from the one introduced with EEVDF in [3]. |
1189 |
-+ * |
1190 |
-+ * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling |
1191 |
-+ * with Deterministic Guarantees on Bandwidth Distribution,'', |
1192 |
-+ * IEEE Transactions on Computer, May 2010. |
1193 |
-+ * |
1194 |
-+ * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf |
1195 |
-+ * |
1196 |
-+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing |
1197 |
-+ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, |
1198 |
-+ * Oct 1997. |
1199 |
-+ * |
1200 |
-+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz |
1201 |
-+ * |
1202 |
-+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline |
1203 |
-+ * First: A Flexible and Accurate Mechanism for Proportional Share |
1204 |
-+ * Resource Allocation,'' technical report. |
1205 |
-+ * |
1206 |
-+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf |
1207 |
-+ */ |
1208 |
-+#include <linux/module.h> |
1209 |
-+#include <linux/slab.h> |
1210 |
-+#include <linux/blkdev.h> |
1211 |
-+#include <linux/cgroup.h> |
1212 |
-+#include <linux/elevator.h> |
1213 |
-+#include <linux/jiffies.h> |
1214 |
-+#include <linux/rbtree.h> |
1215 |
-+#include <linux/ioprio.h> |
1216 |
-+#include "bfq.h" |
1217 |
-+#include "blk.h" |
1218 |
-+ |
1219 |
-+/* Max number of dispatches in one round of service. */ |
1220 |
-+static const int bfq_quantum = 4; |
1221 |
-+ |
1222 |
-+/* Expiration time of sync (0) and async (1) requests, in jiffies. */ |
1223 |
-+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; |
1224 |
-+ |
1225 |
-+/* Maximum backwards seek, in KiB. */ |
1226 |
-+static const int bfq_back_max = 16 * 1024; |
1227 |
-+ |
1228 |
-+/* Penalty of a backwards seek, in number of sectors. */ |
1229 |
-+static const int bfq_back_penalty = 2; |
1230 |
-+ |
1231 |
-+/* Idling period duration, in jiffies. */ |
1232 |
-+static int bfq_slice_idle = HZ / 125; |
1233 |
-+ |
1234 |
-+/* Default maximum budget values, in sectors and number of requests. */ |
1235 |
-+static const int bfq_default_max_budget = 16 * 1024; |
1236 |
-+static const int bfq_max_budget_async_rq = 4; |
1237 |
-+ |
1238 |
-+/* |
1239 |
-+ * Async to sync throughput distribution is controlled as follows: |
1240 |
-+ * when an async request is served, the entity is charged the number |
1241 |
-+ * of sectors of the request, multiplied by the factor below |
1242 |
-+ */ |
1243 |
-+static const int bfq_async_charge_factor = 10; |
1244 |
-+ |
1245 |
-+/* Default timeout values, in jiffies, approximating CFQ defaults. */ |
1246 |
-+static const int bfq_timeout_sync = HZ / 8; |
1247 |
-+static int bfq_timeout_async = HZ / 25; |
1248 |
-+ |
1249 |
-+struct kmem_cache *bfq_pool; |
1250 |
-+ |
1251 |
-+/* Below this threshold (in ms), we consider thinktime immediate. */ |
1252 |
-+#define BFQ_MIN_TT 2 |
1253 |
-+ |
1254 |
-+/* hw_tag detection: parallel requests threshold and min samples needed. */ |
1255 |
-+#define BFQ_HW_QUEUE_THRESHOLD 4 |
1256 |
-+#define BFQ_HW_QUEUE_SAMPLES 32 |
1257 |
-+ |
1258 |
-+#define BFQQ_SEEK_THR (sector_t)(8 * 1024) |
1259 |
-+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) |
1260 |
-+ |
1261 |
-+/* Min samples used for peak rate estimation (for autotuning). */ |
1262 |
-+#define BFQ_PEAK_RATE_SAMPLES 32 |
1263 |
-+ |
1264 |
-+/* Shift used for peak rate fixed precision calculations. */ |
1265 |
-+#define BFQ_RATE_SHIFT 16 |
1266 |
-+ |
1267 |
-+/* |
1268 |
-+ * The duration of the weight raising for interactive applications is |
1269 |
-+ * computed automatically (as default behaviour), using the following |
1270 |
-+ * formula: duration = (R / r) * T, where r is the peak rate of the |
1271 |
-+ * disk, and R and T are two reference parameters. In particular, R is |
1272 |
-+ * the peak rate of a reference disk, and T is about the maximum time |
1273 |
-+ * for starting popular large applications on that disk, under BFQ and |
1274 |
-+ * while reading two files in parallel. Finally, BFQ uses two |
1275 |
-+ * different pairs (R, T) depending on whether the disk is rotational |
1276 |
-+ * or non-rotational. |
1277 |
-+ */ |
1278 |
-+#define T_rot (msecs_to_jiffies(5500)) |
1279 |
-+#define T_nonrot (msecs_to_jiffies(2000)) |
1280 |
-+/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ |
1281 |
-+#define R_rot 17415 |
1282 |
-+#define R_nonrot 34791 |
1283 |
-+ |
1284 |
-+#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ |
1285 |
-+ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) |
1286 |
-+ |
1287 |
-+#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) |
1288 |
-+#define RQ_BFQQ(rq) ((rq)->elv.priv[1]) |
1289 |
-+ |
1290 |
-+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); |
1291 |
-+ |
1292 |
-+#include "bfq-ioc.c" |
1293 |
-+#include "bfq-sched.c" |
1294 |
-+#include "bfq-cgroup.c" |
1295 |
-+ |
1296 |
-+#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ |
1297 |
-+ IOPRIO_CLASS_IDLE) |
1298 |
-+#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ |
1299 |
-+ IOPRIO_CLASS_RT) |
1300 |
-+ |
1301 |
-+#define bfq_sample_valid(samples) ((samples) > 80) |
1302 |
-+ |
1303 |
-+/* |
1304 |
-+ * We regard a request as SYNC, if either it's a read or has the SYNC bit |
1305 |
-+ * set (in which case it could also be a direct WRITE). |
1306 |
-+ */ |
1307 |
-+static inline int bfq_bio_sync(struct bio *bio) |
1308 |
-+{ |
1309 |
-+ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) |
1310 |
-+ return 1; |
1311 |
-+ |
1312 |
-+ return 0; |
1313 |
-+} |
1314 |
-+ |
1315 |
-+/* |
1316 |
-+ * Scheduler run of queue, if there are requests pending and no one in the |
1317 |
-+ * driver that will restart queueing. |
1318 |
-+ */ |
1319 |
-+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) |
1320 |
-+{ |
1321 |
-+ if (bfqd->queued != 0) { |
1322 |
-+ bfq_log(bfqd, "schedule dispatch"); |
1323 |
-+ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); |
1324 |
-+ } |
1325 |
-+} |
1326 |
-+ |
1327 |
-+/* |
1328 |
-+ * Lifted from AS - choose which of rq1 and rq2 that is best served now. |
1329 |
-+ * We choose the request that is closest to the head right now. Distance |
1330 |
-+ * behind the head is penalized and only allowed to a certain extent. |
1331 |
-+ */ |
1332 |
-+static struct request *bfq_choose_req(struct bfq_data *bfqd, |
1333 |
-+ struct request *rq1, |
1334 |
-+ struct request *rq2, |
1335 |
-+ sector_t last) |
1336 |
-+{ |
1337 |
-+ sector_t s1, s2, d1 = 0, d2 = 0; |
1338 |
-+ unsigned long back_max; |
1339 |
-+#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ |
1340 |
-+#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ |
1341 |
-+ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ |
1342 |
-+ |
1343 |
-+ if (rq1 == NULL || rq1 == rq2) |
1344 |
-+ return rq2; |
1345 |
-+ if (rq2 == NULL) |
1346 |
-+ return rq1; |
1347 |
-+ |
1348 |
-+ if (rq_is_sync(rq1) && !rq_is_sync(rq2)) |
1349 |
-+ return rq1; |
1350 |
-+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) |
1351 |
-+ return rq2; |
1352 |
-+ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) |
1353 |
-+ return rq1; |
1354 |
-+ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) |
1355 |
-+ return rq2; |
1356 |
-+ |
1357 |
-+ s1 = blk_rq_pos(rq1); |
1358 |
-+ s2 = blk_rq_pos(rq2); |
1359 |
-+ |
1360 |
-+ /* |
1361 |
-+ * By definition, 1KiB is 2 sectors. |
1362 |
-+ */ |
1363 |
-+ back_max = bfqd->bfq_back_max * 2; |
1364 |
-+ |
1365 |
-+ /* |
1366 |
-+ * Strict one way elevator _except_ in the case where we allow |
1367 |
-+ * short backward seeks which are biased as twice the cost of a |
1368 |
-+ * similar forward seek. |
1369 |
-+ */ |
1370 |
-+ if (s1 >= last) |
1371 |
-+ d1 = s1 - last; |
1372 |
-+ else if (s1 + back_max >= last) |
1373 |
-+ d1 = (last - s1) * bfqd->bfq_back_penalty; |
1374 |
-+ else |
1375 |
-+ wrap |= BFQ_RQ1_WRAP; |
1376 |
-+ |
1377 |
-+ if (s2 >= last) |
1378 |
-+ d2 = s2 - last; |
1379 |
-+ else if (s2 + back_max >= last) |
1380 |
-+ d2 = (last - s2) * bfqd->bfq_back_penalty; |
1381 |
-+ else |
1382 |
-+ wrap |= BFQ_RQ2_WRAP; |
1383 |
-+ |
1384 |
-+ /* Found required data */ |
1385 |
-+ |
1386 |
-+ /* |
1387 |
-+ * By doing switch() on the bit mask "wrap" we avoid having to |
1388 |
-+ * check two variables for all permutations: --> faster! |
1389 |
-+ */ |
1390 |
-+ switch (wrap) { |
1391 |
-+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ |
1392 |
-+ if (d1 < d2) |
1393 |
-+ return rq1; |
1394 |
-+ else if (d2 < d1) |
1395 |
-+ return rq2; |
1396 |
-+ else { |
1397 |
-+ if (s1 >= s2) |
1398 |
-+ return rq1; |
1399 |
-+ else |
1400 |
-+ return rq2; |
1401 |
-+ } |
1402 |
-+ |
1403 |
-+ case BFQ_RQ2_WRAP: |
1404 |
-+ return rq1; |
1405 |
-+ case BFQ_RQ1_WRAP: |
1406 |
-+ return rq2; |
1407 |
-+ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ |
1408 |
-+ default: |
1409 |
-+ /* |
1410 |
-+ * Since both rqs are wrapped, |
1411 |
-+ * start with the one that's further behind head |
1412 |
-+ * (--> only *one* back seek required), |
1413 |
-+ * since back seek takes more time than forward. |
1414 |
-+ */ |
1415 |
-+ if (s1 <= s2) |
1416 |
-+ return rq1; |
1417 |
-+ else |
1418 |
-+ return rq2; |
1419 |
-+ } |
1420 |
-+} |
1421 |
-+ |
1422 |
-+static struct bfq_queue * |
1423 |
-+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, |
1424 |
-+ sector_t sector, struct rb_node **ret_parent, |
1425 |
-+ struct rb_node ***rb_link) |
1426 |
-+{ |
1427 |
-+ struct rb_node **p, *parent; |
1428 |
-+ struct bfq_queue *bfqq = NULL; |
1429 |
-+ |
1430 |
-+ parent = NULL; |
1431 |
-+ p = &root->rb_node; |
1432 |
-+ while (*p) { |
1433 |
-+ struct rb_node **n; |
1434 |
-+ |
1435 |
-+ parent = *p; |
1436 |
-+ bfqq = rb_entry(parent, struct bfq_queue, pos_node); |
1437 |
-+ |
1438 |
-+ /* |
1439 |
-+ * Sort strictly based on sector. Smallest to the left, |
1440 |
-+ * largest to the right. |
1441 |
-+ */ |
1442 |
-+ if (sector > blk_rq_pos(bfqq->next_rq)) |
1443 |
-+ n = &(*p)->rb_right; |
1444 |
-+ else if (sector < blk_rq_pos(bfqq->next_rq)) |
1445 |
-+ n = &(*p)->rb_left; |
1446 |
-+ else |
1447 |
-+ break; |
1448 |
-+ p = n; |
1449 |
-+ bfqq = NULL; |
1450 |
-+ } |
1451 |
-+ |
1452 |
-+ *ret_parent = parent; |
1453 |
-+ if (rb_link) |
1454 |
-+ *rb_link = p; |
1455 |
-+ |
1456 |
-+ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", |
1457 |
-+ (long long unsigned)sector, |
1458 |
-+ bfqq != NULL ? bfqq->pid : 0); |
1459 |
-+ |
1460 |
-+ return bfqq; |
1461 |
-+} |
1462 |
-+ |
1463 |
-+static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
1464 |
-+{ |
1465 |
-+ struct rb_node **p, *parent; |
1466 |
-+ struct bfq_queue *__bfqq; |
1467 |
-+ |
1468 |
-+ if (bfqq->pos_root != NULL) { |
1469 |
-+ rb_erase(&bfqq->pos_node, bfqq->pos_root); |
1470 |
-+ bfqq->pos_root = NULL; |
1471 |
-+ } |
1472 |
-+ |
1473 |
-+ if (bfq_class_idle(bfqq)) |
1474 |
-+ return; |
1475 |
-+ if (!bfqq->next_rq) |
1476 |
-+ return; |
1477 |
-+ |
1478 |
-+ bfqq->pos_root = &bfqd->rq_pos_tree; |
1479 |
-+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, |
1480 |
-+ blk_rq_pos(bfqq->next_rq), &parent, &p); |
1481 |
-+ if (__bfqq == NULL) { |
1482 |
-+ rb_link_node(&bfqq->pos_node, parent, p); |
1483 |
-+ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); |
1484 |
-+ } else |
1485 |
-+ bfqq->pos_root = NULL; |
1486 |
-+} |
1487 |
-+ |
1488 |
-+static struct request *bfq_find_next_rq(struct bfq_data *bfqd, |
1489 |
-+ struct bfq_queue *bfqq, |
1490 |
-+ struct request *last) |
1491 |
-+{ |
1492 |
-+ struct rb_node *rbnext = rb_next(&last->rb_node); |
1493 |
-+ struct rb_node *rbprev = rb_prev(&last->rb_node); |
1494 |
-+ struct request *next = NULL, *prev = NULL; |
1495 |
-+ |
1496 |
-+ BUG_ON(RB_EMPTY_NODE(&last->rb_node)); |
1497 |
-+ |
1498 |
-+ if (rbprev != NULL) |
1499 |
-+ prev = rb_entry_rq(rbprev); |
1500 |
-+ |
1501 |
-+ if (rbnext != NULL) |
1502 |
-+ next = rb_entry_rq(rbnext); |
1503 |
-+ else { |
1504 |
-+ rbnext = rb_first(&bfqq->sort_list); |
1505 |
-+ if (rbnext && rbnext != &last->rb_node) |
1506 |
-+ next = rb_entry_rq(rbnext); |
1507 |
-+ } |
1508 |
-+ |
1509 |
-+ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); |
1510 |
-+} |
1511 |
-+ |
1512 |
-+static void bfq_del_rq_rb(struct request *rq) |
1513 |
-+{ |
1514 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
1515 |
-+ struct bfq_data *bfqd = bfqq->bfqd; |
1516 |
-+ const int sync = rq_is_sync(rq); |
1517 |
-+ |
1518 |
-+ BUG_ON(bfqq->queued[sync] == 0); |
1519 |
-+ bfqq->queued[sync]--; |
1520 |
-+ bfqd->queued--; |
1521 |
-+ |
1522 |
-+ elv_rb_del(&bfqq->sort_list, rq); |
1523 |
-+ |
1524 |
-+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { |
1525 |
-+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) |
1526 |
-+ bfq_del_bfqq_busy(bfqd, bfqq, 1); |
1527 |
-+ /* |
1528 |
-+ * Remove queue from request-position tree as it is empty. |
1529 |
-+ */ |
1530 |
-+ if (bfqq->pos_root != NULL) { |
1531 |
-+ rb_erase(&bfqq->pos_node, bfqq->pos_root); |
1532 |
-+ bfqq->pos_root = NULL; |
1533 |
-+ } |
1534 |
-+ } |
1535 |
-+} |
1536 |
-+ |
1537 |
-+/* see the definition of bfq_async_charge_factor for details */ |
1538 |
-+static inline unsigned long bfq_serv_to_charge(struct request *rq, |
1539 |
-+ struct bfq_queue *bfqq) |
1540 |
-+{ |
1541 |
-+ return blk_rq_sectors(rq) * |
1542 |
-+ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * |
1543 |
-+ bfq_async_charge_factor)); |
1544 |
-+} |
1545 |
-+ |
1546 |
-+/** |
1547 |
-+ * bfq_updated_next_req - update the queue after a new next_rq selection. |
1548 |
-+ * @bfqd: the device data the queue belongs to. |
1549 |
-+ * @bfqq: the queue to update. |
1550 |
-+ * |
1551 |
-+ * If the first request of a queue changes we make sure that the queue |
1552 |
-+ * has enough budget to serve at least its first request (if the |
1553 |
-+ * request has grown). We do this because if the queue has not enough |
1554 |
-+ * budget for its first request, it has to go through two dispatch |
1555 |
-+ * rounds to actually get it dispatched. |
1556 |
-+ */ |
1557 |
-+static void bfq_updated_next_req(struct bfq_data *bfqd, |
1558 |
-+ struct bfq_queue *bfqq) |
1559 |
-+{ |
1560 |
-+ struct bfq_entity *entity = &bfqq->entity; |
1561 |
-+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); |
1562 |
-+ struct request *next_rq = bfqq->next_rq; |
1563 |
-+ unsigned long new_budget; |
1564 |
-+ |
1565 |
-+ if (next_rq == NULL) |
1566 |
-+ return; |
1567 |
-+ |
1568 |
-+ if (bfqq == bfqd->active_queue) |
1569 |
-+ /* |
1570 |
-+ * In order not to break guarantees, budgets cannot be |
1571 |
-+ * changed after an entity has been selected. |
1572 |
-+ */ |
1573 |
-+ return; |
1574 |
-+ |
1575 |
-+ BUG_ON(entity->tree != &st->active); |
1576 |
-+ BUG_ON(entity == entity->sched_data->active_entity); |
1577 |
-+ |
1578 |
-+ new_budget = max_t(unsigned long, bfqq->max_budget, |
1579 |
-+ bfq_serv_to_charge(next_rq, bfqq)); |
1580 |
-+ entity->budget = new_budget; |
1581 |
-+ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); |
1582 |
-+ bfq_activate_bfqq(bfqd, bfqq); |
1583 |
-+} |
1584 |
-+ |
1585 |
-+static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) |
1586 |
-+{ |
1587 |
-+ u64 dur; |
1588 |
-+ |
1589 |
-+ if (bfqd->bfq_raising_max_time > 0) |
1590 |
-+ return bfqd->bfq_raising_max_time; |
1591 |
-+ |
1592 |
-+ dur = bfqd->RT_prod; |
1593 |
-+ do_div(dur, bfqd->peak_rate); |
1594 |
-+ |
1595 |
-+ return dur; |
1596 |
-+} |
1597 |
-+ |
1598 |
-+static void bfq_add_rq_rb(struct request *rq) |
1599 |
-+{ |
1600 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
1601 |
-+ struct bfq_entity *entity = &bfqq->entity; |
1602 |
-+ struct bfq_data *bfqd = bfqq->bfqd; |
1603 |
-+ struct request *next_rq, *prev; |
1604 |
-+ unsigned long old_raising_coeff = bfqq->raising_coeff; |
1605 |
-+ int idle_for_long_time = bfqq->budget_timeout + |
1606 |
-+ bfqd->bfq_raising_min_idle_time < jiffies; |
1607 |
-+ |
1608 |
-+ bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); |
1609 |
-+ bfqq->queued[rq_is_sync(rq)]++; |
1610 |
-+ bfqd->queued++; |
1611 |
-+ |
1612 |
-+ elv_rb_add(&bfqq->sort_list, rq); |
1613 |
-+ |
1614 |
-+ /* |
1615 |
-+ * Check if this request is a better next-serve candidate. |
1616 |
-+ */ |
1617 |
-+ prev = bfqq->next_rq; |
1618 |
-+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); |
1619 |
-+ BUG_ON(next_rq == NULL); |
1620 |
-+ bfqq->next_rq = next_rq; |
1621 |
-+ |
1622 |
-+ /* |
1623 |
-+ * Adjust priority tree position, if next_rq changes. |
1624 |
-+ */ |
1625 |
-+ if (prev != bfqq->next_rq) |
1626 |
-+ bfq_rq_pos_tree_add(bfqd, bfqq); |
1627 |
-+ |
1628 |
-+ if (!bfq_bfqq_busy(bfqq)) { |
1629 |
-+ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && |
1630 |
-+ bfqq->soft_rt_next_start < jiffies; |
1631 |
-+ entity->budget = max_t(unsigned long, bfqq->max_budget, |
1632 |
-+ bfq_serv_to_charge(next_rq, bfqq)); |
1633 |
-+ |
1634 |
-+ if (! bfqd->low_latency) |
1635 |
-+ goto add_bfqq_busy; |
1636 |
-+ |
1637 |
-+ /* |
1638 |
-+ * If the queue is not being boosted and has been idle |
1639 |
-+ * for enough time, start a weight-raising period |
1640 |
-+ */ |
1641 |
-+ if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) { |
1642 |
-+ bfqq->raising_coeff = bfqd->bfq_raising_coeff; |
1643 |
-+ if (idle_for_long_time) |
1644 |
-+ bfqq->raising_cur_max_time = |
1645 |
-+ bfq_wrais_duration(bfqd); |
1646 |
-+ else |
1647 |
-+ bfqq->raising_cur_max_time = |
1648 |
-+ bfqd->bfq_raising_rt_max_time; |
1649 |
-+ bfq_log_bfqq(bfqd, bfqq, |
1650 |
-+ "wrais starting at %llu msec," |
1651 |
-+ "rais_max_time %u", |
1652 |
-+ bfqq->last_rais_start_finish, |
1653 |
-+ jiffies_to_msecs(bfqq-> |
1654 |
-+ raising_cur_max_time)); |
1655 |
-+ } else if (old_raising_coeff > 1) { |
1656 |
-+ if (idle_for_long_time) |
1657 |
-+ bfqq->raising_cur_max_time = |
1658 |
-+ bfq_wrais_duration(bfqd); |
1659 |
-+ else if (bfqq->raising_cur_max_time == |
1660 |
-+ bfqd->bfq_raising_rt_max_time && |
1661 |
-+ !soft_rt) { |
1662 |
-+ bfqq->raising_coeff = 1; |
1663 |
-+ bfq_log_bfqq(bfqd, bfqq, |
1664 |
-+ "wrais ending at %llu msec," |
1665 |
-+ "rais_max_time %u", |
1666 |
-+ bfqq->last_rais_start_finish, |
1667 |
-+ jiffies_to_msecs(bfqq-> |
1668 |
-+ raising_cur_max_time)); |
1669 |
-+ } |
1670 |
-+ } |
1671 |
-+ if (old_raising_coeff != bfqq->raising_coeff) |
1672 |
-+ entity->ioprio_changed = 1; |
1673 |
-+add_bfqq_busy: |
1674 |
-+ bfq_add_bfqq_busy(bfqd, bfqq); |
1675 |
-+ } else { |
1676 |
-+ if(bfqd->low_latency && old_raising_coeff == 1 && |
1677 |
-+ !rq_is_sync(rq) && |
1678 |
-+ bfqq->last_rais_start_finish + |
1679 |
-+ bfqd->bfq_raising_min_inter_arr_async < jiffies) { |
1680 |
-+ bfqq->raising_coeff = bfqd->bfq_raising_coeff; |
1681 |
-+ bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); |
1682 |
-+ |
1683 |
-+ entity->ioprio_changed = 1; |
1684 |
-+ bfq_log_bfqq(bfqd, bfqq, |
1685 |
-+ "non-idle wrais starting at %llu msec," |
1686 |
-+ "rais_max_time %u", |
1687 |
-+ bfqq->last_rais_start_finish, |
1688 |
-+ jiffies_to_msecs(bfqq-> |
1689 |
-+ raising_cur_max_time)); |
1690 |
-+ } |
1691 |
-+ bfq_updated_next_req(bfqd, bfqq); |
1692 |
-+ } |
1693 |
-+ |
1694 |
-+ if(bfqd->low_latency && |
1695 |
-+ (old_raising_coeff == 1 || bfqq->raising_coeff == 1 || |
1696 |
-+ idle_for_long_time)) |
1697 |
-+ bfqq->last_rais_start_finish = jiffies; |
1698 |
-+} |
1699 |
-+ |
1700 |
-+static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq) |
1701 |
-+{ |
1702 |
-+ elv_rb_del(&bfqq->sort_list, rq); |
1703 |
-+ bfqq->queued[rq_is_sync(rq)]--; |
1704 |
-+ bfqq->bfqd->queued--; |
1705 |
-+ bfq_add_rq_rb(rq); |
1706 |
-+} |
1707 |
-+ |
1708 |
-+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, |
1709 |
-+ struct bio *bio) |
1710 |
-+{ |
1711 |
-+ struct task_struct *tsk = current; |
1712 |
-+ struct bfq_io_cq *bic; |
1713 |
-+ struct bfq_queue *bfqq; |
1714 |
-+ |
1715 |
-+ bic = bfq_bic_lookup(bfqd, tsk->io_context); |
1716 |
-+ if (bic == NULL) |
1717 |
-+ return NULL; |
1718 |
-+ |
1719 |
-+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); |
1720 |
-+ if (bfqq != NULL) { |
1721 |
-+ sector_t sector = bio->bi_sector + bio_sectors(bio); |
1722 |
-+ |
1723 |
-+ return elv_rb_find(&bfqq->sort_list, sector); |
1724 |
-+ } |
1725 |
-+ |
1726 |
-+ return NULL; |
1727 |
-+} |
1728 |
-+ |
1729 |
-+static void bfq_activate_request(struct request_queue *q, struct request *rq) |
1730 |
-+{ |
1731 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
1732 |
-+ |
1733 |
-+ bfqd->rq_in_driver++; |
1734 |
-+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); |
1735 |
-+ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", |
1736 |
-+ (long long unsigned)bfqd->last_position); |
1737 |
-+} |
1738 |
-+ |
1739 |
-+static void bfq_deactivate_request(struct request_queue *q, struct request *rq) |
1740 |
-+{ |
1741 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
1742 |
-+ |
1743 |
-+ WARN_ON(bfqd->rq_in_driver == 0); |
1744 |
-+ bfqd->rq_in_driver--; |
1745 |
-+} |
1746 |
-+ |
1747 |
-+static void bfq_remove_request(struct request *rq) |
1748 |
-+{ |
1749 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
1750 |
-+ struct bfq_data *bfqd = bfqq->bfqd; |
1751 |
-+ |
1752 |
-+ if (bfqq->next_rq == rq) { |
1753 |
-+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); |
1754 |
-+ bfq_updated_next_req(bfqd, bfqq); |
1755 |
-+ } |
1756 |
-+ |
1757 |
-+ list_del_init(&rq->queuelist); |
1758 |
-+ bfq_del_rq_rb(rq); |
1759 |
-+ |
1760 |
-+ if (rq->cmd_flags & REQ_META) { |
1761 |
-+ WARN_ON(bfqq->meta_pending == 0); |
1762 |
-+ bfqq->meta_pending--; |
1763 |
-+ } |
1764 |
-+} |
1765 |
-+ |
1766 |
-+static int bfq_merge(struct request_queue *q, struct request **req, |
1767 |
-+ struct bio *bio) |
1768 |
-+{ |
1769 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
1770 |
-+ struct request *__rq; |
1771 |
-+ |
1772 |
-+ __rq = bfq_find_rq_fmerge(bfqd, bio); |
1773 |
-+ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { |
1774 |
-+ *req = __rq; |
1775 |
-+ return ELEVATOR_FRONT_MERGE; |
1776 |
-+ } |
1777 |
-+ |
1778 |
-+ return ELEVATOR_NO_MERGE; |
1779 |
-+} |
1780 |
-+ |
1781 |
-+static void bfq_merged_request(struct request_queue *q, struct request *req, |
1782 |
-+ int type) |
1783 |
-+{ |
1784 |
-+ if (type == ELEVATOR_FRONT_MERGE) { |
1785 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(req); |
1786 |
-+ |
1787 |
-+ bfq_reposition_rq_rb(bfqq, req); |
1788 |
-+ } |
1789 |
-+} |
1790 |
-+ |
1791 |
-+static void bfq_merged_requests(struct request_queue *q, struct request *rq, |
1792 |
-+ struct request *next) |
1793 |
-+{ |
1794 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
1795 |
-+ |
1796 |
-+ /* |
1797 |
-+ * Reposition in fifo if next is older than rq. |
1798 |
-+ */ |
1799 |
-+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && |
1800 |
-+ time_before(rq_fifo_time(next), rq_fifo_time(rq))) { |
1801 |
-+ list_move(&rq->queuelist, &next->queuelist); |
1802 |
-+ rq_set_fifo_time(rq, rq_fifo_time(next)); |
1803 |
-+ } |
1804 |
-+ |
1805 |
-+ if (bfqq->next_rq == next) |
1806 |
-+ bfqq->next_rq = rq; |
1807 |
-+ |
1808 |
-+ bfq_remove_request(next); |
1809 |
-+} |
1810 |
-+ |
1811 |
-+/* Must be called with bfqq != NULL */ |
1812 |
-+static inline void bfq_bfqq_end_raising(struct bfq_queue *bfqq) |
1813 |
-+{ |
1814 |
-+ BUG_ON(bfqq == NULL); |
1815 |
-+ bfqq->raising_coeff = 1; |
1816 |
-+ bfqq->raising_cur_max_time = 0; |
1817 |
-+ /* Trigger a weight change on the next activation of the queue */ |
1818 |
-+ bfqq->entity.ioprio_changed = 1; |
1819 |
-+} |
1820 |
-+ |
1821 |
-+static void bfq_end_raising_async_queues(struct bfq_data *bfqd, |
1822 |
-+ struct bfq_group *bfqg) |
1823 |
-+{ |
1824 |
-+ int i, j; |
1825 |
-+ |
1826 |
-+ for (i = 0; i < 2; i++) |
1827 |
-+ for (j = 0; j < IOPRIO_BE_NR; j++) |
1828 |
-+ if (bfqg->async_bfqq[i][j] != NULL) |
1829 |
-+ bfq_bfqq_end_raising(bfqg->async_bfqq[i][j]); |
1830 |
-+ if (bfqg->async_idle_bfqq != NULL) |
1831 |
-+ bfq_bfqq_end_raising(bfqg->async_idle_bfqq); |
1832 |
-+} |
1833 |
-+ |
1834 |
-+static void bfq_end_raising(struct bfq_data *bfqd) |
1835 |
-+{ |
1836 |
-+ struct bfq_queue *bfqq; |
1837 |
-+ |
1838 |
-+ spin_lock_irq(bfqd->queue->queue_lock); |
1839 |
-+ |
1840 |
-+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) |
1841 |
-+ bfq_bfqq_end_raising(bfqq); |
1842 |
-+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) |
1843 |
-+ bfq_bfqq_end_raising(bfqq); |
1844 |
-+ bfq_end_raising_async(bfqd); |
1845 |
-+ |
1846 |
-+ spin_unlock_irq(bfqd->queue->queue_lock); |
1847 |
-+} |
1848 |
-+ |
1849 |
-+static int bfq_allow_merge(struct request_queue *q, struct request *rq, |
1850 |
-+ struct bio *bio) |
1851 |
-+{ |
1852 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
1853 |
-+ struct bfq_io_cq *bic; |
1854 |
-+ struct bfq_queue *bfqq; |
1855 |
-+ |
1856 |
-+ /* |
1857 |
-+ * Disallow merge of a sync bio into an async request. |
1858 |
-+ */ |
1859 |
-+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) |
1860 |
-+ return 0; |
1861 |
-+ |
1862 |
-+ /* |
1863 |
-+ * Lookup the bfqq that this bio will be queued with. Allow |
1864 |
-+ * merge only if rq is queued there. |
1865 |
-+ * Queue lock is held here. |
1866 |
-+ */ |
1867 |
-+ bic = bfq_bic_lookup(bfqd, current->io_context); |
1868 |
-+ if (bic == NULL) |
1869 |
-+ return 0; |
1870 |
-+ |
1871 |
-+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); |
1872 |
-+ return bfqq == RQ_BFQQ(rq); |
1873 |
-+} |
1874 |
-+ |
1875 |
-+static void __bfq_set_active_queue(struct bfq_data *bfqd, |
1876 |
-+ struct bfq_queue *bfqq) |
1877 |
-+{ |
1878 |
-+ if (bfqq != NULL) { |
1879 |
-+ bfq_mark_bfqq_must_alloc(bfqq); |
1880 |
-+ bfq_mark_bfqq_budget_new(bfqq); |
1881 |
-+ bfq_clear_bfqq_fifo_expire(bfqq); |
1882 |
-+ |
1883 |
-+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; |
1884 |
-+ |
1885 |
-+ bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", |
1886 |
-+ bfqq->entity.budget); |
1887 |
-+ } |
1888 |
-+ |
1889 |
-+ bfqd->active_queue = bfqq; |
1890 |
-+} |
1891 |
-+ |
1892 |
-+/* |
1893 |
-+ * Get and set a new active queue for service. |
1894 |
-+ */ |
1895 |
-+static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd, |
1896 |
-+ struct bfq_queue *bfqq) |
1897 |
-+{ |
1898 |
-+ if (!bfqq) |
1899 |
-+ bfqq = bfq_get_next_queue(bfqd); |
1900 |
-+ else |
1901 |
-+ bfq_get_next_queue_forced(bfqd, bfqq); |
1902 |
-+ |
1903 |
-+ __bfq_set_active_queue(bfqd, bfqq); |
1904 |
-+ return bfqq; |
1905 |
-+} |
1906 |
-+ |
1907 |
-+static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, |
1908 |
-+ struct request *rq) |
1909 |
-+{ |
1910 |
-+ if (blk_rq_pos(rq) >= bfqd->last_position) |
1911 |
-+ return blk_rq_pos(rq) - bfqd->last_position; |
1912 |
-+ else |
1913 |
-+ return bfqd->last_position - blk_rq_pos(rq); |
1914 |
-+} |
1915 |
-+ |
1916 |
-+/* |
1917 |
-+ * Return true if bfqq has no request pending and rq is close enough to |
1918 |
-+ * bfqd->last_position, or if rq is closer to bfqd->last_position than |
1919 |
-+ * bfqq->next_rq |
1920 |
-+ */ |
1921 |
-+static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) |
1922 |
-+{ |
1923 |
-+ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; |
1924 |
-+} |
1925 |
-+ |
1926 |
-+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) |
1927 |
-+{ |
1928 |
-+ struct rb_root *root = &bfqd->rq_pos_tree; |
1929 |
-+ struct rb_node *parent, *node; |
1930 |
-+ struct bfq_queue *__bfqq; |
1931 |
-+ sector_t sector = bfqd->last_position; |
1932 |
-+ |
1933 |
-+ if (RB_EMPTY_ROOT(root)) |
1934 |
-+ return NULL; |
1935 |
-+ |
1936 |
-+ /* |
1937 |
-+ * First, if we find a request starting at the end of the last |
1938 |
-+ * request, choose it. |
1939 |
-+ */ |
1940 |
-+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); |
1941 |
-+ if (__bfqq != NULL) |
1942 |
-+ return __bfqq; |
1943 |
-+ |
1944 |
-+ /* |
1945 |
-+ * If the exact sector wasn't found, the parent of the NULL leaf |
1946 |
-+ * will contain the closest sector (rq_pos_tree sorted by next_request |
1947 |
-+ * position). |
1948 |
-+ */ |
1949 |
-+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); |
1950 |
-+ if (bfq_rq_close(bfqd, __bfqq->next_rq)) |
1951 |
-+ return __bfqq; |
1952 |
-+ |
1953 |
-+ if (blk_rq_pos(__bfqq->next_rq) < sector) |
1954 |
-+ node = rb_next(&__bfqq->pos_node); |
1955 |
-+ else |
1956 |
-+ node = rb_prev(&__bfqq->pos_node); |
1957 |
-+ if (node == NULL) |
1958 |
-+ return NULL; |
1959 |
-+ |
1960 |
-+ __bfqq = rb_entry(node, struct bfq_queue, pos_node); |
1961 |
-+ if (bfq_rq_close(bfqd, __bfqq->next_rq)) |
1962 |
-+ return __bfqq; |
1963 |
-+ |
1964 |
-+ return NULL; |
1965 |
-+} |
1966 |
-+ |
1967 |
-+/* |
1968 |
-+ * bfqd - obvious |
1969 |
-+ * cur_bfqq - passed in so that we don't decide that the current queue |
1970 |
-+ * is closely cooperating with itself. |
1971 |
-+ * |
1972 |
-+ * We are assuming that cur_bfqq has dispatched at least one request, |
1973 |
-+ * and that bfqd->last_position reflects a position on the disk associated |
1974 |
-+ * with the I/O issued by cur_bfqq. |
1975 |
-+ */ |
1976 |
-+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, |
1977 |
-+ struct bfq_queue *cur_bfqq) |
1978 |
-+{ |
1979 |
-+ struct bfq_queue *bfqq; |
1980 |
-+ |
1981 |
-+ if (bfq_class_idle(cur_bfqq)) |
1982 |
-+ return NULL; |
1983 |
-+ if (!bfq_bfqq_sync(cur_bfqq)) |
1984 |
-+ return NULL; |
1985 |
-+ if (BFQQ_SEEKY(cur_bfqq)) |
1986 |
-+ return NULL; |
1987 |
-+ |
1988 |
-+ /* If device has only one backlogged bfq_queue, don't search. */ |
1989 |
-+ if (bfqd->busy_queues == 1) |
1990 |
-+ return NULL; |
1991 |
-+ |
1992 |
-+ /* |
1993 |
-+ * We should notice if some of the queues are cooperating, e.g. |
1994 |
-+ * working closely on the same area of the disk. In that case, |
1995 |
-+ * we can group them together and don't waste time idling. |
1996 |
-+ */ |
1997 |
-+ bfqq = bfqq_close(bfqd); |
1998 |
-+ if (bfqq == NULL || bfqq == cur_bfqq) |
1999 |
-+ return NULL; |
2000 |
-+ |
2001 |
-+ /* |
2002 |
-+ * Do not merge queues from different bfq_groups. |
2003 |
-+ */ |
2004 |
-+ if (bfqq->entity.parent != cur_bfqq->entity.parent) |
2005 |
-+ return NULL; |
2006 |
-+ |
2007 |
-+ /* |
2008 |
-+ * It only makes sense to merge sync queues. |
2009 |
-+ */ |
2010 |
-+ if (!bfq_bfqq_sync(bfqq)) |
2011 |
-+ return NULL; |
2012 |
-+ if (BFQQ_SEEKY(bfqq)) |
2013 |
-+ return NULL; |
2014 |
-+ |
2015 |
-+ /* |
2016 |
-+ * Do not merge queues of different priority classes. |
2017 |
-+ */ |
2018 |
-+ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) |
2019 |
-+ return NULL; |
2020 |
-+ |
2021 |
-+ return bfqq; |
2022 |
-+} |
2023 |
-+ |
2024 |
-+/* |
2025 |
-+ * If enough samples have been computed, return the current max budget |
2026 |
-+ * stored in bfqd, which is dynamically updated according to the |
2027 |
-+ * estimated disk peak rate; otherwise return the default max budget |
2028 |
-+ */ |
2029 |
-+static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) |
2030 |
-+{ |
2031 |
-+ if (bfqd->budgets_assigned < 194) |
2032 |
-+ return bfq_default_max_budget; |
2033 |
-+ else |
2034 |
-+ return bfqd->bfq_max_budget; |
2035 |
-+} |
2036 |
-+ |
2037 |
-+/* |
2038 |
-+ * Return min budget, which is a fraction of the current or default |
2039 |
-+ * max budget (trying with 1/32) |
2040 |
-+ */ |
2041 |
-+static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) |
2042 |
-+{ |
2043 |
-+ if (bfqd->budgets_assigned < 194) |
2044 |
-+ return bfq_default_max_budget / 32; |
2045 |
-+ else |
2046 |
-+ return bfqd->bfq_max_budget / 32; |
2047 |
-+} |
2048 |
-+ |
2049 |
-+/* |
2050 |
-+ * Decides whether idling should be done for given device and |
2051 |
-+ * given active queue. |
2052 |
-+ */ |
2053 |
-+static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, |
2054 |
-+ struct bfq_queue *active_bfqq) |
2055 |
-+{ |
2056 |
-+ if (active_bfqq == NULL) |
2057 |
-+ return false; |
2058 |
-+ /* |
2059 |
-+ * If device is SSD it has no seek penalty, disable idling; but |
2060 |
-+ * do so only if: |
2061 |
-+ * - device does not support queuing, otherwise we still have |
2062 |
-+ * a problem with sync vs async workloads; |
2063 |
-+ * - the queue is not weight-raised, to preserve guarantees. |
2064 |
-+ */ |
2065 |
-+ return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && |
2066 |
-+ active_bfqq->raising_coeff == 1); |
2067 |
-+} |
2068 |
-+ |
2069 |
-+static void bfq_arm_slice_timer(struct bfq_data *bfqd) |
2070 |
-+{ |
2071 |
-+ struct bfq_queue *bfqq = bfqd->active_queue; |
2072 |
-+ struct bfq_io_cq *bic; |
2073 |
-+ unsigned long sl; |
2074 |
-+ |
2075 |
-+ WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); |
2076 |
-+ |
2077 |
-+ /* Tasks have exited, don't wait. */ |
2078 |
-+ bic = bfqd->active_bic; |
2079 |
-+ if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0) |
2080 |
-+ return; |
2081 |
-+ |
2082 |
-+ bfq_mark_bfqq_wait_request(bfqq); |
2083 |
-+ |
2084 |
-+ /* |
2085 |
-+ * We don't want to idle for seeks, but we do want to allow |
2086 |
-+ * fair distribution of slice time for a process doing back-to-back |
2087 |
-+ * seeks. So allow a little bit of time for him to submit a new rq. |
2088 |
-+ * |
2089 |
-+ * To prevent processes with (partly) seeky workloads from |
2090 |
-+ * being too ill-treated, grant them a small fraction of the |
2091 |
-+ * assigned budget before reducing the waiting time to |
2092 |
-+ * BFQ_MIN_TT. This happened to help reduce latency. |
2093 |
-+ */ |
2094 |
-+ sl = bfqd->bfq_slice_idle; |
2095 |
-+ if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && |
2096 |
-+ bfqq->entity.service > bfq_max_budget(bfqd) / 8 && |
2097 |
-+ bfqq->raising_coeff == 1) |
2098 |
-+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); |
2099 |
-+ else if (bfqq->raising_coeff > 1) |
2100 |
-+ sl = sl * 3; |
2101 |
-+ bfqd->last_idling_start = ktime_get(); |
2102 |
-+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); |
2103 |
-+ bfq_log(bfqd, "arm idle: %u/%u ms", |
2104 |
-+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); |
2105 |
-+} |
2106 |
-+ |
2107 |
-+/* |
2108 |
-+ * Set the maximum time for the active queue to consume its |
2109 |
-+ * budget. This prevents seeky processes from lowering the disk |
2110 |
-+ * throughput (always guaranteed with a time slice scheme as in CFQ). |
2111 |
-+ */ |
2112 |
-+static void bfq_set_budget_timeout(struct bfq_data *bfqd) |
2113 |
-+{ |
2114 |
-+ struct bfq_queue *bfqq = bfqd->active_queue; |
2115 |
-+ unsigned int timeout_coeff; |
2116 |
-+ if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) |
2117 |
-+ timeout_coeff = 1; |
2118 |
-+ else |
2119 |
-+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; |
2120 |
-+ |
2121 |
-+ bfqd->last_budget_start = ktime_get(); |
2122 |
-+ |
2123 |
-+ bfq_clear_bfqq_budget_new(bfqq); |
2124 |
-+ bfqq->budget_timeout = jiffies + |
2125 |
-+ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; |
2126 |
-+ |
2127 |
-+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", |
2128 |
-+ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * |
2129 |
-+ timeout_coeff)); |
2130 |
-+} |
2131 |
-+ |
2132 |
-+/* |
2133 |
-+ * Move request from internal lists to the request queue dispatch list. |
2134 |
-+ */ |
2135 |
-+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) |
2136 |
-+{ |
2137 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
2138 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
2139 |
-+ |
2140 |
-+ bfq_remove_request(rq); |
2141 |
-+ bfqq->dispatched++; |
2142 |
-+ elv_dispatch_sort(q, rq); |
2143 |
-+ |
2144 |
-+ if (bfq_bfqq_sync(bfqq)) |
2145 |
-+ bfqd->sync_flight++; |
2146 |
-+} |
2147 |
-+ |
2148 |
-+/* |
2149 |
-+ * Return expired entry, or NULL to just start from scratch in rbtree. |
2150 |
-+ */ |
2151 |
-+static struct request *bfq_check_fifo(struct bfq_queue *bfqq) |
2152 |
-+{ |
2153 |
-+ struct request *rq = NULL; |
2154 |
-+ |
2155 |
-+ if (bfq_bfqq_fifo_expire(bfqq)) |
2156 |
-+ return NULL; |
2157 |
-+ |
2158 |
-+ bfq_mark_bfqq_fifo_expire(bfqq); |
2159 |
-+ |
2160 |
-+ if (list_empty(&bfqq->fifo)) |
2161 |
-+ return NULL; |
2162 |
-+ |
2163 |
-+ rq = rq_entry_fifo(bfqq->fifo.next); |
2164 |
-+ |
2165 |
-+ if (time_before(jiffies, rq_fifo_time(rq))) |
2166 |
-+ return NULL; |
2167 |
-+ |
2168 |
-+ return rq; |
2169 |
-+} |
2170 |
-+ |
2171 |
-+/* |
2172 |
-+ * Must be called with the queue_lock held. |
2173 |
-+ */ |
2174 |
-+static int bfqq_process_refs(struct bfq_queue *bfqq) |
2175 |
-+{ |
2176 |
-+ int process_refs, io_refs; |
2177 |
-+ |
2178 |
-+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; |
2179 |
-+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; |
2180 |
-+ BUG_ON(process_refs < 0); |
2181 |
-+ return process_refs; |
2182 |
-+} |
2183 |
-+ |
2184 |
-+static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) |
2185 |
-+{ |
2186 |
-+ int process_refs, new_process_refs; |
2187 |
-+ struct bfq_queue *__bfqq; |
2188 |
-+ |
2189 |
-+ /* |
2190 |
-+ * If there are no process references on the new_bfqq, then it is |
2191 |
-+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain |
2192 |
-+ * may have dropped their last reference (not just their last process |
2193 |
-+ * reference). |
2194 |
-+ */ |
2195 |
-+ if (!bfqq_process_refs(new_bfqq)) |
2196 |
-+ return; |
2197 |
-+ |
2198 |
-+ /* Avoid a circular list and skip interim queue merges. */ |
2199 |
-+ while ((__bfqq = new_bfqq->new_bfqq)) { |
2200 |
-+ if (__bfqq == bfqq) |
2201 |
-+ return; |
2202 |
-+ new_bfqq = __bfqq; |
2203 |
-+ } |
2204 |
-+ |
2205 |
-+ process_refs = bfqq_process_refs(bfqq); |
2206 |
-+ new_process_refs = bfqq_process_refs(new_bfqq); |
2207 |
-+ /* |
2208 |
-+ * If the process for the bfqq has gone away, there is no |
2209 |
-+ * sense in merging the queues. |
2210 |
-+ */ |
2211 |
-+ if (process_refs == 0 || new_process_refs == 0) |
2212 |
-+ return; |
2213 |
-+ |
2214 |
-+ /* |
2215 |
-+ * Merge in the direction of the lesser amount of work. |
2216 |
-+ */ |
2217 |
-+ if (new_process_refs >= process_refs) { |
2218 |
-+ bfqq->new_bfqq = new_bfqq; |
2219 |
-+ atomic_add(process_refs, &new_bfqq->ref); |
2220 |
-+ } else { |
2221 |
-+ new_bfqq->new_bfqq = bfqq; |
2222 |
-+ atomic_add(new_process_refs, &bfqq->ref); |
2223 |
-+ } |
2224 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", |
2225 |
-+ new_bfqq->pid); |
2226 |
-+} |
2227 |
-+ |
2228 |
-+static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) |
2229 |
-+{ |
2230 |
-+ struct bfq_entity *entity = &bfqq->entity; |
2231 |
-+ return entity->budget - entity->service; |
2232 |
-+} |
2233 |
-+ |
2234 |
-+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
2235 |
-+{ |
2236 |
-+ BUG_ON(bfqq != bfqd->active_queue); |
2237 |
-+ |
2238 |
-+ __bfq_bfqd_reset_active(bfqd); |
2239 |
-+ |
2240 |
-+ /* |
2241 |
-+ * If this bfqq is shared between multiple processes, check |
2242 |
-+ * to make sure that those processes are still issuing I/Os |
2243 |
-+ * within the mean seek distance. If not, it may be time to |
2244 |
-+ * break the queues apart again. |
2245 |
-+ */ |
2246 |
-+ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) |
2247 |
-+ bfq_mark_bfqq_split_coop(bfqq); |
2248 |
-+ |
2249 |
-+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { |
2250 |
-+ /* |
2251 |
-+ * overloading budget_timeout field to store when |
2252 |
-+ * the queue remains with no backlog, used by |
2253 |
-+ * the weight-raising mechanism |
2254 |
-+ */ |
2255 |
-+ bfqq->budget_timeout = jiffies ; |
2256 |
-+ bfq_del_bfqq_busy(bfqd, bfqq, 1); |
2257 |
-+ } else { |
2258 |
-+ bfq_activate_bfqq(bfqd, bfqq); |
2259 |
-+ /* |
2260 |
-+ * Resort priority tree of potential close cooperators. |
2261 |
-+ */ |
2262 |
-+ bfq_rq_pos_tree_add(bfqd, bfqq); |
2263 |
-+ } |
2264 |
-+} |
2265 |
-+ |
2266 |
-+/** |
2267 |
-+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. |
2268 |
-+ * @bfqd: device data. |
2269 |
-+ * @bfqq: queue to update. |
2270 |
-+ * @reason: reason for expiration. |
2271 |
-+ * |
2272 |
-+ * Handle the feedback on @bfqq budget. See the body for detailed |
2273 |
-+ * comments. |
2274 |
-+ */ |
2275 |
-+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, |
2276 |
-+ struct bfq_queue *bfqq, |
2277 |
-+ enum bfqq_expiration reason) |
2278 |
-+{ |
2279 |
-+ struct request *next_rq; |
2280 |
-+ unsigned long budget, min_budget; |
2281 |
-+ |
2282 |
-+ budget = bfqq->max_budget; |
2283 |
-+ min_budget = bfq_min_budget(bfqd); |
2284 |
-+ |
2285 |
-+ BUG_ON(bfqq != bfqd->active_queue); |
2286 |
-+ |
2287 |
-+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", |
2288 |
-+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); |
2289 |
-+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", |
2290 |
-+ budget, bfq_min_budget(bfqd)); |
2291 |
-+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", |
2292 |
-+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); |
2293 |
-+ |
2294 |
-+ if (bfq_bfqq_sync(bfqq)) { |
2295 |
-+ switch (reason) { |
2296 |
-+ /* |
2297 |
-+ * Caveat: in all the following cases we trade latency |
2298 |
-+ * for throughput. |
2299 |
-+ */ |
2300 |
-+ case BFQ_BFQQ_TOO_IDLE: |
2301 |
-+ /* |
2302 |
-+ * This is the only case where we may reduce |
2303 |
-+ * the budget: if there is no requets of the |
2304 |
-+ * process still waiting for completion, then |
2305 |
-+ * we assume (tentatively) that the timer has |
2306 |
-+ * expired because the batch of requests of |
2307 |
-+ * the process could have been served with a |
2308 |
-+ * smaller budget. Hence, betting that |
2309 |
-+ * process will behave in the same way when it |
2310 |
-+ * becomes backlogged again, we reduce its |
2311 |
-+ * next budget. As long as we guess right, |
2312 |
-+ * this budget cut reduces the latency |
2313 |
-+ * experienced by the process. |
2314 |
-+ * |
2315 |
-+ * However, if there are still outstanding |
2316 |
-+ * requests, then the process may have not yet |
2317 |
-+ * issued its next request just because it is |
2318 |
-+ * still waiting for the completion of some of |
2319 |
-+ * the still oustanding ones. So in this |
2320 |
-+ * subcase we do not reduce its budget, on the |
2321 |
-+ * contrary we increase it to possibly boost |
2322 |
-+ * the throughput, as discussed in the |
2323 |
-+ * comments to the BUDGET_TIMEOUT case. |
2324 |
-+ */ |
2325 |
-+ if (bfqq->dispatched > 0) /* still oustanding reqs */ |
2326 |
-+ budget = min(budget * 2, bfqd->bfq_max_budget); |
2327 |
-+ else { |
2328 |
-+ if (budget > 5 * min_budget) |
2329 |
-+ budget -= 4 * min_budget; |
2330 |
-+ else |
2331 |
-+ budget = min_budget; |
2332 |
-+ } |
2333 |
-+ break; |
2334 |
-+ case BFQ_BFQQ_BUDGET_TIMEOUT: |
2335 |
-+ /* |
2336 |
-+ * We double the budget here because: 1) it |
2337 |
-+ * gives the chance to boost the throughput if |
2338 |
-+ * this is not a seeky process (which may have |
2339 |
-+ * bumped into this timeout because of, e.g., |
2340 |
-+ * ZBR), 2) together with charge_full_budget |
2341 |
-+ * it helps give seeky processes higher |
2342 |
-+ * timestamps, and hence be served less |
2343 |
-+ * frequently. |
2344 |
-+ */ |
2345 |
-+ budget = min(budget * 2, bfqd->bfq_max_budget); |
2346 |
-+ break; |
2347 |
-+ case BFQ_BFQQ_BUDGET_EXHAUSTED: |
2348 |
-+ /* |
2349 |
-+ * The process still has backlog, and did not |
2350 |
-+ * let either the budget timeout or the disk |
2351 |
-+ * idling timeout expire. Hence it is not |
2352 |
-+ * seeky, has a short thinktime and may be |
2353 |
-+ * happy with a higher budget too. So |
2354 |
-+ * definitely increase the budget of this good |
2355 |
-+ * candidate to boost the disk throughput. |
2356 |
-+ */ |
2357 |
-+ budget = min(budget * 4, bfqd->bfq_max_budget); |
2358 |
-+ break; |
2359 |
-+ case BFQ_BFQQ_NO_MORE_REQUESTS: |
2360 |
-+ /* |
2361 |
-+ * Leave the budget unchanged. |
2362 |
-+ */ |
2363 |
-+ default: |
2364 |
-+ return; |
2365 |
-+ } |
2366 |
-+ } else /* async queue */ |
2367 |
-+ /* async queues get always the maximum possible budget |
2368 |
-+ * (their ability to dispatch is limited by |
2369 |
-+ * @bfqd->bfq_max_budget_async_rq). |
2370 |
-+ */ |
2371 |
-+ budget = bfqd->bfq_max_budget; |
2372 |
-+ |
2373 |
-+ bfqq->max_budget = budget; |
2374 |
-+ |
2375 |
-+ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && |
2376 |
-+ bfqq->max_budget > bfqd->bfq_max_budget) |
2377 |
-+ bfqq->max_budget = bfqd->bfq_max_budget; |
2378 |
-+ |
2379 |
-+ /* |
2380 |
-+ * Make sure that we have enough budget for the next request. |
2381 |
-+ * Since the finish time of the bfqq must be kept in sync with |
2382 |
-+ * the budget, be sure to call __bfq_bfqq_expire() after the |
2383 |
-+ * update. |
2384 |
-+ */ |
2385 |
-+ next_rq = bfqq->next_rq; |
2386 |
-+ if (next_rq != NULL) |
2387 |
-+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, |
2388 |
-+ bfq_serv_to_charge(next_rq, bfqq)); |
2389 |
-+ else |
2390 |
-+ bfqq->entity.budget = bfqq->max_budget; |
2391 |
-+ |
2392 |
-+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", |
2393 |
-+ next_rq != NULL ? blk_rq_sectors(next_rq) : 0, |
2394 |
-+ bfqq->entity.budget); |
2395 |
-+} |
2396 |
-+ |
2397 |
-+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) |
2398 |
-+{ |
2399 |
-+ unsigned long max_budget; |
2400 |
-+ |
2401 |
-+ /* |
2402 |
-+ * The max_budget calculated when autotuning is equal to the |
2403 |
-+ * amount of sectors transfered in timeout_sync at the |
2404 |
-+ * estimated peak rate. |
2405 |
-+ */ |
2406 |
-+ max_budget = (unsigned long)(peak_rate * 1000 * |
2407 |
-+ timeout >> BFQ_RATE_SHIFT); |
2408 |
-+ |
2409 |
-+ return max_budget; |
2410 |
-+} |
2411 |
-+ |
2412 |
-+/* |
2413 |
-+ * In addition to updating the peak rate, checks whether the process |
2414 |
-+ * is "slow", and returns 1 if so. This slow flag is used, in addition |
2415 |
-+ * to the budget timeout, to reduce the amount of service provided to |
2416 |
-+ * seeky processes, and hence reduce their chances to lower the |
2417 |
-+ * throughput. See the code for more details. |
2418 |
-+ */ |
2419 |
-+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
2420 |
-+ int compensate, enum bfqq_expiration reason) |
2421 |
-+{ |
2422 |
-+ u64 bw, usecs, expected, timeout; |
2423 |
-+ ktime_t delta; |
2424 |
-+ int update = 0; |
2425 |
-+ |
2426 |
-+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) |
2427 |
-+ return 0; |
2428 |
-+ |
2429 |
-+ if (compensate) |
2430 |
-+ delta = bfqd->last_idling_start; |
2431 |
-+ else |
2432 |
-+ delta = ktime_get(); |
2433 |
-+ delta = ktime_sub(delta, bfqd->last_budget_start); |
2434 |
-+ usecs = ktime_to_us(delta); |
2435 |
-+ |
2436 |
-+ /* Don't trust short/unrealistic values. */ |
2437 |
-+ if (usecs < 100 || usecs >= LONG_MAX) |
2438 |
-+ return 0; |
2439 |
-+ |
2440 |
-+ /* |
2441 |
-+ * Calculate the bandwidth for the last slice. We use a 64 bit |
2442 |
-+ * value to store the peak rate, in sectors per usec in fixed |
2443 |
-+ * point math. We do so to have enough precision in the estimate |
2444 |
-+ * and to avoid overflows. |
2445 |
-+ */ |
2446 |
-+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; |
2447 |
-+ do_div(bw, (unsigned long)usecs); |
2448 |
-+ |
2449 |
-+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); |
2450 |
-+ |
2451 |
-+ /* |
2452 |
-+ * Use only long (> 20ms) intervals to filter out spikes for |
2453 |
-+ * the peak rate estimation. |
2454 |
-+ */ |
2455 |
-+ if (usecs > 20000) { |
2456 |
-+ if (bw > bfqd->peak_rate || |
2457 |
-+ (!BFQQ_SEEKY(bfqq) && |
2458 |
-+ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { |
2459 |
-+ bfq_log(bfqd, "measured bw =%llu", bw); |
2460 |
-+ /* |
2461 |
-+ * To smooth oscillations use a low-pass filter with |
2462 |
-+ * alpha=7/8, i.e., |
2463 |
-+ * new_rate = (7/8) * old_rate + (1/8) * bw |
2464 |
-+ */ |
2465 |
-+ do_div(bw, 8); |
2466 |
-+ if (bw == 0) |
2467 |
-+ return 0; |
2468 |
-+ bfqd->peak_rate *= 7; |
2469 |
-+ do_div(bfqd->peak_rate, 8); |
2470 |
-+ bfqd->peak_rate += bw; |
2471 |
-+ update = 1; |
2472 |
-+ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); |
2473 |
-+ } |
2474 |
-+ |
2475 |
-+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; |
2476 |
-+ |
2477 |
-+ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) |
2478 |
-+ bfqd->peak_rate_samples++; |
2479 |
-+ |
2480 |
-+ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && |
2481 |
-+ update && bfqd->bfq_user_max_budget == 0) { |
2482 |
-+ bfqd->bfq_max_budget = |
2483 |
-+ bfq_calc_max_budget(bfqd->peak_rate, timeout); |
2484 |
-+ bfq_log(bfqd, "new max_budget=%lu", |
2485 |
-+ bfqd->bfq_max_budget); |
2486 |
-+ } |
2487 |
-+ } |
2488 |
-+ |
2489 |
-+ /* |
2490 |
-+ * If the process has been served for a too short time |
2491 |
-+ * interval to let its possible sequential accesses prevail on |
2492 |
-+ * the initial seek time needed to move the disk head on the |
2493 |
-+ * first sector it requested, then give the process a chance |
2494 |
-+ * and for the moment return false. |
2495 |
-+ */ |
2496 |
-+ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) |
2497 |
-+ return 0; |
2498 |
-+ |
2499 |
-+ /* |
2500 |
-+ * A process is considered ``slow'' (i.e., seeky, so that we |
2501 |
-+ * cannot treat it fairly in the service domain, as it would |
2502 |
-+ * slow down too much the other processes) if, when a slice |
2503 |
-+ * ends for whatever reason, it has received service at a |
2504 |
-+ * rate that would not be high enough to complete the budget |
2505 |
-+ * before the budget timeout expiration. |
2506 |
-+ */ |
2507 |
-+ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; |
2508 |
-+ |
2509 |
-+ /* |
2510 |
-+ * Caveat: processes doing IO in the slower disk zones will |
2511 |
-+ * tend to be slow(er) even if not seeky. And the estimated |
2512 |
-+ * peak rate will actually be an average over the disk |
2513 |
-+ * surface. Hence, to not be too harsh with unlucky processes, |
2514 |
-+ * we keep a budget/3 margin of safety before declaring a |
2515 |
-+ * process slow. |
2516 |
-+ */ |
2517 |
-+ return expected > (4 * bfqq->entity.budget) / 3; |
2518 |
-+} |
2519 |
-+ |
2520 |
-+/** |
2521 |
-+ * bfq_bfqq_expire - expire a queue. |
2522 |
-+ * @bfqd: device owning the queue. |
2523 |
-+ * @bfqq: the queue to expire. |
2524 |
-+ * @compensate: if true, compensate for the time spent idling. |
2525 |
-+ * @reason: the reason causing the expiration. |
2526 |
-+ * |
2527 |
-+ * |
2528 |
-+ * If the process associated to the queue is slow (i.e., seeky), or in |
2529 |
-+ * case of budget timeout, or, finally, if it is async, we |
2530 |
-+ * artificially charge it an entire budget (independently of the |
2531 |
-+ * actual service it received). As a consequence, the queue will get |
2532 |
-+ * higher timestamps than the correct ones upon reactivation, and |
2533 |
-+ * hence it will be rescheduled as if it had received more service |
2534 |
-+ * than what it actually received. In the end, this class of processes |
2535 |
-+ * will receive less service in proportion to how slowly they consume |
2536 |
-+ * their budgets (and hence how seriously they tend to lower the |
2537 |
-+ * throughput). |
2538 |
-+ * |
2539 |
-+ * In contrast, when a queue expires because it has been idling for |
2540 |
-+ * too much or because it exhausted its budget, we do not touch the |
2541 |
-+ * amount of service it has received. Hence when the queue will be |
2542 |
-+ * reactivated and its timestamps updated, the latter will be in sync |
2543 |
-+ * with the actual service received by the queue until expiration. |
2544 |
-+ * |
2545 |
-+ * Charging a full budget to the first type of queues and the exact |
2546 |
-+ * service to the others has the effect of using the WF2Q+ policy to |
2547 |
-+ * schedule the former on a timeslice basis, without violating the |
2548 |
-+ * service domain guarantees of the latter. |
2549 |
-+ */ |
2550 |
-+static void bfq_bfqq_expire(struct bfq_data *bfqd, |
2551 |
-+ struct bfq_queue *bfqq, |
2552 |
-+ int compensate, |
2553 |
-+ enum bfqq_expiration reason) |
2554 |
-+{ |
2555 |
-+ int slow; |
2556 |
-+ BUG_ON(bfqq != bfqd->active_queue); |
2557 |
-+ |
2558 |
-+ /* Update disk peak rate for autotuning and check whether the |
2559 |
-+ * process is slow (see bfq_update_peak_rate). |
2560 |
-+ */ |
2561 |
-+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); |
2562 |
-+ |
2563 |
-+ /* |
2564 |
-+ * As above explained, 'punish' slow (i.e., seeky), timed-out |
2565 |
-+ * and async queues, to favor sequential sync workloads. |
2566 |
-+ * |
2567 |
-+ * Processes doing IO in the slower disk zones will tend to be |
2568 |
-+ * slow(er) even if not seeky. Hence, since the estimated peak |
2569 |
-+ * rate is actually an average over the disk surface, these |
2570 |
-+ * processes may timeout just for bad luck. To avoid punishing |
2571 |
-+ * them we do not charge a full budget to a process that |
2572 |
-+ * succeeded in consuming at least 2/3 of its budget. |
2573 |
-+ */ |
2574 |
-+ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && |
2575 |
-+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) |
2576 |
-+ bfq_bfqq_charge_full_budget(bfqq); |
2577 |
-+ |
2578 |
-+ if (bfqd->low_latency && bfqq->raising_coeff == 1) |
2579 |
-+ bfqq->last_rais_start_finish = jiffies; |
2580 |
-+ |
2581 |
-+ if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { |
2582 |
-+ if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) |
2583 |
-+ bfqq->soft_rt_next_start = |
2584 |
-+ jiffies + |
2585 |
-+ HZ * bfqq->entity.service / |
2586 |
-+ bfqd->bfq_raising_max_softrt_rate; |
2587 |
-+ else |
2588 |
-+ bfqq->soft_rt_next_start = -1; /* infinity */ |
2589 |
-+ } |
2590 |
-+ bfq_log_bfqq(bfqd, bfqq, |
2591 |
-+ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, |
2592 |
-+ bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); |
2593 |
-+ |
2594 |
-+ /* Increase, decrease or leave budget unchanged according to reason */ |
2595 |
-+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); |
2596 |
-+ __bfq_bfqq_expire(bfqd, bfqq); |
2597 |
-+} |
2598 |
-+ |
2599 |
-+/* |
2600 |
-+ * Budget timeout is not implemented through a dedicated timer, but |
2601 |
-+ * just checked on request arrivals and completions, as well as on |
2602 |
-+ * idle timer expirations. |
2603 |
-+ */ |
2604 |
-+static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) |
2605 |
-+{ |
2606 |
-+ if (bfq_bfqq_budget_new(bfqq)) |
2607 |
-+ return 0; |
2608 |
-+ |
2609 |
-+ if (time_before(jiffies, bfqq->budget_timeout)) |
2610 |
-+ return 0; |
2611 |
-+ |
2612 |
-+ return 1; |
2613 |
-+} |
2614 |
-+ |
2615 |
-+/* |
2616 |
-+ * If we expire a queue that is waiting for the arrival of a new |
2617 |
-+ * request, we may prevent the fictitious timestamp backshifting that |
2618 |
-+ * allows the guarantees of the queue to be preserved (see [1] for |
2619 |
-+ * this tricky aspect). Hence we return true only if this condition |
2620 |
-+ * does not hold, or if the queue is slow enough to deserve only to be |
2621 |
-+ * kicked off for preserving a high throughput. |
2622 |
-+*/ |
2623 |
-+static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) |
2624 |
-+{ |
2625 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, |
2626 |
-+ "may_budget_timeout: wr %d left %d timeout %d", |
2627 |
-+ bfq_bfqq_wait_request(bfqq), |
2628 |
-+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, |
2629 |
-+ bfq_bfqq_budget_timeout(bfqq)); |
2630 |
-+ |
2631 |
-+ return (!bfq_bfqq_wait_request(bfqq) || |
2632 |
-+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) |
2633 |
-+ && |
2634 |
-+ bfq_bfqq_budget_timeout(bfqq); |
2635 |
-+} |
2636 |
-+ |
2637 |
-+/* |
2638 |
-+ * If the active queue is empty, but it is sync and either of the following |
2639 |
-+ * conditions holds, then: 1) the queue must remain active and cannot be |
2640 |
-+ * expired, and 2) the disk must be idled to wait for the possible arrival |
2641 |
-+ * of a new request for the queue. The conditions are: |
2642 |
-+ * - the device is rotational and not performing NCQ, and the queue has its |
2643 |
-+ * idle window set (in this case, waiting for a new request for the queue |
2644 |
-+ * is likely to boost the disk throughput); |
2645 |
-+ * - the queue is weight-raised (waiting for the request is necessary for |
2646 |
-+ * providing the queue with fairness and latency guarantees). |
2647 |
-+ */ |
2648 |
-+static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq, |
2649 |
-+ int budg_timeout) |
2650 |
-+{ |
2651 |
-+ struct bfq_data *bfqd = bfqq->bfqd; |
2652 |
-+ |
2653 |
-+ return (bfq_bfqq_sync(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) && |
2654 |
-+ bfqd->bfq_slice_idle != 0 && |
2655 |
-+ ((bfq_bfqq_idle_window(bfqq) && !bfqd->hw_tag && |
2656 |
-+ !blk_queue_nonrot(bfqd->queue)) |
2657 |
-+ || bfqq->raising_coeff > 1) && |
2658 |
-+ (bfqd->rq_in_driver == 0 || |
2659 |
-+ budg_timeout || |
2660 |
-+ bfqq->raising_coeff > 1) && |
2661 |
-+ !bfq_close_cooperator(bfqd, bfqq) && |
2662 |
-+ (!bfq_bfqq_coop(bfqq) || |
2663 |
-+ !bfq_bfqq_some_coop_idle(bfqq)) && |
2664 |
-+ !bfq_queue_nonrot_noidle(bfqd, bfqq)); |
2665 |
-+} |
2666 |
-+ |
2667 |
-+/* |
2668 |
-+ * Select a queue for service. If we have a current active queue, |
2669 |
-+ * check whether to continue servicing it, or retrieve and set a new one. |
2670 |
-+ */ |
2671 |
-+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) |
2672 |
-+{ |
2673 |
-+ struct bfq_queue *bfqq, *new_bfqq = NULL; |
2674 |
-+ struct request *next_rq; |
2675 |
-+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; |
2676 |
-+ int budg_timeout; |
2677 |
-+ |
2678 |
-+ bfqq = bfqd->active_queue; |
2679 |
-+ if (bfqq == NULL) |
2680 |
-+ goto new_queue; |
2681 |
-+ |
2682 |
-+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); |
2683 |
-+ |
2684 |
-+ /* |
2685 |
-+ * If another queue has a request waiting within our mean seek |
2686 |
-+ * distance, let it run. The expire code will check for close |
2687 |
-+ * cooperators and put the close queue at the front of the |
2688 |
-+ * service tree. If possible, merge the expiring queue with the |
2689 |
-+ * new bfqq. |
2690 |
-+ */ |
2691 |
-+ new_bfqq = bfq_close_cooperator(bfqd, bfqq); |
2692 |
-+ if (new_bfqq != NULL && bfqq->new_bfqq == NULL) |
2693 |
-+ bfq_setup_merge(bfqq, new_bfqq); |
2694 |
-+ |
2695 |
-+ budg_timeout = bfq_may_expire_for_budg_timeout(bfqq); |
2696 |
-+ if (budg_timeout && |
2697 |
-+ !bfq_bfqq_must_idle(bfqq, budg_timeout)) |
2698 |
-+ goto expire; |
2699 |
-+ |
2700 |
-+ next_rq = bfqq->next_rq; |
2701 |
-+ /* |
2702 |
-+ * If bfqq has requests queued and it has enough budget left to |
2703 |
-+ * serve them, keep the queue, otherwise expire it. |
2704 |
-+ */ |
2705 |
-+ if (next_rq != NULL) { |
2706 |
-+ if (bfq_serv_to_charge(next_rq, bfqq) > |
2707 |
-+ bfq_bfqq_budget_left(bfqq)) { |
2708 |
-+ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; |
2709 |
-+ goto expire; |
2710 |
-+ } else { |
2711 |
-+ /* |
2712 |
-+ * The idle timer may be pending because we may not |
2713 |
-+ * disable disk idling even when a new request arrives |
2714 |
-+ */ |
2715 |
-+ if (timer_pending(&bfqd->idle_slice_timer)) { |
2716 |
-+ /* |
2717 |
-+ * If we get here: 1) at least a new request |
2718 |
-+ * has arrived but we have not disabled the |
2719 |
-+ * timer because the request was too small, |
2720 |
-+ * 2) then the block layer has unplugged the |
2721 |
-+ * device, causing the dispatch to be invoked. |
2722 |
-+ * |
2723 |
-+ * Since the device is unplugged, now the |
2724 |
-+ * requests are probably large enough to |
2725 |
-+ * provide a reasonable throughput. |
2726 |
-+ * So we disable idling. |
2727 |
-+ */ |
2728 |
-+ bfq_clear_bfqq_wait_request(bfqq); |
2729 |
-+ del_timer(&bfqd->idle_slice_timer); |
2730 |
-+ } |
2731 |
-+ if (new_bfqq == NULL) |
2732 |
-+ goto keep_queue; |
2733 |
-+ else |
2734 |
-+ goto expire; |
2735 |
-+ } |
2736 |
-+ } |
2737 |
-+ |
2738 |
-+ /* |
2739 |
-+ * No requests pending. If there is no cooperator, and the active |
2740 |
-+ * queue still has requests in flight or is idling for a new request, |
2741 |
-+ * then keep it. |
2742 |
-+ */ |
2743 |
-+ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || |
2744 |
-+ (bfqq->dispatched != 0 && |
2745 |
-+ (bfq_bfqq_idle_window(bfqq) || bfqq->raising_coeff > 1) && |
2746 |
-+ !bfq_queue_nonrot_noidle(bfqd, bfqq)))) { |
2747 |
-+ bfqq = NULL; |
2748 |
-+ goto keep_queue; |
2749 |
-+ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { |
2750 |
-+ /* |
2751 |
-+ * Expiring the queue because there is a close cooperator, |
2752 |
-+ * cancel timer. |
2753 |
-+ */ |
2754 |
-+ bfq_clear_bfqq_wait_request(bfqq); |
2755 |
-+ del_timer(&bfqd->idle_slice_timer); |
2756 |
-+ } |
2757 |
-+ |
2758 |
-+ reason = BFQ_BFQQ_NO_MORE_REQUESTS; |
2759 |
-+expire: |
2760 |
-+ bfq_bfqq_expire(bfqd, bfqq, 0, reason); |
2761 |
-+new_queue: |
2762 |
-+ bfqq = bfq_set_active_queue(bfqd, new_bfqq); |
2763 |
-+ bfq_log(bfqd, "select_queue: new queue %d returned", |
2764 |
-+ bfqq != NULL ? bfqq->pid : 0); |
2765 |
-+keep_queue: |
2766 |
-+ return bfqq; |
2767 |
-+} |
2768 |
-+ |
2769 |
-+static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
2770 |
-+{ |
2771 |
-+ if (bfqq->raising_coeff > 1) { /* queue is being boosted */ |
2772 |
-+ struct bfq_entity *entity = &bfqq->entity; |
2773 |
-+ |
2774 |
-+ bfq_log_bfqq(bfqd, bfqq, |
2775 |
-+ "raising period dur %u/%u msec, " |
2776 |
-+ "old raising coeff %u, w %d(%d)", |
2777 |
-+ jiffies_to_msecs(jiffies - |
2778 |
-+ bfqq->last_rais_start_finish), |
2779 |
-+ jiffies_to_msecs(bfqq->raising_cur_max_time), |
2780 |
-+ bfqq->raising_coeff, |
2781 |
-+ bfqq->entity.weight, bfqq->entity.orig_weight); |
2782 |
-+ |
2783 |
-+ BUG_ON(bfqq != bfqd->active_queue && entity->weight != |
2784 |
-+ entity->orig_weight * bfqq->raising_coeff); |
2785 |
-+ if(entity->ioprio_changed) |
2786 |
-+ bfq_log_bfqq(bfqd, bfqq, |
2787 |
-+ "WARN: pending prio change"); |
2788 |
-+ /* |
2789 |
-+ * If too much time has elapsed from the beginning |
2790 |
-+ * of this weight-raising period and process is not soft |
2791 |
-+ * real-time, stop it |
2792 |
-+ */ |
2793 |
-+ if (jiffies - bfqq->last_rais_start_finish > |
2794 |
-+ bfqq->raising_cur_max_time) { |
2795 |
-+ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && |
2796 |
-+ bfqq->soft_rt_next_start < jiffies; |
2797 |
-+ |
2798 |
-+ bfqq->last_rais_start_finish = jiffies; |
2799 |
-+ if (soft_rt) |
2800 |
-+ bfqq->raising_cur_max_time = |
2801 |
-+ bfqd->bfq_raising_rt_max_time; |
2802 |
-+ else { |
2803 |
-+ bfq_log_bfqq(bfqd, bfqq, |
2804 |
-+ "wrais ending at %llu msec," |
2805 |
-+ "rais_max_time %u", |
2806 |
-+ bfqq->last_rais_start_finish, |
2807 |
-+ jiffies_to_msecs(bfqq-> |
2808 |
-+ raising_cur_max_time)); |
2809 |
-+ bfq_bfqq_end_raising(bfqq); |
2810 |
-+ __bfq_entity_update_weight_prio( |
2811 |
-+ bfq_entity_service_tree(entity), |
2812 |
-+ entity); |
2813 |
-+ } |
2814 |
-+ } |
2815 |
-+ } |
2816 |
-+} |
2817 |
-+ |
2818 |
-+/* |
2819 |
-+ * Dispatch one request from bfqq, moving it to the request queue |
2820 |
-+ * dispatch list. |
2821 |
-+ */ |
2822 |
-+static int bfq_dispatch_request(struct bfq_data *bfqd, |
2823 |
-+ struct bfq_queue *bfqq) |
2824 |
-+{ |
2825 |
-+ int dispatched = 0; |
2826 |
-+ struct request *rq; |
2827 |
-+ unsigned long service_to_charge; |
2828 |
-+ |
2829 |
-+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); |
2830 |
-+ |
2831 |
-+ /* Follow expired path, else get first next available. */ |
2832 |
-+ rq = bfq_check_fifo(bfqq); |
2833 |
-+ if (rq == NULL) |
2834 |
-+ rq = bfqq->next_rq; |
2835 |
-+ service_to_charge = bfq_serv_to_charge(rq, bfqq); |
2836 |
-+ |
2837 |
-+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { |
2838 |
-+ /* |
2839 |
-+ * This may happen if the next rq is chosen |
2840 |
-+ * in fifo order instead of sector order. |
2841 |
-+ * The budget is properly dimensioned |
2842 |
-+ * to be always sufficient to serve the next request |
2843 |
-+ * only if it is chosen in sector order. The reason is |
2844 |
-+ * that it would be quite inefficient and little useful |
2845 |
-+ * to always make sure that the budget is large enough |
2846 |
-+ * to serve even the possible next rq in fifo order. |
2847 |
-+ * In fact, requests are seldom served in fifo order. |
2848 |
-+ * |
2849 |
-+ * Expire the queue for budget exhaustion, and |
2850 |
-+ * make sure that the next act_budget is enough |
2851 |
-+ * to serve the next request, even if it comes |
2852 |
-+ * from the fifo expired path. |
2853 |
-+ */ |
2854 |
-+ bfqq->next_rq = rq; |
2855 |
-+ /* |
2856 |
-+ * Since this dispatch is failed, make sure that |
2857 |
-+ * a new one will be performed |
2858 |
-+ */ |
2859 |
-+ if (!bfqd->rq_in_driver) |
2860 |
-+ bfq_schedule_dispatch(bfqd); |
2861 |
-+ goto expire; |
2862 |
-+ } |
2863 |
-+ |
2864 |
-+ /* Finally, insert request into driver dispatch list. */ |
2865 |
-+ bfq_bfqq_served(bfqq, service_to_charge); |
2866 |
-+ bfq_dispatch_insert(bfqd->queue, rq); |
2867 |
-+ |
2868 |
-+ update_raising_data(bfqd, bfqq); |
2869 |
-+ |
2870 |
-+ bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " |
2871 |
-+ "budg left %lu", |
2872 |
-+ blk_rq_sectors(rq), |
2873 |
-+ (long long unsigned)blk_rq_pos(rq), |
2874 |
-+ bfq_bfqq_budget_left(bfqq)); |
2875 |
-+ |
2876 |
-+ dispatched++; |
2877 |
-+ |
2878 |
-+ if (bfqd->active_bic == NULL) { |
2879 |
-+ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); |
2880 |
-+ bfqd->active_bic = RQ_BIC(rq); |
2881 |
-+ } |
2882 |
-+ |
2883 |
-+ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && |
2884 |
-+ dispatched >= bfqd->bfq_max_budget_async_rq) || |
2885 |
-+ bfq_class_idle(bfqq))) |
2886 |
-+ goto expire; |
2887 |
-+ |
2888 |
-+ return dispatched; |
2889 |
-+ |
2890 |
-+expire: |
2891 |
-+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); |
2892 |
-+ return dispatched; |
2893 |
-+} |
2894 |
-+ |
2895 |
-+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) |
2896 |
-+{ |
2897 |
-+ int dispatched = 0; |
2898 |
-+ |
2899 |
-+ while (bfqq->next_rq != NULL) { |
2900 |
-+ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); |
2901 |
-+ dispatched++; |
2902 |
-+ } |
2903 |
-+ |
2904 |
-+ BUG_ON(!list_empty(&bfqq->fifo)); |
2905 |
-+ return dispatched; |
2906 |
-+} |
2907 |
-+ |
2908 |
-+/* |
2909 |
-+ * Drain our current requests. Used for barriers and when switching |
2910 |
-+ * io schedulers on-the-fly. |
2911 |
-+ */ |
2912 |
-+static int bfq_forced_dispatch(struct bfq_data *bfqd) |
2913 |
-+{ |
2914 |
-+ struct bfq_queue *bfqq, *n; |
2915 |
-+ struct bfq_service_tree *st; |
2916 |
-+ int dispatched = 0; |
2917 |
-+ |
2918 |
-+ bfqq = bfqd->active_queue; |
2919 |
-+ if (bfqq != NULL) |
2920 |
-+ __bfq_bfqq_expire(bfqd, bfqq); |
2921 |
-+ |
2922 |
-+ /* |
2923 |
-+ * Loop through classes, and be careful to leave the scheduler |
2924 |
-+ * in a consistent state, as feedback mechanisms and vtime |
2925 |
-+ * updates cannot be disabled during the process. |
2926 |
-+ */ |
2927 |
-+ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { |
2928 |
-+ st = bfq_entity_service_tree(&bfqq->entity); |
2929 |
-+ |
2930 |
-+ dispatched += __bfq_forced_dispatch_bfqq(bfqq); |
2931 |
-+ bfqq->max_budget = bfq_max_budget(bfqd); |
2932 |
-+ |
2933 |
-+ bfq_forget_idle(st); |
2934 |
-+ } |
2935 |
-+ |
2936 |
-+ BUG_ON(bfqd->busy_queues != 0); |
2937 |
-+ |
2938 |
-+ return dispatched; |
2939 |
-+} |
2940 |
-+ |
2941 |
-+static int bfq_dispatch_requests(struct request_queue *q, int force) |
2942 |
-+{ |
2943 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
2944 |
-+ struct bfq_queue *bfqq; |
2945 |
-+ int max_dispatch; |
2946 |
-+ |
2947 |
-+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); |
2948 |
-+ if (bfqd->busy_queues == 0) |
2949 |
-+ return 0; |
2950 |
-+ |
2951 |
-+ if (unlikely(force)) |
2952 |
-+ return bfq_forced_dispatch(bfqd); |
2953 |
-+ |
2954 |
-+ if((bfqq = bfq_select_queue(bfqd)) == NULL) |
2955 |
-+ return 0; |
2956 |
-+ |
2957 |
-+ max_dispatch = bfqd->bfq_quantum; |
2958 |
-+ if (bfq_class_idle(bfqq)) |
2959 |
-+ max_dispatch = 1; |
2960 |
-+ |
2961 |
-+ if (!bfq_bfqq_sync(bfqq)) |
2962 |
-+ max_dispatch = bfqd->bfq_max_budget_async_rq; |
2963 |
-+ |
2964 |
-+ if (bfqq->dispatched >= max_dispatch) { |
2965 |
-+ if (bfqd->busy_queues > 1) |
2966 |
-+ return 0; |
2967 |
-+ if (bfqq->dispatched >= 4 * max_dispatch) |
2968 |
-+ return 0; |
2969 |
-+ } |
2970 |
-+ |
2971 |
-+ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) |
2972 |
-+ return 0; |
2973 |
-+ |
2974 |
-+ bfq_clear_bfqq_wait_request(bfqq); |
2975 |
-+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); |
2976 |
-+ |
2977 |
-+ if (! bfq_dispatch_request(bfqd, bfqq)) |
2978 |
-+ return 0; |
2979 |
-+ |
2980 |
-+ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" |
2981 |
-+ "(max_disp %d)", bfqq->pid, max_dispatch); |
2982 |
-+ |
2983 |
-+ return 1; |
2984 |
-+} |
2985 |
-+ |
2986 |
-+/* |
2987 |
-+ * Task holds one reference to the queue, dropped when task exits. Each rq |
2988 |
-+ * in-flight on this queue also holds a reference, dropped when rq is freed. |
2989 |
-+ * |
2990 |
-+ * Queue lock must be held here. |
2991 |
-+ */ |
2992 |
-+static void bfq_put_queue(struct bfq_queue *bfqq) |
2993 |
-+{ |
2994 |
-+ struct bfq_data *bfqd = bfqq->bfqd; |
2995 |
-+ |
2996 |
-+ BUG_ON(atomic_read(&bfqq->ref) <= 0); |
2997 |
-+ |
2998 |
-+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, |
2999 |
-+ atomic_read(&bfqq->ref)); |
3000 |
-+ if (!atomic_dec_and_test(&bfqq->ref)) |
3001 |
-+ return; |
3002 |
-+ |
3003 |
-+ BUG_ON(rb_first(&bfqq->sort_list) != NULL); |
3004 |
-+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); |
3005 |
-+ BUG_ON(bfqq->entity.tree != NULL); |
3006 |
-+ BUG_ON(bfq_bfqq_busy(bfqq)); |
3007 |
-+ BUG_ON(bfqd->active_queue == bfqq); |
3008 |
-+ |
3009 |
-+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); |
3010 |
-+ |
3011 |
-+ kmem_cache_free(bfq_pool, bfqq); |
3012 |
-+} |
3013 |
-+ |
3014 |
-+static void bfq_put_cooperator(struct bfq_queue *bfqq) |
3015 |
-+{ |
3016 |
-+ struct bfq_queue *__bfqq, *next; |
3017 |
-+ |
3018 |
-+ /* |
3019 |
-+ * If this queue was scheduled to merge with another queue, be |
3020 |
-+ * sure to drop the reference taken on that queue (and others in |
3021 |
-+ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. |
3022 |
-+ */ |
3023 |
-+ __bfqq = bfqq->new_bfqq; |
3024 |
-+ while (__bfqq) { |
3025 |
-+ if (__bfqq == bfqq) { |
3026 |
-+ WARN(1, "bfqq->new_bfqq loop detected.\n"); |
3027 |
-+ break; |
3028 |
-+ } |
3029 |
-+ next = __bfqq->new_bfqq; |
3030 |
-+ bfq_put_queue(__bfqq); |
3031 |
-+ __bfqq = next; |
3032 |
-+ } |
3033 |
-+} |
3034 |
-+ |
3035 |
-+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
3036 |
-+{ |
3037 |
-+ if (bfqq == bfqd->active_queue) { |
3038 |
-+ __bfq_bfqq_expire(bfqd, bfqq); |
3039 |
-+ bfq_schedule_dispatch(bfqd); |
3040 |
-+ } |
3041 |
-+ |
3042 |
-+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, |
3043 |
-+ atomic_read(&bfqq->ref)); |
3044 |
-+ |
3045 |
-+ bfq_put_cooperator(bfqq); |
3046 |
-+ |
3047 |
-+ bfq_put_queue(bfqq); |
3048 |
-+} |
3049 |
-+ |
3050 |
-+static void bfq_init_icq(struct io_cq *icq) |
3051 |
-+{ |
3052 |
-+ struct bfq_io_cq *bic = icq_to_bic(icq); |
3053 |
-+ |
3054 |
-+ bic->ttime.last_end_request = jiffies; |
3055 |
-+} |
3056 |
-+ |
3057 |
-+static void bfq_exit_icq(struct io_cq *icq) |
3058 |
-+{ |
3059 |
-+ struct bfq_io_cq *bic = icq_to_bic(icq); |
3060 |
-+ struct bfq_data *bfqd = bic_to_bfqd(bic); |
3061 |
-+ |
3062 |
-+ if (bic->bfqq[BLK_RW_ASYNC]) { |
3063 |
-+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]); |
3064 |
-+ bic->bfqq[BLK_RW_ASYNC] = NULL; |
3065 |
-+ } |
3066 |
-+ |
3067 |
-+ if (bic->bfqq[BLK_RW_SYNC]) { |
3068 |
-+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); |
3069 |
-+ bic->bfqq[BLK_RW_SYNC] = NULL; |
3070 |
-+ } |
3071 |
-+} |
3072 |
-+ |
3073 |
-+/* |
3074 |
-+ * Update the entity prio values; note that the new values will not |
3075 |
-+ * be used until the next (re)activation. |
3076 |
-+ */ |
3077 |
-+static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) |
3078 |
-+{ |
3079 |
-+ struct task_struct *tsk = current; |
3080 |
-+ int ioprio_class; |
3081 |
-+ |
3082 |
-+ if (!bfq_bfqq_prio_changed(bfqq)) |
3083 |
-+ return; |
3084 |
-+ |
3085 |
-+ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); |
3086 |
-+ switch (ioprio_class) { |
3087 |
-+ default: |
3088 |
-+ printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); |
3089 |
-+ case IOPRIO_CLASS_NONE: |
3090 |
-+ /* |
3091 |
-+ * No prio set, inherit CPU scheduling settings. |
3092 |
-+ */ |
3093 |
-+ bfqq->entity.new_ioprio = task_nice_ioprio(tsk); |
3094 |
-+ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); |
3095 |
-+ break; |
3096 |
-+ case IOPRIO_CLASS_RT: |
3097 |
-+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); |
3098 |
-+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; |
3099 |
-+ break; |
3100 |
-+ case IOPRIO_CLASS_BE: |
3101 |
-+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); |
3102 |
-+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; |
3103 |
-+ break; |
3104 |
-+ case IOPRIO_CLASS_IDLE: |
3105 |
-+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; |
3106 |
-+ bfqq->entity.new_ioprio = 7; |
3107 |
-+ bfq_clear_bfqq_idle_window(bfqq); |
3108 |
-+ break; |
3109 |
-+ } |
3110 |
-+ |
3111 |
-+ bfqq->entity.ioprio_changed = 1; |
3112 |
-+ |
3113 |
-+ /* |
3114 |
-+ * Keep track of original prio settings in case we have to temporarily |
3115 |
-+ * elevate the priority of this queue. |
3116 |
-+ */ |
3117 |
-+ bfqq->org_ioprio = bfqq->entity.new_ioprio; |
3118 |
-+ bfq_clear_bfqq_prio_changed(bfqq); |
3119 |
-+} |
3120 |
-+ |
3121 |
-+static void bfq_changed_ioprio(struct bfq_io_cq *bic) |
3122 |
-+{ |
3123 |
-+ struct bfq_data *bfqd; |
3124 |
-+ struct bfq_queue *bfqq, *new_bfqq; |
3125 |
-+ struct bfq_group *bfqg; |
3126 |
-+ unsigned long uninitialized_var(flags); |
3127 |
-+ int ioprio = bic->icq.ioc->ioprio; |
3128 |
-+ |
3129 |
-+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags); |
3130 |
-+ /* |
3131 |
-+ * This condition may trigger on a newly created bic, be sure to drop the |
3132 |
-+ * lock before returning. |
3133 |
-+ */ |
3134 |
-+ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio)) |
3135 |
-+ goto out; |
3136 |
-+ |
3137 |
-+ bfqq = bic->bfqq[BLK_RW_ASYNC]; |
3138 |
-+ if (bfqq != NULL) { |
3139 |
-+ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, |
3140 |
-+ sched_data); |
3141 |
-+ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic, |
3142 |
-+ GFP_ATOMIC); |
3143 |
-+ if (new_bfqq != NULL) { |
3144 |
-+ bic->bfqq[BLK_RW_ASYNC] = new_bfqq; |
3145 |
-+ bfq_log_bfqq(bfqd, bfqq, |
3146 |
-+ "changed_ioprio: bfqq %p %d", |
3147 |
-+ bfqq, atomic_read(&bfqq->ref)); |
3148 |
-+ bfq_put_queue(bfqq); |
3149 |
-+ } |
3150 |
-+ } |
3151 |
-+ |
3152 |
-+ bfqq = bic->bfqq[BLK_RW_SYNC]; |
3153 |
-+ if (bfqq != NULL) |
3154 |
-+ bfq_mark_bfqq_prio_changed(bfqq); |
3155 |
-+ |
3156 |
-+ bic->ioprio = ioprio; |
3157 |
-+ |
3158 |
-+out: |
3159 |
-+ bfq_put_bfqd_unlock(bfqd, &flags); |
3160 |
-+} |
3161 |
-+ |
3162 |
-+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
3163 |
-+ pid_t pid, int is_sync) |
3164 |
-+{ |
3165 |
-+ RB_CLEAR_NODE(&bfqq->entity.rb_node); |
3166 |
-+ INIT_LIST_HEAD(&bfqq->fifo); |
3167 |
-+ |
3168 |
-+ atomic_set(&bfqq->ref, 0); |
3169 |
-+ bfqq->bfqd = bfqd; |
3170 |
-+ |
3171 |
-+ bfq_mark_bfqq_prio_changed(bfqq); |
3172 |
-+ |
3173 |
-+ if (is_sync) { |
3174 |
-+ if (!bfq_class_idle(bfqq)) |
3175 |
-+ bfq_mark_bfqq_idle_window(bfqq); |
3176 |
-+ bfq_mark_bfqq_sync(bfqq); |
3177 |
-+ } |
3178 |
-+ |
3179 |
-+ /* Tentative initial value to trade off between thr and lat */ |
3180 |
-+ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; |
3181 |
-+ bfqq->pid = pid; |
3182 |
-+ |
3183 |
-+ bfqq->raising_coeff = 1; |
3184 |
-+ bfqq->last_rais_start_finish = 0; |
3185 |
-+ bfqq->soft_rt_next_start = -1; |
3186 |
-+} |
3187 |
-+ |
3188 |
-+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, |
3189 |
-+ struct bfq_group *bfqg, |
3190 |
-+ int is_sync, |
3191 |
-+ struct bfq_io_cq *bic, |
3192 |
-+ gfp_t gfp_mask) |
3193 |
-+{ |
3194 |
-+ struct bfq_queue *bfqq, *new_bfqq = NULL; |
3195 |
-+ |
3196 |
-+retry: |
3197 |
-+ /* bic always exists here */ |
3198 |
-+ bfqq = bic_to_bfqq(bic, is_sync); |
3199 |
-+ |
3200 |
-+ /* |
3201 |
-+ * Always try a new alloc if we fall back to the OOM bfqq |
3202 |
-+ * originally, since it should just be a temporary situation. |
3203 |
-+ */ |
3204 |
-+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { |
3205 |
-+ bfqq = NULL; |
3206 |
-+ if (new_bfqq != NULL) { |
3207 |
-+ bfqq = new_bfqq; |
3208 |
-+ new_bfqq = NULL; |
3209 |
-+ } else if (gfp_mask & __GFP_WAIT) { |
3210 |
-+ spin_unlock_irq(bfqd->queue->queue_lock); |
3211 |
-+ new_bfqq = kmem_cache_alloc_node(bfq_pool, |
3212 |
-+ gfp_mask | __GFP_ZERO, |
3213 |
-+ bfqd->queue->node); |
3214 |
-+ spin_lock_irq(bfqd->queue->queue_lock); |
3215 |
-+ if (new_bfqq != NULL) |
3216 |
-+ goto retry; |
3217 |
-+ } else { |
3218 |
-+ bfqq = kmem_cache_alloc_node(bfq_pool, |
3219 |
-+ gfp_mask | __GFP_ZERO, |
3220 |
-+ bfqd->queue->node); |
3221 |
-+ } |
3222 |
-+ |
3223 |
-+ if (bfqq != NULL) { |
3224 |
-+ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); |
3225 |
-+ bfq_log_bfqq(bfqd, bfqq, "allocated"); |
3226 |
-+ } else { |
3227 |
-+ bfqq = &bfqd->oom_bfqq; |
3228 |
-+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); |
3229 |
-+ } |
3230 |
-+ |
3231 |
-+ bfq_init_prio_data(bfqq, bic); |
3232 |
-+ bfq_init_entity(&bfqq->entity, bfqg); |
3233 |
-+ } |
3234 |
-+ |
3235 |
-+ if (new_bfqq != NULL) |
3236 |
-+ kmem_cache_free(bfq_pool, new_bfqq); |
3237 |
-+ |
3238 |
-+ return bfqq; |
3239 |
-+} |
3240 |
-+ |
3241 |
-+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, |
3242 |
-+ struct bfq_group *bfqg, |
3243 |
-+ int ioprio_class, int ioprio) |
3244 |
-+{ |
3245 |
-+ switch (ioprio_class) { |
3246 |
-+ case IOPRIO_CLASS_RT: |
3247 |
-+ return &bfqg->async_bfqq[0][ioprio]; |
3248 |
-+ case IOPRIO_CLASS_NONE: |
3249 |
-+ ioprio = IOPRIO_NORM; |
3250 |
-+ /* fall through */ |
3251 |
-+ case IOPRIO_CLASS_BE: |
3252 |
-+ return &bfqg->async_bfqq[1][ioprio]; |
3253 |
-+ case IOPRIO_CLASS_IDLE: |
3254 |
-+ return &bfqg->async_idle_bfqq; |
3255 |
-+ default: |
3256 |
-+ BUG(); |
3257 |
-+ } |
3258 |
-+} |
3259 |
-+ |
3260 |
-+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, |
3261 |
-+ struct bfq_group *bfqg, int is_sync, |
3262 |
-+ struct bfq_io_cq *bic, gfp_t gfp_mask) |
3263 |
-+{ |
3264 |
-+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); |
3265 |
-+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); |
3266 |
-+ struct bfq_queue **async_bfqq = NULL; |
3267 |
-+ struct bfq_queue *bfqq = NULL; |
3268 |
-+ |
3269 |
-+ if (!is_sync) { |
3270 |
-+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, |
3271 |
-+ ioprio); |
3272 |
-+ bfqq = *async_bfqq; |
3273 |
-+ } |
3274 |
-+ |
3275 |
-+ if (bfqq == NULL) |
3276 |
-+ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask); |
3277 |
-+ |
3278 |
-+ /* |
3279 |
-+ * Pin the queue now that it's allocated, scheduler exit will prune it. |
3280 |
-+ */ |
3281 |
-+ if (!is_sync && *async_bfqq == NULL) { |
3282 |
-+ atomic_inc(&bfqq->ref); |
3283 |
-+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", |
3284 |
-+ bfqq, atomic_read(&bfqq->ref)); |
3285 |
-+ *async_bfqq = bfqq; |
3286 |
-+ } |
3287 |
-+ |
3288 |
-+ atomic_inc(&bfqq->ref); |
3289 |
-+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, |
3290 |
-+ atomic_read(&bfqq->ref)); |
3291 |
-+ return bfqq; |
3292 |
-+} |
3293 |
-+ |
3294 |
-+static void bfq_update_io_thinktime(struct bfq_data *bfqd, |
3295 |
-+ struct bfq_io_cq *bic) |
3296 |
-+{ |
3297 |
-+ unsigned long elapsed = jiffies - bic->ttime.last_end_request; |
3298 |
-+ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); |
3299 |
-+ |
3300 |
-+ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; |
3301 |
-+ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; |
3302 |
-+ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / bic->ttime.ttime_samples; |
3303 |
-+} |
3304 |
-+ |
3305 |
/*
 * Update the seek statistics of bfqq with the distance between the start of
 * rq and bfqq->last_request_pos.
 *
 * seek_samples and seek_total are decayed averages (7/8 of the old value
 * plus 1/8 of the new sample, scaled by 256); seek_mean is their rounded
 * ratio.  For queues flagged as cooperating (bfq_bfqq_coop), the direction
 * in which the mean moved also drives the some_coop_idle flag.
 */
static void bfq_update_io_seektime(struct bfq_data *bfqd,
				   struct bfq_queue *bfqq,
				   struct request *rq)
{
	sector_t sdist;
	u64 total;

	/* Absolute distance, computed without going negative. */
	if (bfqq->last_request_pos < blk_rq_pos(rq))
		sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
	else
		sdist = bfqq->last_request_pos - blk_rq_pos(rq);

	/*
	 * Don't allow the seek distance to get too large from the
	 * odd fragment, pagein, etc.
	 */
	if (bfqq->seek_samples == 0) /* first request, not really a seek */
		sdist = 0;
	else if (bfqq->seek_samples <= 60) /* second & third seek */
		sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
	else
		sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);

	bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
	bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
	/* Adding half the divisor rounds the mean to nearest. */
	total = bfqq->seek_total + (bfqq->seek_samples/2);
	do_div(total, bfqq->seek_samples);
	if (bfq_bfqq_coop(bfqq)) {
		/*
		 * If the mean seektime increases for a (non-seeky) shared
		 * queue, some cooperator is likely to be idling too much.
		 * On the contrary, if it decreases, some cooperator has
		 * probably waked up.
		 *
		 * NOTE(review): the code below marks some_coop_idle when
		 * the new mean is *lower* than the old one and clears it
		 * when it is higher, which reads as the opposite of the
		 * paragraph above -- confirm which one is intended.
		 */
		if ((sector_t)total < bfqq->seek_mean)
			bfq_mark_bfqq_some_coop_idle(bfqq) ;
		else if ((sector_t)total > bfqq->seek_mean)
			bfq_clear_bfqq_some_coop_idle(bfqq) ;
	}
	bfqq->seek_mean = (sector_t)total;

	bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
		     (u64)bfqq->seek_mean);
}
3350 |
-+ |
3351 |
-+/* |
3352 |
-+ * Disable idle window if the process thinks too long or seeks so much that |
3353 |
-+ * it doesn't matter. |
3354 |
-+ */ |
3355 |
-+static void bfq_update_idle_window(struct bfq_data *bfqd, |
3356 |
-+ struct bfq_queue *bfqq, |
3357 |
-+ struct bfq_io_cq *bic) |
3358 |
-+{ |
3359 |
-+ int enable_idle; |
3360 |
-+ |
3361 |
-+ /* Don't idle for async or idle io prio class. */ |
3362 |
-+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) |
3363 |
-+ return; |
3364 |
-+ |
3365 |
-+ enable_idle = bfq_bfqq_idle_window(bfqq); |
3366 |
-+ |
3367 |
-+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 || |
3368 |
-+ bfqd->bfq_slice_idle == 0 || |
3369 |
-+ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && |
3370 |
-+ bfqq->raising_coeff == 1)) |
3371 |
-+ enable_idle = 0; |
3372 |
-+ else if (bfq_sample_valid(bic->ttime.ttime_samples)) { |
3373 |
-+ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && |
3374 |
-+ bfqq->raising_coeff == 1) |
3375 |
-+ enable_idle = 0; |
3376 |
-+ else |
3377 |
-+ enable_idle = 1; |
3378 |
-+ } |
3379 |
-+ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", |
3380 |
-+ enable_idle); |
3381 |
-+ |
3382 |
-+ if (enable_idle) |
3383 |
-+ bfq_mark_bfqq_idle_window(bfqq); |
3384 |
-+ else |
3385 |
-+ bfq_clear_bfqq_idle_window(bfqq); |
3386 |
-+} |
3387 |
-+ |
3388 |
-+/* |
3389 |
-+ * Called when a new fs request (rq) is added to bfqq. Check if there's |
3390 |
-+ * something we should do about it. |
3391 |
-+ */ |
3392 |
-+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
3393 |
-+ struct request *rq) |
3394 |
-+{ |
3395 |
-+ struct bfq_io_cq *bic = RQ_BIC(rq); |
3396 |
-+ |
3397 |
-+ if (rq->cmd_flags & REQ_META) |
3398 |
-+ bfqq->meta_pending++; |
3399 |
-+ |
3400 |
-+ bfq_update_io_thinktime(bfqd, bic); |
3401 |
-+ bfq_update_io_seektime(bfqd, bfqq, rq); |
3402 |
-+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || |
3403 |
-+ !BFQQ_SEEKY(bfqq)) |
3404 |
-+ bfq_update_idle_window(bfqd, bfqq, bic); |
3405 |
-+ |
3406 |
-+ bfq_log_bfqq(bfqd, bfqq, |
3407 |
-+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", |
3408 |
-+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), |
3409 |
-+ (long long unsigned)bfqq->seek_mean); |
3410 |
-+ |
3411 |
-+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); |
3412 |
-+ |
3413 |
-+ if (bfqq == bfqd->active_queue) { |
3414 |
-+ /* |
3415 |
-+ * If there is just this request queued and the request |
3416 |
-+ * is small, just exit. |
3417 |
-+ * In this way, if the disk is being idled to wait for a new |
3418 |
-+ * request from the active queue, we avoid unplugging the |
3419 |
-+ * device now. |
3420 |
-+ * |
3421 |
-+ * By doing so, we spare the disk to be committed |
3422 |
-+ * to serve just a small request. On the contrary, we wait for |
3423 |
-+ * the block layer to decide when to unplug the device: |
3424 |
-+ * hopefully, new requests will be merged to this |
3425 |
-+ * one quickly, then the device will be unplugged |
3426 |
-+ * and larger requests will be dispatched. |
3427 |
-+ */ |
3428 |
-+ if (bfqq->queued[rq_is_sync(rq)] == 1 && |
3429 |
-+ blk_rq_sectors(rq) < 32) { |
3430 |
-+ return; |
3431 |
-+ } |
3432 |
-+ if (bfq_bfqq_wait_request(bfqq)) { |
3433 |
-+ /* |
3434 |
-+ * If we are waiting for a request for this queue, let |
3435 |
-+ * it rip immediately and flag that we must not expire |
3436 |
-+ * this queue just now. |
3437 |
-+ */ |
3438 |
-+ bfq_clear_bfqq_wait_request(bfqq); |
3439 |
-+ del_timer(&bfqd->idle_slice_timer); |
3440 |
-+ /* |
3441 |
-+ * Here we can safely expire the queue, in |
3442 |
-+ * case of budget timeout, without wasting |
3443 |
-+ * guarantees |
3444 |
-+ */ |
3445 |
-+ if (bfq_bfqq_budget_timeout(bfqq)) |
3446 |
-+ bfq_bfqq_expire(bfqd, bfqq, 0, |
3447 |
-+ BFQ_BFQQ_BUDGET_TIMEOUT); |
3448 |
-+ __blk_run_queue(bfqd->queue); |
3449 |
-+ } |
3450 |
-+ } |
3451 |
-+} |
3452 |
-+ |
3453 |
-+static void bfq_insert_request(struct request_queue *q, struct request *rq) |
3454 |
-+{ |
3455 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
3456 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
3457 |
-+ |
3458 |
-+ assert_spin_locked(bfqd->queue->queue_lock); |
3459 |
-+ bfq_init_prio_data(bfqq, RQ_BIC(rq)); |
3460 |
-+ |
3461 |
-+ bfq_add_rq_rb(rq); |
3462 |
-+ |
3463 |
-+ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); |
3464 |
-+ list_add_tail(&rq->queuelist, &bfqq->fifo); |
3465 |
-+ |
3466 |
-+ bfq_rq_enqueued(bfqd, bfqq, rq); |
3467 |
-+} |
3468 |
-+ |
3469 |
-+static void bfq_update_hw_tag(struct bfq_data *bfqd) |
3470 |
-+{ |
3471 |
-+ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, |
3472 |
-+ bfqd->rq_in_driver); |
3473 |
-+ |
3474 |
-+ if (bfqd->hw_tag == 1) |
3475 |
-+ return; |
3476 |
-+ |
3477 |
-+ /* |
3478 |
-+ * This sample is valid if the number of outstanding requests |
3479 |
-+ * is large enough to allow a queueing behavior. Note that the |
3480 |
-+ * sum is not exact, as it's not taking into account deactivated |
3481 |
-+ * requests. |
3482 |
-+ */ |
3483 |
-+ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) |
3484 |
-+ return; |
3485 |
-+ |
3486 |
-+ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) |
3487 |
-+ return; |
3488 |
-+ |
3489 |
-+ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; |
3490 |
-+ bfqd->max_rq_in_driver = 0; |
3491 |
-+ bfqd->hw_tag_samples = 0; |
3492 |
-+} |
3493 |
-+ |
3494 |
/*
 * Completion hook for rq: update the hardware-queueing detection and the
 * in-flight counters, record the completion time for sync requests and, if
 * the request belongs to the active queue, either arm the idle timer or
 * expire the queue on budget timeout.  A dispatch is scheduled when no
 * request is left in the driver.
 */
static void bfq_completed_request(struct request_queue *q, struct request *rq)
{
	struct bfq_queue *bfqq = RQ_BFQQ(rq);
	struct bfq_data *bfqd = bfqq->bfqd;
	const int sync = rq_is_sync(rq);

	bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)",
		     blk_rq_sectors(rq), sync);

	/* Sampled before the counters below are decremented. */
	bfq_update_hw_tag(bfqd);

	WARN_ON(!bfqd->rq_in_driver);
	WARN_ON(!bfqq->dispatched);
	bfqd->rq_in_driver--;
	bfqq->dispatched--;

	if (bfq_bfqq_sync(bfqq))
		bfqd->sync_flight--;

	/* Think time is measured from the last sync completion. */
	if (sync)
		RQ_BIC(rq)->ttime.last_end_request = jiffies;

	/*
	 * If this is the active queue, check if it needs to be expired,
	 * or if we want to idle in case it has no pending requests.
	 */
	if (bfqd->active_queue == bfqq) {
		/* Evaluated before the budget timeout may be re-armed below. */
		int budg_timeout = bfq_may_expire_for_budg_timeout(bfqq);
		if (bfq_bfqq_budget_new(bfqq))
			bfq_set_budget_timeout(bfqd);

		/* Idling is disabled also for cooperation issues:
		 * 1) there is a close cooperator for the queue, or
		 * 2) the queue is shared and some cooperator is likely
		 * to be idle (in this case, by not arming the idle timer,
		 * we try to slow down the queue, to prevent the zones
		 * of the disk accessed by the active cooperators to become
		 * too distant from the zone that will be accessed by the
		 * currently idle cooperators)
		 */
		if (bfq_bfqq_must_idle(bfqq, budg_timeout))
			bfq_arm_slice_timer(bfqd);
		else if (budg_timeout)
			bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
	}

	/* Driver drained: make sure a new dispatch round is triggered. */
	if (!bfqd->rq_in_driver)
		bfq_schedule_dispatch(bfqd);
}
3543 |
-+ |
3544 |
-+static inline int __bfq_may_queue(struct bfq_queue *bfqq) |
3545 |
-+{ |
3546 |
-+ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { |
3547 |
-+ bfq_clear_bfqq_must_alloc(bfqq); |
3548 |
-+ return ELV_MQUEUE_MUST; |
3549 |
-+ } |
3550 |
-+ |
3551 |
-+ return ELV_MQUEUE_MAY; |
3552 |
-+} |
3553 |
-+ |
3554 |
-+static int bfq_may_queue(struct request_queue *q, int rw) |
3555 |
-+{ |
3556 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
3557 |
-+ struct task_struct *tsk = current; |
3558 |
-+ struct bfq_io_cq *bic; |
3559 |
-+ struct bfq_queue *bfqq; |
3560 |
-+ |
3561 |
-+ /* |
3562 |
-+ * Don't force setup of a queue from here, as a call to may_queue |
3563 |
-+ * does not necessarily imply that a request actually will be queued. |
3564 |
-+ * So just lookup a possibly existing queue, or return 'may queue' |
3565 |
-+ * if that fails. |
3566 |
-+ */ |
3567 |
-+ bic = bfq_bic_lookup(bfqd, tsk->io_context); |
3568 |
-+ if (bic == NULL) |
3569 |
-+ return ELV_MQUEUE_MAY; |
3570 |
-+ |
3571 |
-+ bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); |
3572 |
-+ if (bfqq != NULL) { |
3573 |
-+ bfq_init_prio_data(bfqq, bic); |
3574 |
-+ |
3575 |
-+ return __bfq_may_queue(bfqq); |
3576 |
-+ } |
3577 |
-+ |
3578 |
-+ return ELV_MQUEUE_MAY; |
3579 |
-+} |
3580 |
-+ |
3581 |
-+/* |
3582 |
-+ * Queue lock held here. |
3583 |
-+ */ |
3584 |
-+static void bfq_put_request(struct request *rq) |
3585 |
-+{ |
3586 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq); |
3587 |
-+ |
3588 |
-+ if (bfqq != NULL) { |
3589 |
-+ const int rw = rq_data_dir(rq); |
3590 |
-+ |
3591 |
-+ BUG_ON(!bfqq->allocated[rw]); |
3592 |
-+ bfqq->allocated[rw]--; |
3593 |
-+ |
3594 |
-+ rq->elv.priv[0] = NULL; |
3595 |
-+ rq->elv.priv[1] = NULL; |
3596 |
-+ |
3597 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", |
3598 |
-+ bfqq, atomic_read(&bfqq->ref)); |
3599 |
-+ bfq_put_queue(bfqq); |
3600 |
-+ } |
3601 |
-+} |
3602 |
-+ |
3603 |
-+static struct bfq_queue * |
3604 |
-+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, |
3605 |
-+ struct bfq_queue *bfqq) |
3606 |
-+{ |
3607 |
-+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", |
3608 |
-+ (long unsigned)bfqq->new_bfqq->pid); |
3609 |
-+ bic_set_bfqq(bic, bfqq->new_bfqq, 1); |
3610 |
-+ bfq_mark_bfqq_coop(bfqq->new_bfqq); |
3611 |
-+ bfq_put_queue(bfqq); |
3612 |
-+ return bic_to_bfqq(bic, 1); |
3613 |
-+} |
3614 |
-+ |
3615 |
-+/* |
3616 |
-+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this |
3617 |
-+ * was the last process referring to said bfqq. |
3618 |
-+ */ |
3619 |
-+static struct bfq_queue * |
3620 |
-+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) |
3621 |
-+{ |
3622 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); |
3623 |
-+ if (bfqq_process_refs(bfqq) == 1) { |
3624 |
-+ bfqq->pid = current->pid; |
3625 |
-+ bfq_clear_bfqq_some_coop_idle(bfqq); |
3626 |
-+ bfq_clear_bfqq_coop(bfqq); |
3627 |
-+ bfq_clear_bfqq_split_coop(bfqq); |
3628 |
-+ return bfqq; |
3629 |
-+ } |
3630 |
-+ |
3631 |
-+ bic_set_bfqq(bic, NULL, 1); |
3632 |
-+ |
3633 |
-+ bfq_put_cooperator(bfqq); |
3634 |
-+ |
3635 |
-+ bfq_put_queue(bfqq); |
3636 |
-+ return NULL; |
3637 |
-+} |
3638 |
-+ |
3639 |
/*
 * Allocate bfq data structures associated with this request.
 *
 * On success the request gets a reference to its bfq_queue, stored in
 * rq->elv.priv[1] together with the bic in rq->elv.priv[0], and 0 is
 * returned.  If no bic is attached to the request, a dispatch is scheduled
 * and 1 is returned.
 */
static int bfq_set_request(struct request_queue *q, struct request *rq,
			   struct bio *bio, gfp_t gfp_mask)
{
	struct bfq_data *bfqd = q->elevator->elevator_data;
	struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
	const int rw = rq_data_dir(rq);
	const int is_sync = rq_is_sync(rq);
	struct bfq_queue *bfqq;
	struct bfq_group *bfqg;
	unsigned long flags;

	might_sleep_if(gfp_mask & __GFP_WAIT);

	/*
	 * NOTE(review): bic is handed to bfq_changed_ioprio() before the
	 * NULL check below -- confirm that bfq_changed_ioprio() tolerates a
	 * NULL bic, or that bic can never be NULL at this point.
	 */
	bfq_changed_ioprio(bic);

	spin_lock_irqsave(q->queue_lock, flags);

	if (bic == NULL)
		goto queue_fail;

	bfqg = bfq_bic_update_cgroup(bic);

new_queue:
	bfqq = bic_to_bfqq(bic, is_sync);
	/* No queue yet, or only the OOM fallback queue: get a real one. */
	if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
		bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
		bic_set_bfqq(bic, bfqq, is_sync);
	} else {
		/*
		 * If the queue was seeky for too long, break it apart.
		 */
		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
			bfqq = bfq_split_bfqq(bic, bfqq);
			/* NULL: the bic was detached, look up/allocate again. */
			if (!bfqq)
				goto new_queue;
		}

		/*
		 * Check to see if this queue is scheduled to merge with
		 * another closely cooperating queue. The merging of queues
		 * happens here as it must be done in process context.
		 * The reference on new_bfqq was taken in merge_bfqqs.
		 */
		if (bfqq->new_bfqq != NULL)
			bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
	}

	/* Per-request accounting; undone in bfq_put_request(). */
	bfqq->allocated[rw]++;
	atomic_inc(&bfqq->ref);
	bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
		     atomic_read(&bfqq->ref));

	rq->elv.priv[0] = bic;
	rq->elv.priv[1] = bfqq;

	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;

queue_fail:
	bfq_schedule_dispatch(bfqd);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 1;
}
3708 |
-+ |
3709 |
-+static void bfq_kick_queue(struct work_struct *work) |
3710 |
-+{ |
3711 |
-+ struct bfq_data *bfqd = |
3712 |
-+ container_of(work, struct bfq_data, unplug_work); |
3713 |
-+ struct request_queue *q = bfqd->queue; |
3714 |
-+ |
3715 |
-+ spin_lock_irq(q->queue_lock); |
3716 |
-+ __blk_run_queue(q); |
3717 |
-+ spin_unlock_irq(q->queue_lock); |
3718 |
-+} |
3719 |
-+ |
3720 |
-+/* |
3721 |
-+ * Handler of the expiration of the timer running if the active_queue |
3722 |
-+ * is idling inside its time slice. |
3723 |
-+ */ |
3724 |
-+static void bfq_idle_slice_timer(unsigned long data) |
3725 |
-+{ |
3726 |
-+ struct bfq_data *bfqd = (struct bfq_data *)data; |
3727 |
-+ struct bfq_queue *bfqq; |
3728 |
-+ unsigned long flags; |
3729 |
-+ enum bfqq_expiration reason; |
3730 |
-+ |
3731 |
-+ spin_lock_irqsave(bfqd->queue->queue_lock, flags); |
3732 |
-+ |
3733 |
-+ bfqq = bfqd->active_queue; |
3734 |
-+ /* |
3735 |
-+ * Theoretical race here: active_queue can be NULL or different |
3736 |
-+ * from the queue that was idling if the timer handler spins on |
3737 |
-+ * the queue_lock and a new request arrives for the current |
3738 |
-+ * queue and there is a full dispatch cycle that changes the |
3739 |
-+ * active_queue. This can hardly happen, but in the worst case |
3740 |
-+ * we just expire a queue too early. |
3741 |
-+ */ |
3742 |
-+ if (bfqq != NULL) { |
3743 |
-+ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); |
3744 |
-+ if (bfq_bfqq_budget_timeout(bfqq)) |
3745 |
-+ /* |
3746 |
-+ * Also here the queue can be safely expired |
3747 |
-+ * for budget timeout without wasting |
3748 |
-+ * guarantees |
3749 |
-+ */ |
3750 |
-+ reason = BFQ_BFQQ_BUDGET_TIMEOUT; |
3751 |
-+ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) |
3752 |
-+ /* |
3753 |
-+ * The queue may not be empty upon timer expiration, |
3754 |
-+ * because we may not disable the timer when the first |
3755 |
-+ * request of the active queue arrives during |
3756 |
-+ * disk idling |
3757 |
-+ */ |
3758 |
-+ reason = BFQ_BFQQ_TOO_IDLE; |
3759 |
-+ else |
3760 |
-+ goto schedule_dispatch; |
3761 |
-+ |
3762 |
-+ bfq_bfqq_expire(bfqd, bfqq, 1, reason); |
3763 |
-+ } |
3764 |
-+ |
3765 |
-+schedule_dispatch: |
3766 |
-+ bfq_schedule_dispatch(bfqd); |
3767 |
-+ |
3768 |
-+ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); |
3769 |
-+} |
3770 |
-+ |
3771 |
-+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) |
3772 |
-+{ |
3773 |
-+ del_timer_sync(&bfqd->idle_slice_timer); |
3774 |
-+ cancel_work_sync(&bfqd->unplug_work); |
3775 |
-+} |
3776 |
-+ |
3777 |
-+static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, |
3778 |
-+ struct bfq_queue **bfqq_ptr) |
3779 |
-+{ |
3780 |
-+ struct bfq_group *root_group = bfqd->root_group; |
3781 |
-+ struct bfq_queue *bfqq = *bfqq_ptr; |
3782 |
-+ |
3783 |
-+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); |
3784 |
-+ if (bfqq != NULL) { |
3785 |
-+ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); |
3786 |
-+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", |
3787 |
-+ bfqq, atomic_read(&bfqq->ref)); |
3788 |
-+ bfq_put_queue(bfqq); |
3789 |
-+ *bfqq_ptr = NULL; |
3790 |
-+ } |
3791 |
-+} |
3792 |
-+ |
3793 |
-+/* |
3794 |
-+ * Release all the bfqg references to its async queues. If we are |
3795 |
-+ * deallocating the group these queues may still contain requests, so |
3796 |
-+ * we reparent them to the root cgroup (i.e., the only one that will |
3797 |
-+ * exist for sure untill all the requests on a device are gone). |
3798 |
-+ */ |
3799 |
-+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) |
3800 |
-+{ |
3801 |
-+ int i, j; |
3802 |
-+ |
3803 |
-+ for (i = 0; i < 2; i++) |
3804 |
-+ for (j = 0; j < IOPRIO_BE_NR; j++) |
3805 |
-+ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); |
3806 |
-+ |
3807 |
-+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); |
3808 |
-+} |
3809 |
-+ |
3810 |
/*
 * Tear down the scheduler instance attached to elevator e: stop the timer
 * and the unplug work, deactivate every queue still on the idle list,
 * disconnect the groups, then free the root group and the bfq_data itself.
 */
static void bfq_exit_queue(struct elevator_queue *e)
{
	struct bfq_data *bfqd = e->elevator_data;
	struct request_queue *q = bfqd->queue;
	struct bfq_queue *bfqq, *n;

	bfq_shutdown_timer_wq(bfqd);

	spin_lock_irq(q->queue_lock);

	/* No queue may be in service at this point. */
	BUG_ON(bfqd->active_queue != NULL);
	/* _safe variant: the loop cursor n is fetched before each call. */
	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
		bfq_deactivate_bfqq(bfqd, bfqq, 0);

	bfq_disconnect_groups(bfqd);
	spin_unlock_irq(q->queue_lock);

	/*
	 * Second shutdown pass, after the locked section above.
	 * NOTE(review): presumably catches a timer or work item re-armed
	 * in the meantime -- confirm.
	 */
	bfq_shutdown_timer_wq(bfqd);

	synchronize_rcu();

	BUG_ON(timer_pending(&bfqd->idle_slice_timer));

	bfq_free_root_group(bfqd);
	kfree(bfqd);
}
3836 |
-+ |
3837 |
-+static int bfq_init_queue(struct request_queue *q) |
3838 |
-+{ |
3839 |
-+ struct bfq_group *bfqg; |
3840 |
-+ struct bfq_data *bfqd; |
3841 |
-+ |
3842 |
-+ bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
3843 |
-+ if (bfqd == NULL) |
3844 |
-+ return -ENOMEM; |
3845 |
-+ |
3846 |
-+ /* |
3847 |
-+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. |
3848 |
-+ * Grab a permanent reference to it, so that the normal code flow |
3849 |
-+ * will not attempt to free it. |
3850 |
-+ */ |
3851 |
-+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); |
3852 |
-+ atomic_inc(&bfqd->oom_bfqq.ref); |
3853 |
-+ |
3854 |
-+ bfqd->queue = q; |
3855 |
-+ q->elevator->elevator_data = bfqd; |
3856 |
-+ |
3857 |
-+ bfqg = bfq_alloc_root_group(bfqd, q->node); |
3858 |
-+ if (bfqg == NULL) { |
3859 |
-+ kfree(bfqd); |
3860 |
-+ return -ENOMEM; |
3861 |
-+ } |
3862 |
-+ |
3863 |
-+ bfqd->root_group = bfqg; |
3864 |
-+ |
3865 |
-+ init_timer(&bfqd->idle_slice_timer); |
3866 |
-+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; |
3867 |
-+ bfqd->idle_slice_timer.data = (unsigned long)bfqd; |
3868 |
-+ |
3869 |
-+ bfqd->rq_pos_tree = RB_ROOT; |
3870 |
-+ |
3871 |
-+ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); |
3872 |
-+ |
3873 |
-+ INIT_LIST_HEAD(&bfqd->active_list); |
3874 |
-+ INIT_LIST_HEAD(&bfqd->idle_list); |
3875 |
-+ |
3876 |
-+ bfqd->hw_tag = -1; |
3877 |
-+ |
3878 |
-+ bfqd->bfq_max_budget = bfq_default_max_budget; |
3879 |
-+ |
3880 |
-+ bfqd->bfq_quantum = bfq_quantum; |
3881 |
-+ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; |
3882 |
-+ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; |
3883 |
-+ bfqd->bfq_back_max = bfq_back_max; |
3884 |
-+ bfqd->bfq_back_penalty = bfq_back_penalty; |
3885 |
-+ bfqd->bfq_slice_idle = bfq_slice_idle; |
3886 |
-+ bfqd->bfq_class_idle_last_service = 0; |
3887 |
-+ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; |
3888 |
-+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; |
3889 |
-+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; |
3890 |
-+ |
3891 |
-+ bfqd->low_latency = true; |
3892 |
-+ |
3893 |
-+ bfqd->bfq_raising_coeff = 20; |
3894 |
-+ bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); |
3895 |
-+ bfqd->bfq_raising_max_time = 0; |
3896 |
-+ bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); |
3897 |
-+ bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); |
3898 |
-+ bfqd->bfq_raising_max_softrt_rate = 7000; |
3899 |
-+ |
3900 |
-+ /* Initially estimate the device's peak rate as the reference rate */ |
3901 |
-+ if (blk_queue_nonrot(bfqd->queue)) { |
3902 |
-+ bfqd->RT_prod = R_nonrot * T_nonrot; |
3903 |
-+ bfqd->peak_rate = R_nonrot; |
3904 |
-+ } else { |
3905 |
-+ bfqd->RT_prod = R_rot * T_rot; |
3906 |
-+ bfqd->peak_rate = R_rot; |
3907 |
-+ } |
3908 |
-+ |
3909 |
-+ return 0; |
3910 |
-+} |
3911 |
-+ |
3912 |
-+static void bfq_slab_kill(void) |
3913 |
-+{ |
3914 |
-+ if (bfq_pool != NULL) |
3915 |
-+ kmem_cache_destroy(bfq_pool); |
3916 |
-+} |
3917 |
-+ |
3918 |
-+static int __init bfq_slab_setup(void) |
3919 |
-+{ |
3920 |
-+ bfq_pool = KMEM_CACHE(bfq_queue, 0); |
3921 |
-+ if (bfq_pool == NULL) |
3922 |
-+ return -ENOMEM; |
3923 |
-+ return 0; |
3924 |
-+} |
3925 |
-+ |
3926 |
/*
 * Format var as an unsigned decimal followed by a newline into page.
 * Returns the number of characters written, excluding the terminating NUL.
 *
 * The value is unsigned, so it is printed with %u: with %d anything above
 * INT_MAX would be shown as a negative number.
 */
static ssize_t bfq_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%u\n", var);
}
3930 |
-+ |
3931 |
-+static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) |
3932 |
-+{ |
3933 |
-+ unsigned long new_val; |
3934 |
-+ int ret = strict_strtoul(page, 10, &new_val); |
3935 |
-+ |
3936 |
-+ if (ret == 0) |
3937 |
-+ *var = new_val; |
3938 |
-+ |
3939 |
-+ return count; |
3940 |
-+} |
3941 |
-+ |
3942 |
-+static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page) |
3943 |
-+{ |
3944 |
-+ struct bfq_data *bfqd = e->elevator_data; |
3945 |
-+ return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? |
3946 |
-+ jiffies_to_msecs(bfqd->bfq_raising_max_time) : |
3947 |
-+ jiffies_to_msecs(bfq_wrais_duration(bfqd))); |
3948 |
-+} |
3949 |
-+ |
3950 |
-+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) |
3951 |
-+{ |
3952 |
-+ struct bfq_queue *bfqq; |
3953 |
-+ struct bfq_data *bfqd = e->elevator_data; |
3954 |
-+ ssize_t num_char = 0; |
3955 |
-+ |
3956 |
-+ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n", |
3957 |
-+ bfqd->queued); |
3958 |
-+ |
3959 |
-+ spin_lock_irq(bfqd->queue->queue_lock); |
3960 |
-+ |
3961 |
-+ num_char += sprintf(page + num_char, "Active:\n"); |
3962 |
-+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { |
3963 |
-+ num_char += sprintf(page + num_char, |
3964 |
-+ "pid%d: weight %hu, nr_queued %d %d," |
3965 |
-+ " dur %d/%u\n", |
3966 |
-+ bfqq->pid, |
3967 |
-+ bfqq->entity.weight, |
3968 |
-+ bfqq->queued[0], |
3969 |
-+ bfqq->queued[1], |
3970 |
-+ jiffies_to_msecs(jiffies - |
3971 |
-+ bfqq->last_rais_start_finish), |
3972 |
-+ jiffies_to_msecs(bfqq->raising_cur_max_time)); |
3973 |
-+ } |
3974 |
-+ |
3975 |
-+ num_char += sprintf(page + num_char, "Idle:\n"); |
3976 |
-+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { |
3977 |
-+ num_char += sprintf(page + num_char, |
3978 |
-+ "pid%d: weight %hu, dur %d/%u\n", |
3979 |
-+ bfqq->pid, |
3980 |
-+ bfqq->entity.weight, |
3981 |
-+ jiffies_to_msecs(jiffies - |
3982 |
-+ bfqq->last_rais_start_finish), |
3983 |
-+ jiffies_to_msecs(bfqq->raising_cur_max_time)); |
3984 |
-+ } |
3985 |
-+ |
3986 |
-+ spin_unlock_irq(bfqd->queue->queue_lock); |
3987 |
-+ |
3988 |
-+ return num_char; |
3989 |
-+} |
3990 |
-+ |
3991 |
/*
 * SHOW_FUNCTION() generates the sysfs show handler __FUNC for a tunable.
 *
 *   __FUNC: name of the generated function
 *   __VAR:  expression yielding the value; it may refer to the local bfqd
 *   __CONV: when non-zero the value is passed through jiffies_to_msecs()
 *           before being printed
 *
 * The value is held in an unsigned int and printed by bfq_var_show().
 */
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct bfq_data *bfqd = e->elevator_data;			\
	unsigned int __data = __VAR;					\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return bfq_var_show(__data, (page));				\
}
SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0);
SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
/* max_budget reports the user-set value, not the effective bfq_max_budget. */
SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0);
SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0);
SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1);
SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time,
	      1);
SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show,
	      bfqd->bfq_raising_min_inter_arr_async,
	      1);
SHOW_FUNCTION(bfq_raising_max_softrt_rate_show,
	      bfqd->bfq_raising_max_softrt_rate, 0);
#undef SHOW_FUNCTION
4021 |
-+ |
4022 |
-+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
4023 |
-+static ssize_t \ |
4024 |
-+__FUNC(struct elevator_queue *e, const char *page, size_t count) \ |
4025 |
-+{ \ |
4026 |
-+ struct bfq_data *bfqd = e->elevator_data; \ |
4027 |
-+ unsigned long uninitialized_var(__data); \ |
4028 |
-+ int ret = bfq_var_store(&__data, (page), count); \ |
4029 |
-+ if (__data < (MIN)) \ |
4030 |
-+ __data = (MIN); \ |
4031 |
-+ else if (__data > (MAX)) \ |
4032 |
-+ __data = (MAX); \ |
4033 |
-+ if (__CONV) \ |
4034 |
-+ *(__PTR) = msecs_to_jiffies(__data); \ |
4035 |
-+ else \ |
4036 |
-+ *(__PTR) = __data; \ |
4037 |
-+ return ret; \ |
4038 |
-+} |
4039 |
-+STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); |
4040 |
-+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, |
4041 |
-+ INT_MAX, 1); |
4042 |
-+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, |
4043 |
-+ INT_MAX, 1); |
4044 |
-+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); |
4045 |
-+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, |
4046 |
-+ INT_MAX, 0); |
4047 |
-+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); |
4048 |
-+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, |
4049 |
-+ 1, INT_MAX, 0); |
4050 |
-+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, |
4051 |
-+ INT_MAX, 1); |
4052 |
-+STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, |
4053 |
-+ INT_MAX, 0); |
4054 |
-+STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, |
4055 |
-+ INT_MAX, 1); |
4056 |
-+STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, |
4057 |
-+ INT_MAX, 1); |
4058 |
-+STORE_FUNCTION(bfq_raising_min_idle_time_store, |
4059 |
-+ &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); |
4060 |
-+STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, |
4061 |
-+ &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); |
4062 |
-+STORE_FUNCTION(bfq_raising_max_softrt_rate_store, |
4063 |
-+ &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); |
4064 |
-+#undef STORE_FUNCTION |
4065 |
-+ |
4066 |
/*
 * Store handler of the "weights" attribute: do nothing for the moment.
 * The input is ignored and reported as fully consumed.
 */
static ssize_t bfq_weights_store(struct elevator_queue *e,
				 const char *page, size_t count)
{
	const ssize_t consumed = count;

	return consumed;
}
4072 |
-+ |
4073 |
-+static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) |
4074 |
-+{ |
4075 |
-+ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); |
4076 |
-+ |
4077 |
-+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) |
4078 |
-+ return bfq_calc_max_budget(bfqd->peak_rate, timeout); |
4079 |
-+ else |
4080 |
-+ return bfq_default_max_budget; |
4081 |
-+} |
4082 |
-+ |
4083 |
-+static ssize_t bfq_max_budget_store(struct elevator_queue *e, |
4084 |
-+ const char *page, size_t count) |
4085 |
-+{ |
4086 |
-+ struct bfq_data *bfqd = e->elevator_data; |
4087 |
-+ unsigned long uninitialized_var(__data); |
4088 |
-+ int ret = bfq_var_store(&__data, (page), count); |
4089 |
-+ |
4090 |
-+ if (__data == 0) |
4091 |
-+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); |
4092 |
-+ else { |
4093 |
-+ if (__data > INT_MAX) |
4094 |
-+ __data = INT_MAX; |
4095 |
-+ bfqd->bfq_max_budget = __data; |
4096 |
-+ } |
4097 |
-+ |
4098 |
-+ bfqd->bfq_user_max_budget = __data; |
4099 |
-+ |
4100 |
-+ return ret; |
4101 |
-+} |
4102 |
-+ |
4103 |
-+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, |
4104 |
-+ const char *page, size_t count) |
4105 |
-+{ |
4106 |
-+ struct bfq_data *bfqd = e->elevator_data; |
4107 |
-+ unsigned long uninitialized_var(__data); |
4108 |
-+ int ret = bfq_var_store(&__data, (page), count); |
4109 |
-+ |
4110 |
-+ if (__data < 1) |
4111 |
-+ __data = 1; |
4112 |
-+ else if (__data > INT_MAX) |
4113 |
-+ __data = INT_MAX; |
4114 |
-+ |
4115 |
-+ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); |
4116 |
-+ if (bfqd->bfq_user_max_budget == 0) |
4117 |
-+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); |
4118 |
-+ |
4119 |
-+ return ret; |
4120 |
-+} |
4121 |
-+ |
4122 |
-+static ssize_t bfq_low_latency_store(struct elevator_queue *e, |
4123 |
-+ const char *page, size_t count) |
4124 |
-+{ |
4125 |
-+ struct bfq_data *bfqd = e->elevator_data; |
4126 |
-+ unsigned long uninitialized_var(__data); |
4127 |
-+ int ret = bfq_var_store(&__data, (page), count); |
4128 |
-+ |
4129 |
-+ if (__data > 1) |
4130 |
-+ __data = 1; |
4131 |
-+ if (__data == 0 && bfqd->low_latency != 0) |
4132 |
-+ bfq_end_raising(bfqd); |
4133 |
-+ bfqd->low_latency = __data; |
4134 |
-+ |
4135 |
-+ return ret; |
4136 |
-+} |
4137 |
-+ |
4138 |
-+#define BFQ_ATTR(name) \ |
4139 |
-+ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) |
4140 |
-+ |
4141 |
-+static struct elv_fs_entry bfq_attrs[] = { |
4142 |
-+ BFQ_ATTR(quantum), |
4143 |
-+ BFQ_ATTR(fifo_expire_sync), |
4144 |
-+ BFQ_ATTR(fifo_expire_async), |
4145 |
-+ BFQ_ATTR(back_seek_max), |
4146 |
-+ BFQ_ATTR(back_seek_penalty), |
4147 |
-+ BFQ_ATTR(slice_idle), |
4148 |
-+ BFQ_ATTR(max_budget), |
4149 |
-+ BFQ_ATTR(max_budget_async_rq), |
4150 |
-+ BFQ_ATTR(timeout_sync), |
4151 |
-+ BFQ_ATTR(timeout_async), |
4152 |
-+ BFQ_ATTR(low_latency), |
4153 |
-+ BFQ_ATTR(raising_coeff), |
4154 |
-+ BFQ_ATTR(raising_max_time), |
4155 |
-+ BFQ_ATTR(raising_rt_max_time), |
4156 |
-+ BFQ_ATTR(raising_min_idle_time), |
4157 |
-+ BFQ_ATTR(raising_min_inter_arr_async), |
4158 |
-+ BFQ_ATTR(raising_max_softrt_rate), |
4159 |
-+ BFQ_ATTR(weights), |
4160 |
-+ __ATTR_NULL |
4161 |
-+}; |
4162 |
-+ |
4163 |
-+static struct elevator_type iosched_bfq = { |
4164 |
-+ .ops = { |
4165 |
-+ .elevator_merge_fn = bfq_merge, |
4166 |
-+ .elevator_merged_fn = bfq_merged_request, |
4167 |
-+ .elevator_merge_req_fn = bfq_merged_requests, |
4168 |
-+ .elevator_allow_merge_fn = bfq_allow_merge, |
4169 |
-+ .elevator_dispatch_fn = bfq_dispatch_requests, |
4170 |
-+ .elevator_add_req_fn = bfq_insert_request, |
4171 |
-+ .elevator_activate_req_fn = bfq_activate_request, |
4172 |
-+ .elevator_deactivate_req_fn = bfq_deactivate_request, |
4173 |
-+ .elevator_completed_req_fn = bfq_completed_request, |
4174 |
-+ .elevator_former_req_fn = elv_rb_former_request, |
4175 |
-+ .elevator_latter_req_fn = elv_rb_latter_request, |
4176 |
-+ .elevator_init_icq_fn = bfq_init_icq, |
4177 |
-+ .elevator_exit_icq_fn = bfq_exit_icq, |
4178 |
-+ .elevator_set_req_fn = bfq_set_request, |
4179 |
-+ .elevator_put_req_fn = bfq_put_request, |
4180 |
-+ .elevator_may_queue_fn = bfq_may_queue, |
4181 |
-+ .elevator_init_fn = bfq_init_queue, |
4182 |
-+ .elevator_exit_fn = bfq_exit_queue, |
4183 |
-+ }, |
4184 |
-+ .icq_size = sizeof(struct bfq_io_cq), |
4185 |
-+ .icq_align = __alignof__(struct bfq_io_cq), |
4186 |
-+ .elevator_attrs = bfq_attrs, |
4187 |
-+ .elevator_name = "bfq", |
4188 |
-+ .elevator_owner = THIS_MODULE, |
4189 |
-+}; |
4190 |
-+ |
4191 |
-+static int __init bfq_init(void) |
4192 |
-+{ |
4193 |
-+ /* |
4194 |
-+ * Can be 0 on HZ < 1000 setups. |
4195 |
-+ */ |
4196 |
-+ if (bfq_slice_idle == 0) |
4197 |
-+ bfq_slice_idle = 1; |
4198 |
-+ |
4199 |
-+ if (bfq_timeout_async == 0) |
4200 |
-+ bfq_timeout_async = 1; |
4201 |
-+ |
4202 |
-+ if (bfq_slab_setup()) |
4203 |
-+ return -ENOMEM; |
4204 |
-+ |
4205 |
-+ elv_register(&iosched_bfq); |
4206 |
-+ |
4207 |
-+ return 0; |
4208 |
-+} |
4209 |
-+ |
4210 |
-+static void __exit bfq_exit(void) |
4211 |
-+{ |
4212 |
-+ elv_unregister(&iosched_bfq); |
4213 |
-+ bfq_slab_kill(); |
4214 |
-+} |
4215 |
-+ |
4216 |
-+module_init(bfq_init); |
4217 |
-+module_exit(bfq_exit); |
4218 |
-+ |
4219 |
-+MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); |
4220 |
-+MODULE_LICENSE("GPL"); |
4221 |
-+MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); |
4222 |
-diff --git a/block/bfq-sched.c b/block/bfq-sched.c |
4223 |
-new file mode 100644 |
4224 |
-index 0000000..03f8061 |
4225 |
---- /dev/null |
4226 |
-+++ b/block/bfq-sched.c |
4227 |
-@@ -0,0 +1,1072 @@ |
4228 |
-+/* |
4229 |
-+ * BFQ: Hierarchical B-WF2Q+ scheduler. |
4230 |
-+ * |
4231 |
-+ * Based on ideas and code from CFQ: |
4232 |
-+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk> |
4233 |
-+ * |
4234 |
-+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it> |
4235 |
-+ * Paolo Valente <paolo.valente@×××××××.it> |
4236 |
-+ * |
4237 |
-+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it> |
4238 |
-+ */ |
4239 |
-+ |
4240 |
-+#ifdef CONFIG_CGROUP_BFQIO |
4241 |
-+#define for_each_entity(entity) \ |
4242 |
-+ for (; entity != NULL; entity = entity->parent) |
4243 |
-+ |
4244 |
-+#define for_each_entity_safe(entity, parent) \ |
4245 |
-+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) |
4246 |
-+ |
4247 |
-+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, |
4248 |
-+ int extract, |
4249 |
-+ struct bfq_data *bfqd); |
4250 |
-+ |
4251 |
-+static inline void bfq_update_budget(struct bfq_entity *next_active) |
4252 |
-+{ |
4253 |
-+ struct bfq_entity *bfqg_entity; |
4254 |
-+ struct bfq_group *bfqg; |
4255 |
-+ struct bfq_sched_data *group_sd; |
4256 |
-+ |
4257 |
-+ BUG_ON(next_active == NULL); |
4258 |
-+ |
4259 |
-+ group_sd = next_active->sched_data; |
4260 |
-+ |
4261 |
-+ bfqg = container_of(group_sd, struct bfq_group, sched_data); |
4262 |
-+ /* |
4263 |
-+ * bfq_group's my_entity field is not NULL only if the group |
4264 |
-+ * is not the root group. We must not touch the root entity |
4265 |
-+ * as it must never become an active entity. |
4266 |
-+ */ |
4267 |
-+ bfqg_entity = bfqg->my_entity; |
4268 |
-+ if (bfqg_entity != NULL) |
4269 |
-+ bfqg_entity->budget = next_active->budget; |
4270 |
-+} |
4271 |
-+ |
4272 |
-+static int bfq_update_next_active(struct bfq_sched_data *sd) |
4273 |
-+{ |
4274 |
-+ struct bfq_entity *next_active; |
4275 |
-+ |
4276 |
-+ if (sd->active_entity != NULL) |
4277 |
-+ /* will update/requeue at the end of service */ |
4278 |
-+ return 0; |
4279 |
-+ |
4280 |
-+ /* |
4281 |
-+ * NOTE: this can be improved in many ways, such as returning |
4282 |
-+ * 1 (and thus propagating upwards the update) only when the |
4283 |
-+ * budget changes, or caching the bfqq that will be scheduled |
4284 |
-+ * next from this subtree. By now we worry more about |
4285 |
-+ * correctness than about performance... |
4286 |
-+ */ |
4287 |
-+ next_active = bfq_lookup_next_entity(sd, 0, NULL); |
4288 |
-+ sd->next_active = next_active; |
4289 |
-+ |
4290 |
-+ if (next_active != NULL) |
4291 |
-+ bfq_update_budget(next_active); |
4292 |
-+ |
4293 |
-+ return 1; |
4294 |
-+} |
4295 |
-+ |
4296 |
-+static inline void bfq_check_next_active(struct bfq_sched_data *sd, |
4297 |
-+ struct bfq_entity *entity) |
4298 |
-+{ |
4299 |
-+ BUG_ON(sd->next_active != entity); |
4300 |
-+} |
4301 |
-+#else |
4302 |
-+#define for_each_entity(entity) \ |
4303 |
-+ for (; entity != NULL; entity = NULL) |
4304 |
-+ |
4305 |
-+#define for_each_entity_safe(entity, parent) \ |
4306 |
-+ for (parent = NULL; entity != NULL; entity = parent) |
4307 |
-+ |
4308 |
-+static inline int bfq_update_next_active(struct bfq_sched_data *sd) |
4309 |
-+{ |
4310 |
-+ return 0; |
4311 |
-+} |
4312 |
-+ |
4313 |
-+static inline void bfq_check_next_active(struct bfq_sched_data *sd, |
4314 |
-+ struct bfq_entity *entity) |
4315 |
-+{ |
4316 |
-+} |
4317 |
-+ |
4318 |
-+static inline void bfq_update_budget(struct bfq_entity *next_active) |
4319 |
-+{ |
4320 |
-+} |
4321 |
-+#endif |
4322 |
-+ |
4323 |
-+/* |
4324 |
-+ * Shift for timestamp calculations. This actually limits the maximum |
4325 |
-+ * service allowed in one timestamp delta (small shift values increase it), |
4326 |
-+ * the maximum total weight that can be used for the queues in the system |
4327 |
-+ * (big shift values increase it), and the period of virtual time wraparounds. |
4328 |
-+ */ |
4329 |
-+#define WFQ_SERVICE_SHIFT 22 |
4330 |
-+ |
4331 |
-+/** |
4332 |
-+ * bfq_gt - compare two timestamps. |
4333 |
-+ * @a: first ts. |
4334 |
-+ * @b: second ts. |
4335 |
-+ * |
4336 |
-+ * Return @a > @b, dealing with wrapping correctly. |
4337 |
-+ */ |
4338 |
-+static inline int bfq_gt(u64 a, u64 b) |
4339 |
-+{ |
4340 |
-+ return (s64)(a - b) > 0; |
4341 |
-+} |
4342 |
-+ |
4343 |
-+static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) |
4344 |
-+{ |
4345 |
-+ struct bfq_queue *bfqq = NULL; |
4346 |
-+ |
4347 |
-+ BUG_ON(entity == NULL); |
4348 |
-+ |
4349 |
-+ if (entity->my_sched_data == NULL) |
4350 |
-+ bfqq = container_of(entity, struct bfq_queue, entity); |
4351 |
-+ |
4352 |
-+ return bfqq; |
4353 |
-+} |
4354 |
-+ |
4355 |
-+ |
4356 |
-+/** |
4357 |
-+ * bfq_delta - map service into the virtual time domain. |
4358 |
-+ * @service: amount of service. |
4359 |
-+ * @weight: scale factor (weight of an entity or weight sum). |
4360 |
-+ */ |
4361 |
-+static inline u64 bfq_delta(unsigned long service, |
4362 |
-+ unsigned long weight) |
4363 |
-+{ |
4364 |
-+ u64 d = (u64)service << WFQ_SERVICE_SHIFT; |
4365 |
-+ |
4366 |
-+ do_div(d, weight); |
4367 |
-+ return d; |
4368 |
-+} |
4369 |
-+ |
4370 |
-+/** |
4371 |
-+ * bfq_calc_finish - assign the finish time to an entity. |
4372 |
-+ * @entity: the entity to act upon. |
4373 |
-+ * @service: the service to be charged to the entity. |
4374 |
-+ */ |
4375 |
-+static inline void bfq_calc_finish(struct bfq_entity *entity, |
4376 |
-+ unsigned long service) |
4377 |
-+{ |
4378 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4379 |
-+ |
4380 |
-+ BUG_ON(entity->weight == 0); |
4381 |
-+ |
4382 |
-+ entity->finish = entity->start + |
4383 |
-+ bfq_delta(service, entity->weight); |
4384 |
-+ |
4385 |
-+ if (bfqq != NULL) { |
4386 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, |
4387 |
-+ "calc_finish: serv %lu, w %d", |
4388 |
-+ service, entity->weight); |
4389 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, |
4390 |
-+ "calc_finish: start %llu, finish %llu, delta %llu", |
4391 |
-+ entity->start, entity->finish, |
4392 |
-+ bfq_delta(service, entity->weight)); |
4393 |
-+ } |
4394 |
-+} |
4395 |
-+ |
4396 |
-+/** |
4397 |
-+ * bfq_entity_of - get an entity from a node. |
4398 |
-+ * @node: the node field of the entity. |
4399 |
-+ * |
4400 |
-+ * Convert a node pointer to the relative entity. This is used only |
4401 |
-+ * to simplify the logic of some functions and not as the generic |
4402 |
-+ * conversion mechanism because, e.g., in the tree walking functions, |
4403 |
-+ * the check for a %NULL value would be redundant. |
4404 |
-+ */ |
4405 |
-+static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) |
4406 |
-+{ |
4407 |
-+ struct bfq_entity *entity = NULL; |
4408 |
-+ |
4409 |
-+ if (node != NULL) |
4410 |
-+ entity = rb_entry(node, struct bfq_entity, rb_node); |
4411 |
-+ |
4412 |
-+ return entity; |
4413 |
-+} |
4414 |
-+ |
4415 |
-+/** |
4416 |
-+ * bfq_extract - remove an entity from a tree. |
4417 |
-+ * @root: the tree root. |
4418 |
-+ * @entity: the entity to remove. |
4419 |
-+ */ |
4420 |
-+static inline void bfq_extract(struct rb_root *root, |
4421 |
-+ struct bfq_entity *entity) |
4422 |
-+{ |
4423 |
-+ BUG_ON(entity->tree != root); |
4424 |
-+ |
4425 |
-+ entity->tree = NULL; |
4426 |
-+ rb_erase(&entity->rb_node, root); |
4427 |
-+} |
4428 |
-+ |
4429 |
-+/** |
4430 |
-+ * bfq_idle_extract - extract an entity from the idle tree. |
4431 |
-+ * @st: the service tree of the owning @entity. |
4432 |
-+ * @entity: the entity being removed. |
4433 |
-+ */ |
4434 |
-+static void bfq_idle_extract(struct bfq_service_tree *st, |
4435 |
-+ struct bfq_entity *entity) |
4436 |
-+{ |
4437 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4438 |
-+ struct rb_node *next; |
4439 |
-+ |
4440 |
-+ BUG_ON(entity->tree != &st->idle); |
4441 |
-+ |
4442 |
-+ if (entity == st->first_idle) { |
4443 |
-+ next = rb_next(&entity->rb_node); |
4444 |
-+ st->first_idle = bfq_entity_of(next); |
4445 |
-+ } |
4446 |
-+ |
4447 |
-+ if (entity == st->last_idle) { |
4448 |
-+ next = rb_prev(&entity->rb_node); |
4449 |
-+ st->last_idle = bfq_entity_of(next); |
4450 |
-+ } |
4451 |
-+ |
4452 |
-+ bfq_extract(&st->idle, entity); |
4453 |
-+ |
4454 |
-+ if (bfqq != NULL) |
4455 |
-+ list_del(&bfqq->bfqq_list); |
4456 |
-+} |
4457 |
-+ |
4458 |
-+/** |
4459 |
-+ * bfq_insert - generic tree insertion. |
4460 |
-+ * @root: tree root. |
4461 |
-+ * @entity: entity to insert. |
4462 |
-+ * |
4463 |
-+ * This is used for the idle and the active tree, since they are both |
4464 |
-+ * ordered by finish time. |
4465 |
-+ */ |
4466 |
-+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) |
4467 |
-+{ |
4468 |
-+ struct bfq_entity *entry; |
4469 |
-+ struct rb_node **node = &root->rb_node; |
4470 |
-+ struct rb_node *parent = NULL; |
4471 |
-+ |
4472 |
-+ BUG_ON(entity->tree != NULL); |
4473 |
-+ |
4474 |
-+ while (*node != NULL) { |
4475 |
-+ parent = *node; |
4476 |
-+ entry = rb_entry(parent, struct bfq_entity, rb_node); |
4477 |
-+ |
4478 |
-+ if (bfq_gt(entry->finish, entity->finish)) |
4479 |
-+ node = &parent->rb_left; |
4480 |
-+ else |
4481 |
-+ node = &parent->rb_right; |
4482 |
-+ } |
4483 |
-+ |
4484 |
-+ rb_link_node(&entity->rb_node, parent, node); |
4485 |
-+ rb_insert_color(&entity->rb_node, root); |
4486 |
-+ |
4487 |
-+ entity->tree = root; |
4488 |
-+} |
4489 |
-+ |
4490 |
-+/** |
4491 |
-+ * bfq_update_min - update the min_start field of a entity. |
4492 |
-+ * @entity: the entity to update. |
4493 |
-+ * @node: one of its children. |
4494 |
-+ * |
4495 |
-+ * This function is called when @entity may store an invalid value for |
4496 |
-+ * min_start due to updates to the active tree. The function assumes |
4497 |
-+ * that the subtree rooted at @node (which may be its left or its right |
4498 |
-+ * child) has a valid min_start value. |
4499 |
-+ */ |
4500 |
-+static inline void bfq_update_min(struct bfq_entity *entity, |
4501 |
-+ struct rb_node *node) |
4502 |
-+{ |
4503 |
-+ struct bfq_entity *child; |
4504 |
-+ |
4505 |
-+ if (node != NULL) { |
4506 |
-+ child = rb_entry(node, struct bfq_entity, rb_node); |
4507 |
-+ if (bfq_gt(entity->min_start, child->min_start)) |
4508 |
-+ entity->min_start = child->min_start; |
4509 |
-+ } |
4510 |
-+} |
4511 |
-+ |
4512 |
-+/** |
4513 |
-+ * bfq_update_active_node - recalculate min_start. |
4514 |
-+ * @node: the node to update. |
4515 |
-+ * |
4516 |
-+ * @node may have changed position or one of its children may have moved, |
4517 |
-+ * this function updates its min_start value. The left and right subtrees |
4518 |
-+ * are assumed to hold a correct min_start value. |
4519 |
-+ */ |
4520 |
-+static inline void bfq_update_active_node(struct rb_node *node) |
4521 |
-+{ |
4522 |
-+ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); |
4523 |
-+ |
4524 |
-+ entity->min_start = entity->start; |
4525 |
-+ bfq_update_min(entity, node->rb_right); |
4526 |
-+ bfq_update_min(entity, node->rb_left); |
4527 |
-+} |
4528 |
-+ |
4529 |
-+/** |
4530 |
-+ * bfq_update_active_tree - update min_start for the whole active tree. |
4531 |
-+ * @node: the starting node. |
4532 |
-+ * |
4533 |
-+ * @node must be the deepest modified node after an update. This function |
4534 |
-+ * updates its min_start using the values held by its children, assuming |
4535 |
-+ * that they did not change, and then updates all the nodes that may have |
4536 |
-+ * changed in the path to the root. The only nodes that may have changed |
4537 |
-+ * are the ones in the path or their siblings. |
4538 |
-+ */ |
4539 |
-+static void bfq_update_active_tree(struct rb_node *node) |
4540 |
-+{ |
4541 |
-+ struct rb_node *parent; |
4542 |
-+ |
4543 |
-+up: |
4544 |
-+ bfq_update_active_node(node); |
4545 |
-+ |
4546 |
-+ parent = rb_parent(node); |
4547 |
-+ if (parent == NULL) |
4548 |
-+ return; |
4549 |
-+ |
4550 |
-+ if (node == parent->rb_left && parent->rb_right != NULL) |
4551 |
-+ bfq_update_active_node(parent->rb_right); |
4552 |
-+ else if (parent->rb_left != NULL) |
4553 |
-+ bfq_update_active_node(parent->rb_left); |
4554 |
-+ |
4555 |
-+ node = parent; |
4556 |
-+ goto up; |
4557 |
-+} |
4558 |
-+ |
4559 |
-+/** |
4560 |
-+ * bfq_active_insert - insert an entity in the active tree of its group/device. |
4561 |
-+ * @st: the service tree of the entity. |
4562 |
-+ * @entity: the entity being inserted. |
4563 |
-+ * |
4564 |
-+ * The active tree is ordered by finish time, but an extra key is kept |
4565 |
-+ * per each node, containing the minimum value for the start times of |
4566 |
-+ * its children (and the node itself), so it's possible to search for |
4567 |
-+ * the eligible node with the lowest finish time in logarithmic time. |
4568 |
-+ */ |
4569 |
-+static void bfq_active_insert(struct bfq_service_tree *st, |
4570 |
-+ struct bfq_entity *entity) |
4571 |
-+{ |
4572 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4573 |
-+ struct rb_node *node = &entity->rb_node; |
4574 |
-+ |
4575 |
-+ bfq_insert(&st->active, entity); |
4576 |
-+ |
4577 |
-+ if (node->rb_left != NULL) |
4578 |
-+ node = node->rb_left; |
4579 |
-+ else if (node->rb_right != NULL) |
4580 |
-+ node = node->rb_right; |
4581 |
-+ |
4582 |
-+ bfq_update_active_tree(node); |
4583 |
-+ |
4584 |
-+ if (bfqq != NULL) |
4585 |
-+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); |
4586 |
-+} |
4587 |
-+ |
4588 |
-+/** |
4589 |
-+ * bfq_ioprio_to_weight - calc a weight from an ioprio. |
4590 |
-+ * @ioprio: the ioprio value to convert. |
4591 |
-+ */ |
4592 |
-+static unsigned short bfq_ioprio_to_weight(int ioprio) |
4593 |
-+{ |
4594 |
-+ WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); |
4595 |
-+ return IOPRIO_BE_NR - ioprio; |
4596 |
-+} |
4597 |
-+ |
4598 |
-+/** |
4599 |
-+ * bfq_weight_to_ioprio - calc an ioprio from a weight. |
4600 |
-+ * @weight: the weight value to convert. |
4601 |
-+ * |
4602 |
-+ * To preserve as mush as possible the old only-ioprio user interface, |
4603 |
-+ * 0 is used as an escape ioprio value for weights (numerically) equal or |
4604 |
-+ * larger than IOPRIO_BE_NR |
4605 |
-+ */ |
4606 |
-+static unsigned short bfq_weight_to_ioprio(int weight) |
4607 |
-+{ |
4608 |
-+ WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); |
4609 |
-+ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; |
4610 |
-+} |
4611 |
-+ |
4612 |
-+static inline void bfq_get_entity(struct bfq_entity *entity) |
4613 |
-+{ |
4614 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4615 |
-+ struct bfq_sched_data *sd; |
4616 |
-+ |
4617 |
-+ if (bfqq != NULL) { |
4618 |
-+ sd = entity->sched_data; |
4619 |
-+ atomic_inc(&bfqq->ref); |
4620 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", |
4621 |
-+ bfqq, atomic_read(&bfqq->ref)); |
4622 |
-+ } |
4623 |
-+} |
4624 |
-+ |
4625 |
-+/** |
4626 |
-+ * bfq_find_deepest - find the deepest node that an extraction can modify. |
4627 |
-+ * @node: the node being removed. |
4628 |
-+ * |
4629 |
-+ * Do the first step of an extraction in an rb tree, looking for the |
4630 |
-+ * node that will replace @node, and returning the deepest node that |
4631 |
-+ * the following modifications to the tree can touch. If @node is the |
4632 |
-+ * last node in the tree return %NULL. |
4633 |
-+ */ |
4634 |
-+static struct rb_node *bfq_find_deepest(struct rb_node *node) |
4635 |
-+{ |
4636 |
-+ struct rb_node *deepest; |
4637 |
-+ |
4638 |
-+ if (node->rb_right == NULL && node->rb_left == NULL) |
4639 |
-+ deepest = rb_parent(node); |
4640 |
-+ else if (node->rb_right == NULL) |
4641 |
-+ deepest = node->rb_left; |
4642 |
-+ else if (node->rb_left == NULL) |
4643 |
-+ deepest = node->rb_right; |
4644 |
-+ else { |
4645 |
-+ deepest = rb_next(node); |
4646 |
-+ if (deepest->rb_right != NULL) |
4647 |
-+ deepest = deepest->rb_right; |
4648 |
-+ else if (rb_parent(deepest) != node) |
4649 |
-+ deepest = rb_parent(deepest); |
4650 |
-+ } |
4651 |
-+ |
4652 |
-+ return deepest; |
4653 |
-+} |
4654 |
-+ |
4655 |
-+/** |
4656 |
-+ * bfq_active_extract - remove an entity from the active tree. |
4657 |
-+ * @st: the service_tree containing the tree. |
4658 |
-+ * @entity: the entity being removed. |
4659 |
-+ */ |
4660 |
-+static void bfq_active_extract(struct bfq_service_tree *st, |
4661 |
-+ struct bfq_entity *entity) |
4662 |
-+{ |
4663 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4664 |
-+ struct rb_node *node; |
4665 |
-+ |
4666 |
-+ node = bfq_find_deepest(&entity->rb_node); |
4667 |
-+ bfq_extract(&st->active, entity); |
4668 |
-+ |
4669 |
-+ if (node != NULL) |
4670 |
-+ bfq_update_active_tree(node); |
4671 |
-+ |
4672 |
-+ if (bfqq != NULL) |
4673 |
-+ list_del(&bfqq->bfqq_list); |
4674 |
-+} |
4675 |
-+ |
4676 |
-+/** |
4677 |
-+ * bfq_idle_insert - insert an entity into the idle tree. |
4678 |
-+ * @st: the service tree containing the tree. |
4679 |
-+ * @entity: the entity to insert. |
4680 |
-+ */ |
4681 |
-+static void bfq_idle_insert(struct bfq_service_tree *st, |
4682 |
-+ struct bfq_entity *entity) |
4683 |
-+{ |
4684 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4685 |
-+ struct bfq_entity *first_idle = st->first_idle; |
4686 |
-+ struct bfq_entity *last_idle = st->last_idle; |
4687 |
-+ |
4688 |
-+ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) |
4689 |
-+ st->first_idle = entity; |
4690 |
-+ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) |
4691 |
-+ st->last_idle = entity; |
4692 |
-+ |
4693 |
-+ bfq_insert(&st->idle, entity); |
4694 |
-+ |
4695 |
-+ if (bfqq != NULL) |
4696 |
-+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); |
4697 |
-+} |
4698 |
-+ |
4699 |
-+/** |
4700 |
-+ * bfq_forget_entity - remove an entity from the wfq trees. |
4701 |
-+ * @st: the service tree. |
4702 |
-+ * @entity: the entity being removed. |
4703 |
-+ * |
4704 |
-+ * Update the device status and forget everything about @entity, putting |
4705 |
-+ * the device reference to it, if it is a queue. Entities belonging to |
4706 |
-+ * groups are not refcounted. |
4707 |
-+ */ |
4708 |
-+static void bfq_forget_entity(struct bfq_service_tree *st, |
4709 |
-+ struct bfq_entity *entity) |
4710 |
-+{ |
4711 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4712 |
-+ struct bfq_sched_data *sd; |
4713 |
-+ |
4714 |
-+ BUG_ON(!entity->on_st); |
4715 |
-+ |
4716 |
-+ entity->on_st = 0; |
4717 |
-+ st->wsum -= entity->weight; |
4718 |
-+ if (bfqq != NULL) { |
4719 |
-+ sd = entity->sched_data; |
4720 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", |
4721 |
-+ bfqq, atomic_read(&bfqq->ref)); |
4722 |
-+ bfq_put_queue(bfqq); |
4723 |
-+ } |
4724 |
-+} |
4725 |
-+ |
4726 |
-+/** |
4727 |
-+ * bfq_put_idle_entity - release the idle tree ref of an entity. |
4728 |
-+ * @st: service tree for the entity. |
4729 |
-+ * @entity: the entity being released. |
4730 |
-+ */ |
4731 |
-+static void bfq_put_idle_entity(struct bfq_service_tree *st, |
4732 |
-+ struct bfq_entity *entity) |
4733 |
-+{ |
4734 |
-+ bfq_idle_extract(st, entity); |
4735 |
-+ bfq_forget_entity(st, entity); |
4736 |
-+} |
4737 |
-+ |
4738 |
-+/** |
4739 |
-+ * bfq_forget_idle - update the idle tree if necessary. |
4740 |
-+ * @st: the service tree to act upon. |
4741 |
-+ * |
4742 |
-+ * To preserve the global O(log N) complexity we only remove one entry here; |
4743 |
-+ * as the idle tree will not grow indefinitely this can be done safely. |
4744 |
-+ */ |
4745 |
-+static void bfq_forget_idle(struct bfq_service_tree *st) |
4746 |
-+{ |
4747 |
-+ struct bfq_entity *first_idle = st->first_idle; |
4748 |
-+ struct bfq_entity *last_idle = st->last_idle; |
4749 |
-+ |
4750 |
-+ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && |
4751 |
-+ !bfq_gt(last_idle->finish, st->vtime)) { |
4752 |
-+ /* |
4753 |
-+ * Forget the whole idle tree, increasing the vtime past |
4754 |
-+ * the last finish time of idle entities. |
4755 |
-+ */ |
4756 |
-+ st->vtime = last_idle->finish; |
4757 |
-+ } |
4758 |
-+ |
4759 |
-+ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) |
4760 |
-+ bfq_put_idle_entity(st, first_idle); |
4761 |
-+} |
4762 |
-+ |
4763 |
-+static struct bfq_service_tree * |
4764 |
-+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, |
4765 |
-+ struct bfq_entity *entity) |
4766 |
-+{ |
4767 |
-+ struct bfq_service_tree *new_st = old_st; |
4768 |
-+ |
4769 |
-+ if (entity->ioprio_changed) { |
4770 |
-+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
4771 |
-+ |
4772 |
-+ BUG_ON(old_st->wsum < entity->weight); |
4773 |
-+ old_st->wsum -= entity->weight; |
4774 |
-+ |
4775 |
-+ if (entity->new_weight != entity->orig_weight) { |
4776 |
-+ entity->orig_weight = entity->new_weight; |
4777 |
-+ entity->ioprio = |
4778 |
-+ bfq_weight_to_ioprio(entity->orig_weight); |
4779 |
-+ } else if (entity->new_ioprio != entity->ioprio) { |
4780 |
-+ entity->ioprio = entity->new_ioprio; |
4781 |
-+ entity->orig_weight = |
4782 |
-+ bfq_ioprio_to_weight(entity->ioprio); |
4783 |
-+ } else |
4784 |
-+ entity->new_weight = entity->orig_weight = |
4785 |
-+ bfq_ioprio_to_weight(entity->ioprio); |
4786 |
-+ |
4787 |
-+ entity->ioprio_class = entity->new_ioprio_class; |
4788 |
-+ entity->ioprio_changed = 0; |
4789 |
-+ |
4790 |
-+ /* |
4791 |
-+ * NOTE: here we may be changing the weight too early, |
4792 |
-+ * this will cause unfairness. The correct approach |
4793 |
-+ * would have required additional complexity to defer |
4794 |
-+ * weight changes to the proper time instants (i.e., |
4795 |
-+ * when entity->finish <= old_st->vtime). |
4796 |
-+ */ |
4797 |
-+ new_st = bfq_entity_service_tree(entity); |
4798 |
-+ entity->weight = entity->orig_weight * |
4799 |
-+ (bfqq != NULL ? bfqq->raising_coeff : 1); |
4800 |
-+ new_st->wsum += entity->weight; |
4801 |
-+ |
4802 |
-+ if (new_st != old_st) |
4803 |
-+ entity->start = new_st->vtime; |
4804 |
-+ } |
4805 |
-+ |
4806 |
-+ return new_st; |
4807 |
-+} |
4808 |
-+ |
4809 |
-+/** |
4810 |
-+ * bfq_bfqq_served - update the scheduler status after selection for service. |
4811 |
-+ * @bfqq: the queue being served. |
4812 |
-+ * @served: bytes to transfer. |
4813 |
-+ * |
4814 |
-+ * NOTE: this can be optimized, as the timestamps of upper level entities |
4815 |
-+ * are synchronized every time a new bfqq is selected for service. By now, |
4816 |
-+ * we keep it to better check consistency. |
4817 |
-+ */ |
4818 |
-+static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) |
4819 |
-+{ |
4820 |
-+ struct bfq_entity *entity = &bfqq->entity; |
4821 |
-+ struct bfq_service_tree *st; |
4822 |
-+ |
4823 |
-+ for_each_entity(entity) { |
4824 |
-+ st = bfq_entity_service_tree(entity); |
4825 |
-+ |
4826 |
-+ entity->service += served; |
4827 |
-+ BUG_ON(entity->service > entity->budget); |
4828 |
-+ BUG_ON(st->wsum == 0); |
4829 |
-+ |
4830 |
-+ st->vtime += bfq_delta(served, st->wsum); |
4831 |
-+ bfq_forget_idle(st); |
4832 |
-+ } |
4833 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); |
4834 |
-+} |
4835 |
-+ |
4836 |
-+/** |
4837 |
-+ * bfq_bfqq_charge_full_budget - set the service to the entity budget. |
4838 |
-+ * @bfqq: the queue that needs a service update. |
4839 |
-+ * |
4840 |
-+ * When it's not possible to be fair in the service domain, because |
4841 |
-+ * a queue is not consuming its budget fast enough (the meaning of |
4842 |
-+ * fast depends on the timeout parameter), we charge it a full |
4843 |
-+ * budget. In this way we should obtain a sort of time-domain |
4844 |
-+ * fairness among all the seeky/slow queues. |
4845 |
-+ */ |
4846 |
-+static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) |
4847 |
-+{ |
4848 |
-+ struct bfq_entity *entity = &bfqq->entity; |
4849 |
-+ |
4850 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); |
4851 |
-+ |
4852 |
-+ bfq_bfqq_served(bfqq, entity->budget - entity->service); |
4853 |
-+} |
4854 |
-+ |
4855 |
-+/** |
4856 |
-+ * __bfq_activate_entity - activate an entity. |
4857 |
-+ * @entity: the entity being activated. |
4858 |
-+ * |
4859 |
-+ * Called whenever an entity is activated, i.e., it is not active and one |
4860 |
-+ * of its children receives a new request, or has to be reactivated due to |
4861 |
-+ * budget exhaustion. It uses the current budget of the entity (and the |
4862 |
-+ * service received if @entity is active) of the queue to calculate its |
4863 |
-+ * timestamps. |
4864 |
-+ */ |
4865 |
-+static void __bfq_activate_entity(struct bfq_entity *entity) |
4866 |
-+{ |
4867 |
-+ struct bfq_sched_data *sd = entity->sched_data; |
4868 |
-+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); |
4869 |
-+ |
4870 |
-+ if (entity == sd->active_entity) { |
4871 |
-+ BUG_ON(entity->tree != NULL); |
4872 |
-+ /* |
4873 |
-+ * If we are requeueing the current entity we have |
4874 |
-+ * to take care of not charging to it service it has |
4875 |
-+ * not received. |
4876 |
-+ */ |
4877 |
-+ bfq_calc_finish(entity, entity->service); |
4878 |
-+ entity->start = entity->finish; |
4879 |
-+ sd->active_entity = NULL; |
4880 |
-+ } else if (entity->tree == &st->active) { |
4881 |
-+ /* |
4882 |
-+ * Requeueing an entity due to a change of some |
4883 |
-+ * next_active entity below it. We reuse the old |
4884 |
-+ * start time. |
4885 |
-+ */ |
4886 |
-+ bfq_active_extract(st, entity); |
4887 |
-+ } else if (entity->tree == &st->idle) { |
4888 |
-+ /* |
4889 |
-+ * Must be on the idle tree, bfq_idle_extract() will |
4890 |
-+ * check for that. |
4891 |
-+ */ |
4892 |
-+ bfq_idle_extract(st, entity); |
4893 |
-+ entity->start = bfq_gt(st->vtime, entity->finish) ? |
4894 |
-+ st->vtime : entity->finish; |
4895 |
-+ } else { |
4896 |
-+ /* |
4897 |
-+ * The finish time of the entity may be invalid, and |
4898 |
-+ * it is in the past for sure, otherwise the queue |
4899 |
-+ * would have been on the idle tree. |
4900 |
-+ */ |
4901 |
-+ entity->start = st->vtime; |
4902 |
-+ st->wsum += entity->weight; |
4903 |
-+ bfq_get_entity(entity); |
4904 |
-+ |
4905 |
-+ BUG_ON(entity->on_st); |
4906 |
-+ entity->on_st = 1; |
4907 |
-+ } |
4908 |
-+ |
4909 |
-+ st = __bfq_entity_update_weight_prio(st, entity); |
4910 |
-+ bfq_calc_finish(entity, entity->budget); |
4911 |
-+ bfq_active_insert(st, entity); |
4912 |
-+} |
4913 |
-+ |
4914 |
-+/** |
4915 |
-+ * bfq_activate_entity - activate an entity and its ancestors if necessary. |
4916 |
-+ * @entity: the entity to activate. |
4917 |
-+ * |
4918 |
-+ * Activate @entity and all the entities on the path from it to the root. |
4919 |
-+ */ |
4920 |
-+static void bfq_activate_entity(struct bfq_entity *entity) |
4921 |
-+{ |
4922 |
-+ struct bfq_sched_data *sd; |
4923 |
-+ |
4924 |
-+ for_each_entity(entity) { |
4925 |
-+ __bfq_activate_entity(entity); |
4926 |
-+ |
4927 |
-+ sd = entity->sched_data; |
4928 |
-+ if (!bfq_update_next_active(sd)) |
4929 |
-+ /* |
4930 |
-+ * No need to propagate the activation to the |
4931 |
-+ * upper entities, as they will be updated when |
4932 |
-+ * the active entity is rescheduled. |
4933 |
-+ */ |
4934 |
-+ break; |
4935 |
-+ } |
4936 |
-+} |
4937 |
-+ |
4938 |
-+/** |
4939 |
-+ * __bfq_deactivate_entity - deactivate an entity from its service tree. |
4940 |
-+ * @entity: the entity to deactivate. |
4941 |
-+ * @requeue: if false, the entity will not be put into the idle tree. |
4942 |
-+ * |
4943 |
-+ * Deactivate an entity, independently from its previous state. If the |
4944 |
-+ * entity was not on a service tree just return, otherwise if it is on |
4945 |
-+ * any scheduler tree, extract it from that tree, and if necessary |
4946 |
-+ * and if the caller did not specify @requeue, put it on the idle tree. |
4947 |
-+ * |
4948 |
-+ * Return %1 if the caller should update the entity hierarchy, i.e., |
4949 |
-+ * if the entity was under service or if it was the next_active for |
4950 |
-+ * its sched_data; return %0 otherwise. |
4951 |
-+ */ |
4952 |
-+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) |
4953 |
-+{ |
4954 |
-+ struct bfq_sched_data *sd = entity->sched_data; |
4955 |
-+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); |
4956 |
-+ int was_active = entity == sd->active_entity; |
4957 |
-+ int ret = 0; |
4958 |
-+ |
4959 |
-+ if (!entity->on_st) |
4960 |
-+ return 0; |
4961 |
-+ |
4962 |
-+ BUG_ON(was_active && entity->tree != NULL); |
4963 |
-+ |
4964 |
-+ if (was_active) { |
4965 |
-+ bfq_calc_finish(entity, entity->service); |
4966 |
-+ sd->active_entity = NULL; |
4967 |
-+ } else if (entity->tree == &st->active) |
4968 |
-+ bfq_active_extract(st, entity); |
4969 |
-+ else if (entity->tree == &st->idle) |
4970 |
-+ bfq_idle_extract(st, entity); |
4971 |
-+ else if (entity->tree != NULL) |
4972 |
-+ BUG(); |
4973 |
-+ |
4974 |
-+ if (was_active || sd->next_active == entity) |
4975 |
-+ ret = bfq_update_next_active(sd); |
4976 |
-+ |
4977 |
-+ if (!requeue || !bfq_gt(entity->finish, st->vtime)) |
4978 |
-+ bfq_forget_entity(st, entity); |
4979 |
-+ else |
4980 |
-+ bfq_idle_insert(st, entity); |
4981 |
-+ |
4982 |
-+ BUG_ON(sd->active_entity == entity); |
4983 |
-+ BUG_ON(sd->next_active == entity); |
4984 |
-+ |
4985 |
-+ return ret; |
4986 |
-+} |
4987 |
-+ |
4988 |
-+/** |
4989 |
-+ * bfq_deactivate_entity - deactivate an entity. |
4990 |
-+ * @entity: the entity to deactivate. |
4991 |
-+ * @requeue: true if the entity can be put on the idle tree |
4992 |
-+ */ |
4993 |
-+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) |
4994 |
-+{ |
4995 |
-+ struct bfq_sched_data *sd; |
4996 |
-+ struct bfq_entity *parent; |
4997 |
-+ |
4998 |
-+ for_each_entity_safe(entity, parent) { |
4999 |
-+ sd = entity->sched_data; |
5000 |
-+ |
5001 |
-+ if (!__bfq_deactivate_entity(entity, requeue)) |
5002 |
-+ /* |
5003 |
-+ * The parent entity is still backlogged, and |
5004 |
-+ * we don't need to update it as it is still |
5005 |
-+ * under service. |
5006 |
-+ */ |
5007 |
-+ break; |
5008 |
-+ |
5009 |
-+ if (sd->next_active != NULL) |
5010 |
-+ /* |
5011 |
-+ * The parent entity is still backlogged and |
5012 |
-+ * the budgets on the path towards the root |
5013 |
-+ * need to be updated. |
5014 |
-+ */ |
5015 |
-+ goto update; |
5016 |
-+ |
5017 |
-+ /* |
5018 |
-+ * If we reach here the parent is no longer backlogged and |
5019 |
-+ * we want to propagate the dequeue upwards. |
5020 |
-+ */ |
5021 |
-+ requeue = 1; |
5022 |
-+ } |
5023 |
-+ |
5024 |
-+ return; |
5025 |
-+ |
5026 |
-+update: |
5027 |
-+ entity = parent; |
5028 |
-+ for_each_entity(entity) { |
5029 |
-+ __bfq_activate_entity(entity); |
5030 |
-+ |
5031 |
-+ sd = entity->sched_data; |
5032 |
-+ if (!bfq_update_next_active(sd)) |
5033 |
-+ break; |
5034 |
-+ } |
5035 |
-+} |
5036 |
-+ |
5037 |
-+/** |
5038 |
-+ * bfq_update_vtime - update vtime if necessary. |
5039 |
-+ * @st: the service tree to act upon. |
5040 |
-+ * |
5041 |
-+ * If necessary update the service tree vtime to have at least one |
5042 |
-+ * eligible entity, skipping to its start time. Assumes that the |
5043 |
-+ * active tree of the device is not empty. |
5044 |
-+ * |
5045 |
-+ * NOTE: this hierarchical implementation updates vtimes quite often, |
5046 |
-+ * we may end up with reactivated tasks getting timestamps after a |
5047 |
-+ * vtime skip done because we needed a ->first_active entity on some |
5048 |
-+ * intermediate node. |
5049 |
-+ */ |
5050 |
-+static void bfq_update_vtime(struct bfq_service_tree *st) |
5051 |
-+{ |
5052 |
-+ struct bfq_entity *entry; |
5053 |
-+ struct rb_node *node = st->active.rb_node; |
5054 |
-+ |
5055 |
-+ entry = rb_entry(node, struct bfq_entity, rb_node); |
5056 |
-+ if (bfq_gt(entry->min_start, st->vtime)) { |
5057 |
-+ st->vtime = entry->min_start; |
5058 |
-+ bfq_forget_idle(st); |
5059 |
-+ } |
5060 |
-+} |
5061 |
-+ |
5062 |
-+/** |
5063 |
-+ * bfq_first_active_entity - find the eligible entity with the smallest finish time |
5064 |
-+ * @st: the service tree to select from. |
5065 |
-+ * |
5066 |
-+ * This function searches the first schedulable entity, starting from the |
5067 |
-+ * root of the tree and going on the left every time on this side there is |
5068 |
-+ * a subtree with at least one eligible (start <= vtime) entity. The path |
5069 |
-+ * on the right is followed only if a) the left subtree contains no eligible |
5070 |
-+ * entities and b) no eligible entity has been found yet. |
5071 |
-+ */ |
5072 |
-+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) |
5073 |
-+{ |
5074 |
-+ struct bfq_entity *entry, *first = NULL; |
5075 |
-+ struct rb_node *node = st->active.rb_node; |
5076 |
-+ |
5077 |
-+ while (node != NULL) { |
5078 |
-+ entry = rb_entry(node, struct bfq_entity, rb_node); |
5079 |
-+left: |
5080 |
-+ if (!bfq_gt(entry->start, st->vtime)) |
5081 |
-+ first = entry; |
5082 |
-+ |
5083 |
-+ BUG_ON(bfq_gt(entry->min_start, st->vtime)); |
5084 |
-+ |
5085 |
-+ if (node->rb_left != NULL) { |
5086 |
-+ entry = rb_entry(node->rb_left, |
5087 |
-+ struct bfq_entity, rb_node); |
5088 |
-+ if (!bfq_gt(entry->min_start, st->vtime)) { |
5089 |
-+ node = node->rb_left; |
5090 |
-+ goto left; |
5091 |
-+ } |
5092 |
-+ } |
5093 |
-+ if (first != NULL) |
5094 |
-+ break; |
5095 |
-+ node = node->rb_right; |
5096 |
-+ } |
5097 |
-+ |
5098 |
-+ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); |
5099 |
-+ return first; |
5100 |
-+} |
5101 |
-+ |
5102 |
-+/** |
5103 |
-+ * __bfq_lookup_next_entity - return the first eligible entity in @st. |
5104 |
-+ * @st: the service tree. |
5105 |
-+ * |
5106 |
-+ * Update the virtual time in @st and return the first eligible entity |
5107 |
-+ * it contains. |
5108 |
-+ */ |
5109 |
-+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, |
5110 |
-+ bool force) |
5111 |
-+{ |
5112 |
-+ struct bfq_entity *entity, *new_next_active = NULL; |
5113 |
-+ |
5114 |
-+ if (RB_EMPTY_ROOT(&st->active)) |
5115 |
-+ return NULL; |
5116 |
-+ |
5117 |
-+ bfq_update_vtime(st); |
5118 |
-+ entity = bfq_first_active_entity(st); |
5119 |
-+ BUG_ON(bfq_gt(entity->start, st->vtime)); |
5120 |
-+ |
5121 |
-+ /* |
5122 |
-+ * If the chosen entity does not match with the sched_data's |
5123 |
-+ * next_active and we are forcibly serving the IDLE priority |
5124 |
-+ * class tree, bubble up budget update. |
5125 |
-+ */ |
5126 |
-+ if (unlikely(force && entity != entity->sched_data->next_active)) { |
5127 |
-+ new_next_active = entity; |
5128 |
-+ for_each_entity(new_next_active) |
5129 |
-+ bfq_update_budget(new_next_active); |
5130 |
-+ } |
5131 |
-+ |
5132 |
-+ return entity; |
5133 |
-+} |
5134 |
-+ |
5135 |
-+/** |
5136 |
-+ * bfq_lookup_next_entity - return the first eligible entity in @sd. |
5137 |
-+ * @sd: the sched_data. |
5138 |
-+ * @extract: if true the returned entity will be also extracted from @sd. |
5139 |
-+ * |
5140 |
-+ * NOTE: since we cache the next_active entity at each level of the |
5141 |
-+ * hierarchy, the complexity of the lookup can be decreased with |
5142 |
-+ * absolutely no effort just returning the cached next_active value; |
5143 |
-+ * we prefer to do full lookups to test the consistency of the data |
5144 |
-+ * structures. |
5145 |
-+ */ |
5146 |
-+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, |
5147 |
-+ int extract, |
5148 |
-+ struct bfq_data *bfqd) |
5149 |
-+{ |
5150 |
-+ struct bfq_service_tree *st = sd->service_tree; |
5151 |
-+ struct bfq_entity *entity; |
5152 |
-+ int i=0; |
5153 |
-+ |
5154 |
-+ BUG_ON(sd->active_entity != NULL); |
5155 |
-+ |
5156 |
-+ if (bfqd != NULL && |
5157 |
-+ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { |
5158 |
-+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); |
5159 |
-+ if (entity != NULL) { |
5160 |
-+ i = BFQ_IOPRIO_CLASSES - 1; |
5161 |
-+ bfqd->bfq_class_idle_last_service = jiffies; |
5162 |
-+ sd->next_active = entity; |
5163 |
-+ } |
5164 |
-+ } |
5165 |
-+ for (; i < BFQ_IOPRIO_CLASSES; i++) { |
5166 |
-+ entity = __bfq_lookup_next_entity(st + i, false); |
5167 |
-+ if (entity != NULL) { |
5168 |
-+ if (extract) { |
5169 |
-+ bfq_check_next_active(sd, entity); |
5170 |
-+ bfq_active_extract(st + i, entity); |
5171 |
-+ sd->active_entity = entity; |
5172 |
-+ sd->next_active = NULL; |
5173 |
-+ } |
5174 |
-+ break; |
5175 |
-+ } |
5176 |
-+ } |
5177 |
-+ |
5178 |
-+ return entity; |
5179 |
-+} |
5180 |
-+ |
5181 |
-+/* |
5182 |
-+ * Get next queue for service. |
5183 |
-+ */ |
5184 |
-+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) |
5185 |
-+{ |
5186 |
-+ struct bfq_entity *entity = NULL; |
5187 |
-+ struct bfq_sched_data *sd; |
5188 |
-+ struct bfq_queue *bfqq; |
5189 |
-+ |
5190 |
-+ BUG_ON(bfqd->active_queue != NULL); |
5191 |
-+ |
5192 |
-+ if (bfqd->busy_queues == 0) |
5193 |
-+ return NULL; |
5194 |
-+ |
5195 |
-+ sd = &bfqd->root_group->sched_data; |
5196 |
-+ for (; sd != NULL; sd = entity->my_sched_data) { |
5197 |
-+ entity = bfq_lookup_next_entity(sd, 1, bfqd); |
5198 |
-+ BUG_ON(entity == NULL); |
5199 |
-+ entity->service = 0; |
5200 |
-+ } |
5201 |
-+ |
5202 |
-+ bfqq = bfq_entity_to_bfqq(entity); |
5203 |
-+ BUG_ON(bfqq == NULL); |
5204 |
-+ |
5205 |
-+ return bfqq; |
5206 |
-+} |
5207 |
-+ |
5208 |
-+/* |
5209 |
-+ * Forced extraction of the given queue. |
5210 |
-+ */ |
5211 |
-+static void bfq_get_next_queue_forced(struct bfq_data *bfqd, |
5212 |
-+ struct bfq_queue *bfqq) |
5213 |
-+{ |
5214 |
-+ struct bfq_entity *entity; |
5215 |
-+ struct bfq_sched_data *sd; |
5216 |
-+ |
5217 |
-+ BUG_ON(bfqd->active_queue != NULL); |
5218 |
-+ |
5219 |
-+ entity = &bfqq->entity; |
5220 |
-+ /* |
5221 |
-+ * Bubble up extraction/update from the leaf to the root. |
5222 |
-+ */ |
5223 |
-+ for_each_entity(entity) { |
5224 |
-+ sd = entity->sched_data; |
5225 |
-+ bfq_update_budget(entity); |
5226 |
-+ bfq_update_vtime(bfq_entity_service_tree(entity)); |
5227 |
-+ bfq_active_extract(bfq_entity_service_tree(entity), entity); |
5228 |
-+ sd->active_entity = entity; |
5229 |
-+ sd->next_active = NULL; |
5230 |
-+ entity->service = 0; |
5231 |
-+ } |
5232 |
-+ |
5233 |
-+ return; |
5234 |
-+} |
5235 |
-+ |
5236 |
-+static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) |
5237 |
-+{ |
5238 |
-+ if (bfqd->active_bic != NULL) { |
5239 |
-+ put_io_context(bfqd->active_bic->icq.ioc); |
5240 |
-+ bfqd->active_bic = NULL; |
5241 |
-+ } |
5242 |
-+ |
5243 |
-+ bfqd->active_queue = NULL; |
5244 |
-+ del_timer(&bfqd->idle_slice_timer); |
5245 |
-+} |
5246 |
-+ |
5247 |
-+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
5248 |
-+ int requeue) |
5249 |
-+{ |
5250 |
-+ struct bfq_entity *entity = &bfqq->entity; |
5251 |
-+ |
5252 |
-+ if (bfqq == bfqd->active_queue) |
5253 |
-+ __bfq_bfqd_reset_active(bfqd); |
5254 |
-+ |
5255 |
-+ bfq_deactivate_entity(entity, requeue); |
5256 |
-+} |
5257 |
-+ |
5258 |
-+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
5259 |
-+{ |
5260 |
-+ struct bfq_entity *entity = &bfqq->entity; |
5261 |
-+ |
5262 |
-+ bfq_activate_entity(entity); |
5263 |
-+} |
5264 |
-+ |
5265 |
-+/* |
5266 |
-+ * Called when the bfqq no longer has requests pending, remove it from |
5267 |
-+ * the service tree. |
5268 |
-+ */ |
5269 |
-+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
5270 |
-+ int requeue) |
5271 |
-+{ |
5272 |
-+ BUG_ON(!bfq_bfqq_busy(bfqq)); |
5273 |
-+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); |
5274 |
-+ |
5275 |
-+ bfq_log_bfqq(bfqd, bfqq, "del from busy"); |
5276 |
-+ |
5277 |
-+ bfq_clear_bfqq_busy(bfqq); |
5278 |
-+ |
5279 |
-+ BUG_ON(bfqd->busy_queues == 0); |
5280 |
-+ bfqd->busy_queues--; |
5281 |
-+ |
5282 |
-+ bfq_deactivate_bfqq(bfqd, bfqq, requeue); |
5283 |
-+} |
5284 |
-+ |
5285 |
-+/* |
5286 |
-+ * Called when an inactive queue receives a new request. |
5287 |
-+ */ |
5288 |
-+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
5289 |
-+{ |
5290 |
-+ BUG_ON(bfq_bfqq_busy(bfqq)); |
5291 |
-+ BUG_ON(bfqq == bfqd->active_queue); |
5292 |
-+ |
5293 |
-+ bfq_log_bfqq(bfqd, bfqq, "add to busy"); |
5294 |
-+ |
5295 |
-+ bfq_activate_bfqq(bfqd, bfqq); |
5296 |
-+ |
5297 |
-+ bfq_mark_bfqq_busy(bfqq); |
5298 |
-+ bfqd->busy_queues++; |
5299 |
-+} |
5300 |
-diff --git a/block/bfq.h b/block/bfq.h |
5301 |
-new file mode 100644 |
5302 |
-index 0000000..48ecde9 |
5303 |
---- /dev/null |
5304 |
-+++ b/block/bfq.h |
5305 |
-@@ -0,0 +1,603 @@ |
5306 |
-+/* |
5307 |
-+ * BFQ-v6r2 for 3.10.0: data structures and common functions prototypes. |
5308 |
-+ * |
5309 |
-+ * Based on ideas and code from CFQ: |
5310 |
-+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk> |
5311 |
-+ * |
5312 |
-+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it> |
5313 |
-+ * Paolo Valente <paolo.valente@×××××××.it> |
5314 |
-+ * |
5315 |
-+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it> |
5316 |
-+ */ |
5317 |
-+ |
5318 |
-+#ifndef _BFQ_H |
5319 |
-+#define _BFQ_H |
5320 |
-+ |
5321 |
-+#include <linux/blktrace_api.h> |
5322 |
-+#include <linux/hrtimer.h> |
5323 |
-+#include <linux/ioprio.h> |
5324 |
-+#include <linux/rbtree.h> |
5325 |
-+ |
5326 |
-+#define BFQ_IOPRIO_CLASSES 3 |
5327 |
-+#define BFQ_CL_IDLE_TIMEOUT HZ/5 |
5328 |
-+ |
5329 |
-+#define BFQ_MIN_WEIGHT 1 |
5330 |
-+#define BFQ_MAX_WEIGHT 1000 |
5331 |
-+ |
5332 |
-+#define BFQ_DEFAULT_GRP_WEIGHT 10 |
5333 |
-+#define BFQ_DEFAULT_GRP_IOPRIO 0 |
5334 |
-+#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE |
5335 |
-+ |
5336 |
-+struct bfq_entity; |
5337 |
-+ |
5338 |
-+/** |
5339 |
-+ * struct bfq_service_tree - per ioprio_class service tree. |
5340 |
-+ * @active: tree for active entities (i.e., those backlogged). |
5341 |
-+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). |
5342 |
-+ * @first_idle: idle entity with minimum F_i. |
5343 |
-+ * @last_idle: idle entity with maximum F_i. |
5344 |
-+ * @vtime: scheduler virtual time. |
5345 |
-+ * @wsum: scheduler weight sum; active and idle entities contribute to it. |
5346 |
-+ * |
5347 |
-+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each |
5348 |
-+ * ioprio_class has its own independent scheduler, and so its own |
5349 |
-+ * bfq_service_tree. All the fields are protected by the queue lock |
5350 |
-+ * of the containing bfqd. |
5351 |
-+ */ |
5352 |
-+struct bfq_service_tree { |
5353 |
-+ struct rb_root active; |
5354 |
-+ struct rb_root idle; |
5355 |
-+ |
5356 |
-+ struct bfq_entity *first_idle; |
5357 |
-+ struct bfq_entity *last_idle; |
5358 |
-+ |
5359 |
-+ u64 vtime; |
5360 |
-+ unsigned long wsum; |
5361 |
-+}; |
5362 |
-+ |
5363 |
-+/** |
5364 |
-+ * struct bfq_sched_data - multi-class scheduler. |
5365 |
-+ * @active_entity: entity under service. |
5366 |
-+ * @next_active: head-of-the-line entity in the scheduler. |
5367 |
-+ * @service_tree: array of service trees, one per ioprio_class. |
5368 |
-+ * |
5369 |
-+ * bfq_sched_data is the basic scheduler queue. It supports three |
5370 |
-+ * ioprio_classes, and can be used either as a toplevel queue or as |
5371 |
-+ * an intermediate queue on a hierarchical setup. |
5372 |
-+ * @next_active points to the active entity of the sched_data service |
5373 |
-+ * trees that will be scheduled next. |
5374 |
-+ * |
5375 |
-+ * The supported ioprio_classes are the same as in CFQ, in descending |
5376 |
-+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. |
5377 |
-+ * Requests from higher priority queues are served before all the |
5378 |
-+ * requests from lower priority queues; among requests of the same |
5379 |
-+ * queue requests are served according to B-WF2Q+. |
5380 |
-+ * All the fields are protected by the queue lock of the containing bfqd. |
5381 |
-+ */ |
5382 |
-+struct bfq_sched_data { |
5383 |
-+ struct bfq_entity *active_entity; |
5384 |
-+ struct bfq_entity *next_active; |
5385 |
-+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; |
5386 |
-+}; |
5387 |
-+ |
5388 |
-+/** |
5389 |
-+ * struct bfq_entity - schedulable entity. |
5390 |
-+ * @rb_node: service_tree member. |
5391 |
-+ * @on_st: flag, true if the entity is on a tree (either the active or |
5392 |
-+ * the idle one of its service_tree). |
5393 |
-+ * @finish: B-WF2Q+ finish timestamp (aka F_i). |
5394 |
-+ * @start: B-WF2Q+ start timestamp (aka S_i). |
5395 |
-+ * @tree: tree the entity is enqueued into; %NULL if not on a tree. |
5396 |
-+ * @min_start: minimum start time of the (active) subtree rooted at |
5397 |
-+ * this entity; used for O(log N) lookups into active trees. |
5398 |
-+ * @service: service received during the last round of service. |
5399 |
-+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. |
5400 |
-+ * @weight: weight of the queue |
5401 |
-+ * @parent: parent entity, for hierarchical scheduling. |
5402 |
-+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the |
5403 |
-+ * associated scheduler queue, %NULL on leaf nodes. |
5404 |
-+ * @sched_data: the scheduler queue this entity belongs to. |
5405 |
-+ * @ioprio: the ioprio in use. |
5406 |
-+ * @new_weight: when a weight change is requested, the new weight value. |
5407 |
-+ * @orig_weight: original weight, used to implement weight boosting |
5408 |
-+ * @new_ioprio: when an ioprio change is requested, the new ioprio value. |
5409 |
-+ * @ioprio_class: the ioprio_class in use. |
5410 |
-+ * @new_ioprio_class: when an ioprio_class change is requested, the new |
5411 |
-+ * ioprio_class value. |
5412 |
-+ * @ioprio_changed: flag, true when the user requested a weight, ioprio or |
5413 |
-+ * ioprio_class change. |
5414 |
-+ * |
5415 |
-+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the |
5416 |
-+ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each |
5417 |
-+ * entity belongs to the sched_data of the parent group in the cgroup |
5418 |
-+ * hierarchy. Non-leaf entities have also their own sched_data, stored |
5419 |
-+ * in @my_sched_data. |
5420 |
-+ * |
5421 |
-+ * Each entity stores independently its priority values; this would |
5422 |
-+ * allow different weights on different devices, but this |
5423 |
-+ * functionality is not exported to userspace by now. Priorities and |
5424 |
-+ * weights are updated lazily, first storing the new values into the |
5425 |
-+ * new_* fields, then setting the @ioprio_changed flag. As soon as |
5426 |
-+ * there is a transition in the entity state that allows the priority |
5427 |
-+ * update to take place the effective and the requested priority |
5428 |
-+ * values are synchronized. |
5429 |
-+ * |
5430 |
-+ * Unless cgroups are used, the weight value is calculated from the |
5431 |
-+ * ioprio to export the same interface as CFQ. When dealing with |
5432 |
-+ * ``well-behaved'' queues (i.e., queues that do not spend too much |
5433 |
-+ * time to consume their budget and have true sequential behavior, and |
5434 |
-+ * when there are no external factors breaking anticipation) the |
5435 |
-+ * relative weights at each level of the cgroups hierarchy should be |
5436 |
-+ * guaranteed. All the fields are protected by the queue lock of the |
5437 |
-+ * containing bfqd. |
5438 |
-+ */ |
5439 |
-+struct bfq_entity { |
5440 |
-+ struct rb_node rb_node; |
5441 |
-+ |
5442 |
-+ int on_st; |
5443 |
-+ |
5444 |
-+ u64 finish; |
5445 |
-+ u64 start; |
5446 |
-+ |
5447 |
-+ struct rb_root *tree; |
5448 |
-+ |
5449 |
-+ u64 min_start; |
5450 |
-+ |
5451 |
-+ unsigned long service, budget; |
5452 |
-+ unsigned short weight, new_weight; |
5453 |
-+ unsigned short orig_weight; |
5454 |
-+ |
5455 |
-+ struct bfq_entity *parent; |
5456 |
-+ |
5457 |
-+ struct bfq_sched_data *my_sched_data; |
5458 |
-+ struct bfq_sched_data *sched_data; |
5459 |
-+ |
5460 |
-+ unsigned short ioprio, new_ioprio; |
5461 |
-+ unsigned short ioprio_class, new_ioprio_class; |
5462 |
-+ |
5463 |
-+ int ioprio_changed; |
5464 |
-+}; |
5465 |
-+ |
5466 |
-+struct bfq_group; |
5467 |
-+ |
5468 |
-+/** |
5469 |
-+ * struct bfq_queue - leaf schedulable entity. |
5470 |
-+ * @ref: reference counter. |
5471 |
-+ * @bfqd: parent bfq_data. |
5472 |
-+ * @new_bfqq: shared bfq_queue if queue is cooperating with |
5473 |
-+ * one or more other queues. |
5474 |
-+ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). |
5475 |
-+ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). |
5476 |
-+ * @sort_list: sorted list of pending requests. |
5477 |
-+ * @next_rq: if fifo isn't expired, next request to serve. |
5478 |
-+ * @queued: nr of requests queued in @sort_list. |
5479 |
-+ * @allocated: currently allocated requests. |
5480 |
-+ * @meta_pending: pending metadata requests. |
5481 |
-+ * @fifo: fifo list of requests in sort_list. |
5482 |
-+ * @entity: entity representing this queue in the scheduler. |
5483 |
-+ * @max_budget: maximum budget allowed from the feedback mechanism. |
5484 |
-+ * @budget_timeout: budget expiration (in jiffies). |
5485 |
-+ * @dispatched: number of requests on the dispatch list or inside driver. |
5486 |
-+ * @org_ioprio: saved ioprio during boosted periods. |
5487 |
-+ * @flags: status flags. |
5488 |
-+ * @bfqq_list: node for active/idle bfqq list inside our bfqd. |
5489 |
-+ * @seek_samples: number of seeks sampled |
5490 |
-+ * @seek_total: sum of the distances of the seeks sampled |
5491 |
-+ * @seek_mean: mean seek distance |
5492 |
-+ * @last_request_pos: position of the last request enqueued |
5493 |
-+ * @pid: pid of the process owning the queue, used for logging purposes. |
5494 |
-+ * @last_rais_start_finish: last (idle -> weight-raised) transition attempt |
5495 |
-+ * @raising_cur_max_time: current max raising time for this queue |
5496 |
-+ * |
5497 |
-+ * A bfq_queue is a leaf request queue; it can be associated to an io_context |
5498 |
-+ * or more (if it is an async one). @cgroup holds a reference to the |
5499 |
-+ * cgroup, to be sure that it does not disappear while a bfqq still |
5500 |
-+ * references it (mostly to avoid races between request issuing and task |
5501 |
-+ * migration followed by cgroup destruction). |
5502 |
-+ * All the fields are protected by the queue lock of the containing bfqd. |
5503 |
-+ */ |
5504 |
-+struct bfq_queue { |
5505 |
-+ atomic_t ref; |
5506 |
-+ struct bfq_data *bfqd; |
5507 |
-+ |
5508 |
-+ /* fields for cooperating queues handling */ |
5509 |
-+ struct bfq_queue *new_bfqq; |
5510 |
-+ struct rb_node pos_node; |
5511 |
-+ struct rb_root *pos_root; |
5512 |
-+ |
5513 |
-+ struct rb_root sort_list; |
5514 |
-+ struct request *next_rq; |
5515 |
-+ int queued[2]; |
5516 |
-+ int allocated[2]; |
5517 |
-+ int meta_pending; |
5518 |
-+ struct list_head fifo; |
5519 |
-+ |
5520 |
-+ struct bfq_entity entity; |
5521 |
-+ |
5522 |
-+ unsigned long max_budget; |
5523 |
-+ unsigned long budget_timeout; |
5524 |
-+ |
5525 |
-+ int dispatched; |
5526 |
-+ |
5527 |
-+ unsigned short org_ioprio; |
5528 |
-+ |
5529 |
-+ unsigned int flags; |
5530 |
-+ |
5531 |
-+ struct list_head bfqq_list; |
5532 |
-+ |
5533 |
-+ unsigned int seek_samples; |
5534 |
-+ u64 seek_total; |
5535 |
-+ sector_t seek_mean; |
5536 |
-+ sector_t last_request_pos; |
5537 |
-+ |
5538 |
-+ pid_t pid; |
5539 |
-+ |
5540 |
-+ /* weight-raising fields */ |
5541 |
-+ unsigned int raising_cur_max_time; |
5542 |
-+ u64 last_rais_start_finish, soft_rt_next_start; |
5543 |
-+ unsigned int raising_coeff; |
5544 |
-+}; |
5545 |
-+ |
5546 |
-+/** |
5547 |
-+ * struct bfq_ttime - per process thinktime stats. |
5548 |
-+ * @ttime_total: total process thinktime |
5549 |
-+ * @ttime_samples: number of thinktime samples |
5550 |
-+ * @ttime_mean: average process thinktime |
5551 |
-+ */ |
5552 |
-+struct bfq_ttime { |
5553 |
-+ unsigned long last_end_request; |
5554 |
-+ |
5555 |
-+ unsigned long ttime_total; |
5556 |
-+ unsigned long ttime_samples; |
5557 |
-+ unsigned long ttime_mean; |
5558 |
-+}; |
5559 |
-+ |
5560 |
-+/** |
5561 |
-+ * struct bfq_io_cq - per (request_queue, io_context) structure. |
5562 |
-+ * @icq: associated io_cq structure |
5563 |
-+ * @bfqq: array of two process queues, the sync and the async |
5564 |
-+ * @ttime: associated @bfq_ttime struct |
5565 |
-+ */ |
5566 |
-+struct bfq_io_cq { |
5567 |
-+ struct io_cq icq; /* must be the first member */ |
5568 |
-+ struct bfq_queue *bfqq[2]; |
5569 |
-+ struct bfq_ttime ttime; |
5570 |
-+ int ioprio; |
5571 |
-+}; |
5572 |
-+ |
5573 |
-+/** |
5574 |
-+ * struct bfq_data - per device data structure. |
5575 |
-+ * @queue: request queue for the managed device. |
5576 |
-+ * @root_group: root bfq_group for the device. |
5577 |
-+ * @rq_pos_tree: rbtree sorted by next_request position, |
5578 |
-+ * used when determining if two or more queues |
5579 |
-+ * have interleaving requests (see bfq_close_cooperator). |
5580 |
-+ * @busy_queues: number of bfq_queues containing requests (including the |
5581 |
-+ * queue under service, even if it is idling). |
5582 |
-+ * @queued: number of queued requests. |
5583 |
-+ * @rq_in_driver: number of requests dispatched and waiting for completion. |
5584 |
-+ * @sync_flight: number of sync requests in the driver. |
5585 |
-+ * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples |
5586 |
-+ * completed requests. |
5587 |
-+ * @hw_tag_samples: nr of samples used to calculate hw_tag. |
5588 |
-+ * @hw_tag: flag set to one if the driver is showing a queueing behavior. |
5589 |
-+ * @budgets_assigned: number of budgets assigned. |
5590 |
-+ * @idle_slice_timer: timer set when idling for the next sequential request |
5591 |
-+ * from the queue under service. |
5592 |
-+ * @unplug_work: delayed work to restart dispatching on the request queue. |
5593 |
-+ * @active_queue: bfq_queue under service. |
5594 |
-+ * @active_bic: bfq_io_cq (bic) associated with the @active_queue. |
5595 |
-+ * @last_position: on-disk position of the last served request. |
5596 |
-+ * @last_budget_start: beginning of the last budget. |
5597 |
-+ * @last_idling_start: beginning of the last idle slice. |
5598 |
-+ * @peak_rate: peak transfer rate observed for a budget. |
5599 |
-+ * @peak_rate_samples: number of samples used to calculate @peak_rate. |
5600 |
-+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. |
5601 |
-+ * @group_list: list of all the bfq_groups active on the device. |
5602 |
-+ * @active_list: list of all the bfq_queues active on the device. |
5603 |
-+ * @idle_list: list of all the bfq_queues idle on the device. |
5604 |
-+ * @bfq_quantum: max number of requests dispatched per dispatch round. |
5605 |
-+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires |
5606 |
-+ * requests are served in fifo order. |
5607 |
-+ * @bfq_back_penalty: weight of backward seeks wrt forward ones. |
5608 |
-+ * @bfq_back_max: maximum allowed backward seek. |
5609 |
-+ * @bfq_slice_idle: maximum idling time. |
5610 |
-+ * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). |
5611 |
-+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to |
5612 |
-+ * async queues. |
5613 |
-+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to |
5614 |
-+ * prevent seeky queues from imposing long latencies on well |
5615 |
-+ * behaved ones (this also implies that seeky queues cannot |
5616 |
-+ * receive guarantees in the service domain; after a timeout |
5617 |
-+ * they are charged for the whole allocated budget, to try |
5618 |
-+ * to preserve a behavior reasonably fair among them, but |
5619 |
-+ * without service-domain guarantees). |
5620 |
-+ * @bfq_raising_coeff: Maximum factor by which the weight of a boosted |
5621 |
-+ * queue is multiplied |
5622 |
-+ * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies) |
5623 |
-+ * @bfq_raising_rt_max_time: maximum duration for soft real-time processes |
5624 |
-+ * @bfq_raising_min_idle_time: minimum idle period after which weight-raising |
5625 |
-+ * may be reactivated for a queue (in jiffies) |
5626 |
-+ * @bfq_raising_min_inter_arr_async: minimum period between request arrivals |
5627 |
-+ * after which weight-raising may be |
5628 |
-+ * reactivated for an already busy queue |
5629 |
-+ * (in jiffies) |
5630 |
-+ * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue, |
5631 |
-+ * sectors per second |
5632 |
-+ * @RT_prod: cached value of the product R*T used for computing the maximum |
5633 |
-+ * duration of the weight raising automatically |
5634 |
-+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions |
5635 |
-+ * |
5636 |
-+ * All the fields are protected by the @queue lock. |
5637 |
-+ */ |
5638 |
-+struct bfq_data { |
5639 |
-+ struct request_queue *queue; |
5640 |
-+ |
5641 |
-+ struct bfq_group *root_group; |
5642 |
-+ |
5643 |
-+ struct rb_root rq_pos_tree; |
5644 |
-+ |
5645 |
-+ int busy_queues; |
5646 |
-+ int queued; |
5647 |
-+ int rq_in_driver; |
5648 |
-+ int sync_flight; |
5649 |
-+ |
5650 |
-+ int max_rq_in_driver; |
5651 |
-+ int hw_tag_samples; |
5652 |
-+ int hw_tag; |
5653 |
-+ |
5654 |
-+ int budgets_assigned; |
5655 |
-+ |
5656 |
-+ struct timer_list idle_slice_timer; |
5657 |
-+ struct work_struct unplug_work; |
5658 |
-+ |
5659 |
-+ struct bfq_queue *active_queue; |
5660 |
-+ struct bfq_io_cq *active_bic; |
5661 |
-+ |
5662 |
-+ sector_t last_position; |
5663 |
-+ |
5664 |
-+ ktime_t last_budget_start; |
5665 |
-+ ktime_t last_idling_start; |
5666 |
-+ int peak_rate_samples; |
5667 |
-+ u64 peak_rate; |
5668 |
-+ unsigned long bfq_max_budget; |
5669 |
-+ |
5670 |
-+ struct hlist_head group_list; |
5671 |
-+ struct list_head active_list; |
5672 |
-+ struct list_head idle_list; |
5673 |
-+ |
5674 |
-+ unsigned int bfq_quantum; |
5675 |
-+ unsigned int bfq_fifo_expire[2]; |
5676 |
-+ unsigned int bfq_back_penalty; |
5677 |
-+ unsigned int bfq_back_max; |
5678 |
-+ unsigned int bfq_slice_idle; |
5679 |
-+ u64 bfq_class_idle_last_service; |
5680 |
-+ |
5681 |
-+ unsigned int bfq_user_max_budget; |
5682 |
-+ unsigned int bfq_max_budget_async_rq; |
5683 |
-+ unsigned int bfq_timeout[2]; |
5684 |
-+ |
5685 |
-+ bool low_latency; |
5686 |
-+ |
5687 |
-+ /* parameters of the low_latency heuristics */ |
5688 |
-+ unsigned int bfq_raising_coeff; |
5689 |
-+ unsigned int bfq_raising_max_time; |
5690 |
-+ unsigned int bfq_raising_rt_max_time; |
5691 |
-+ unsigned int bfq_raising_min_idle_time; |
5692 |
-+ unsigned int bfq_raising_min_inter_arr_async; |
5693 |
-+ unsigned int bfq_raising_max_softrt_rate; |
5694 |
-+ u64 RT_prod; |
5695 |
-+ |
5696 |
-+ struct bfq_queue oom_bfqq; |
5697 |
-+}; |
5698 |
-+ |
5699 |
-+enum bfqq_state_flags { |
5700 |
-+ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ |
5701 |
-+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ |
5702 |
-+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ |
5703 |
-+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ |
5704 |
-+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ |
5705 |
-+ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ |
5706 |
-+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ |
5707 |
-+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ |
5708 |
-+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ |
5709 |
-+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ |
5710 |
-+ BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ |
5711 |
-+}; |
5712 |
-+ |
5713 |
-+#define BFQ_BFQQ_FNS(name) \ |
5714 |
-+static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ |
5715 |
-+{ \ |
5716 |
-+ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ |
5717 |
-+} \ |
5718 |
-+static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ |
5719 |
-+{ \ |
5720 |
-+ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ |
5721 |
-+} \ |
5722 |
-+static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ |
5723 |
-+{ \ |
5724 |
-+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ |
5725 |
-+} |
5726 |
-+ |
5727 |
-+BFQ_BFQQ_FNS(busy); |
5728 |
-+BFQ_BFQQ_FNS(wait_request); |
5729 |
-+BFQ_BFQQ_FNS(must_alloc); |
5730 |
-+BFQ_BFQQ_FNS(fifo_expire); |
5731 |
-+BFQ_BFQQ_FNS(idle_window); |
5732 |
-+BFQ_BFQQ_FNS(prio_changed); |
5733 |
-+BFQ_BFQQ_FNS(sync); |
5734 |
-+BFQ_BFQQ_FNS(budget_new); |
5735 |
-+BFQ_BFQQ_FNS(coop); |
5736 |
-+BFQ_BFQQ_FNS(split_coop); |
5737 |
-+BFQ_BFQQ_FNS(some_coop_idle); |
5738 |
-+#undef BFQ_BFQQ_FNS |
5739 |
-+ |
5740 |
-+/* Logging facilities. */ |
5741 |
-+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ |
5742 |
-+ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) |
5743 |
-+ |
5744 |
-+#define bfq_log(bfqd, fmt, args...) \ |
5745 |
-+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) |
5746 |
-+ |
5747 |
-+/* Expiration reasons. */ |
5748 |
-+enum bfqq_expiration { |
5749 |
-+ BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ |
5750 |
-+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ |
5751 |
-+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ |
5752 |
-+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ |
5753 |
-+}; |
5754 |
-+ |
5755 |
-+#ifdef CONFIG_CGROUP_BFQIO |
5756 |
-+/** |
5757 |
-+ * struct bfq_group - per (device, cgroup) data structure. |
5758 |
-+ * @entity: schedulable entity to insert into the parent group sched_data. |
5759 |
-+ * @sched_data: own sched_data, to contain child entities (they may be |
5760 |
-+ * both bfq_queues and bfq_groups). |
5761 |
-+ * @group_node: node to be inserted into the bfqio_cgroup->group_data |
5762 |
-+ * list of the containing cgroup's bfqio_cgroup. |
5763 |
-+ * @bfqd_node: node to be inserted into the @bfqd->group_list list |
5764 |
-+ * of the groups active on the same device; used for cleanup. |
5765 |
-+ * @bfqd: the bfq_data for the device this group acts upon. |
5766 |
-+ * @async_bfqq: array of async queues for all the tasks belonging to |
5767 |
-+ * the group, one queue per ioprio value per ioprio_class, |
5768 |
-+ * except for the idle class that has only one queue. |
5769 |
-+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). |
5770 |
-+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used |
5771 |
-+ * to avoid too many special cases during group creation/migration. |
5772 |
-+ * |
5773 |
-+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup |
5774 |
-+ * there is a set of bfq_groups, each one collecting the lower-level |
5775 |
-+ * entities belonging to the group that are acting on the same device. |
5776 |
-+ * |
5777 |
-+ * Locking works as follows: |
5778 |
-+ * o @group_node is protected by the bfqio_cgroup lock, and is accessed |
5779 |
-+ * via RCU from its readers. |
5780 |
-+ * o @bfqd is protected by the queue lock, RCU is used to access it |
5781 |
-+ * from the readers. |
5782 |
-+ * o All the other fields are protected by the @bfqd queue lock. |
5783 |
-+ */ |
5784 |
-+struct bfq_group { |
5785 |
-+ struct bfq_entity entity; |
5786 |
-+ struct bfq_sched_data sched_data; |
5787 |
-+ |
5788 |
-+ struct hlist_node group_node; |
5789 |
-+ struct hlist_node bfqd_node; |
5790 |
-+ |
5791 |
-+ void *bfqd; |
5792 |
-+ |
5793 |
-+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; |
5794 |
-+ struct bfq_queue *async_idle_bfqq; |
5795 |
-+ |
5796 |
-+ struct bfq_entity *my_entity; |
5797 |
-+}; |
5798 |
-+ |
5799 |
-+/** |
5800 |
-+ * struct bfqio_cgroup - bfq cgroup data structure. |
5801 |
-+ * @css: subsystem state for bfq in the containing cgroup. |
5802 |
-+ * @weight: cgroup weight. |
5803 |
-+ * @ioprio: cgroup ioprio. |
5804 |
-+ * @ioprio_class: cgroup ioprio_class. |
5805 |
-+ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. |
5806 |
-+ * @group_data: list containing the bfq_group belonging to this cgroup. |
5807 |
-+ * |
5808 |
-+ * @group_data is accessed using RCU, with @lock protecting the updates, |
5809 |
-+ * @ioprio and @ioprio_class are protected by @lock. |
5810 |
-+ */ |
5811 |
-+struct bfqio_cgroup { |
5812 |
-+ struct cgroup_subsys_state css; |
5813 |
-+ |
5814 |
-+ unsigned short weight, ioprio, ioprio_class; |
5815 |
-+ |
5816 |
-+ spinlock_t lock; |
5817 |
-+ struct hlist_head group_data; |
5818 |
-+}; |
5819 |
-+#else |
5820 |
-+struct bfq_group { |
5821 |
-+ struct bfq_sched_data sched_data; |
5822 |
-+ |
5823 |
-+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; |
5824 |
-+ struct bfq_queue *async_idle_bfqq; |
5825 |
-+}; |
5826 |
-+#endif |
5827 |
-+ |
5828 |
-+static inline struct bfq_service_tree * |
5829 |
-+bfq_entity_service_tree(struct bfq_entity *entity) |
5830 |
-+{ |
5831 |
-+ struct bfq_sched_data *sched_data = entity->sched_data; |
5832 |
-+ unsigned int idx = entity->ioprio_class - 1; |
5833 |
-+ |
5834 |
-+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); |
5835 |
-+ BUG_ON(sched_data == NULL); |
5836 |
-+ |
5837 |
-+ return sched_data->service_tree + idx; |
5838 |
-+} |
5839 |
-+ |
5840 |
-+static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, |
5841 |
-+ int is_sync) |
5842 |
-+{ |
5843 |
-+ return bic->bfqq[!!is_sync]; |
5844 |
-+} |
5845 |
-+ |
5846 |
-+static inline void bic_set_bfqq(struct bfq_io_cq *bic, |
5847 |
-+ struct bfq_queue *bfqq, int is_sync) |
5848 |
-+{ |
5849 |
-+ bic->bfqq[!!is_sync] = bfqq; |
5850 |
-+} |
5851 |
-+ |
5852 |
-+static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) |
5853 |
-+{ |
5854 |
-+ return bic->icq.q->elevator->elevator_data; |
5855 |
-+} |
5856 |
-+ |
5857 |
-+/** |
5858 |
-+ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. |
5859 |
-+ * @ptr: a pointer to a bfqd. |
5860 |
-+ * @flags: storage for the flags to be saved. |
5861 |
-+ * |
5862 |
-+ * This function allows bfqg->bfqd to be protected by the |
5863 |
-+ * queue lock of the bfqd they reference; the pointer is dereferenced |
5864 |
-+ * under RCU, so the storage for bfqd is assured to be safe as long |
5865 |
-+ * as the RCU read side critical section does not end. After the |
5866 |
-+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be |
5867 |
-+ * sure that no other writer accessed it. If we raced with a writer, |
5868 |
-+ * the function returns NULL, with the queue unlocked, otherwise it |
5869 |
-+ * returns the dereferenced pointer, with the queue locked. |
5870 |
-+ */ |
5871 |
-+static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, |
5872 |
-+ unsigned long *flags) |
5873 |
-+{ |
5874 |
-+ struct bfq_data *bfqd; |
5875 |
-+ |
5876 |
-+ rcu_read_lock(); |
5877 |
-+ bfqd = rcu_dereference(*(struct bfq_data **)ptr); |
5878 |
-+ |
5879 |
-+ if (bfqd != NULL) { |
5880 |
-+ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); |
5881 |
-+ if (*ptr == bfqd) |
5882 |
-+ goto out; |
5883 |
-+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); |
5884 |
-+ } |
5885 |
-+ |
5886 |
-+ bfqd = NULL; |
5887 |
-+out: |
5888 |
-+ rcu_read_unlock(); |
5889 |
-+ return bfqd; |
5890 |
-+} |
5891 |
-+ |
5892 |
-+static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, |
5893 |
-+ unsigned long *flags) |
5894 |
-+{ |
5895 |
-+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); |
5896 |
-+} |
5897 |
-+ |
5898 |
-+static void bfq_changed_ioprio(struct bfq_io_cq *bic); |
5899 |
-+static void bfq_put_queue(struct bfq_queue *bfqq); |
5900 |
-+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); |
5901 |
-+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, |
5902 |
-+ struct bfq_group *bfqg, int is_sync, |
5903 |
-+ struct bfq_io_cq *bic, gfp_t gfp_mask); |
5904 |
-+static void bfq_end_raising_async_queues(struct bfq_data *bfqd, |
5905 |
-+ struct bfq_group *bfqg); |
5906 |
-+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); |
5907 |
-+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
5908 |
-+#endif |
5909 |
-diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h |
5910 |
-index ffa1d1f..e5e6b0d 100644 |
5911 |
---- a/include/linux/cgroup_subsys.h |
5912 |
-+++ b/include/linux/cgroup_subsys.h |
5913 |
-@@ -85,7 +85,7 @@ SUBSYS(bcache) |
5914 |
- |
5915 |
- /* */ |
5916 |
- |
5917 |
--#ifdef CONFIG_CGROUP_BFQIO |
5918 |
-+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_BFQIO) |
5919 |
- SUBSYS(bfqio) |
5920 |
- #endif |
5921 |
- |
5922 |
--- |
5923 |
-1.8.1.4 |
5924 |
- |
5925 |
|
5926 |
Deleted: genpatches-2.6/trunk/3.11/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.10.0.patch1 |
5927 |
=================================================================== |
5928 |
--- genpatches-2.6/trunk/3.11/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.10.0.patch1 2013-09-02 23:07:59 UTC (rev 2507) |
5929 |
+++ genpatches-2.6/trunk/3.11/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.10.0.patch1 2013-09-02 23:10:55 UTC (rev 2508) |
5930 |
@@ -1,1049 +0,0 @@ |
5931 |
-From 9204dcb026a40cd2cb4310fecf788924d0fbec8d Mon Sep 17 00:00:00 2001 |
5932 |
-From: Mauro Andreolini <mauro.andreolini@×××××××.it> |
5933 |
-Date: Fri, 14 Jun 2013 13:46:47 +0200 |
5934 |
-Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v6r2 for |
5935 |
- 3.10.0 |
5936 |
- |
5937 |
-A set of processes may happen to perform interleaved reads, i.e., requests |
5938 |
-whose union would give rise to a sequential read pattern. There are two |
5939 |
-typical cases: in the first case, processes read fixed-size chunks of |
5940 |
-data at a fixed distance from each other, while in the second case processes |
5941 |
-may read variable-size chunks at variable distances. The latter case occurs |
5942 |
-for example with KVM, which splits the I/O generated by the guest into |
5943 |
-multiple chunks, and lets these chunks be served by a pool of cooperating |
5944 |
-processes, iteratively assigning the next chunk of I/O to the first |
5945 |
-available process. CFQ uses actual queue merging for the first type of |
5946 |
-processes, whereas it uses preemption to get a sequential read pattern out |
5947 |
-of the read requests performed by the second type of processes. In the end |
5948 |
-it uses two different mechanisms to achieve the same goal: boosting the |
5949 |
-throughput with interleaved I/O. |
5950 |
- |
5951 |
-This patch introduces Early Queue Merge (EQM), a unified mechanism to get a |
5952 |
-sequential read pattern with both types of processes. The main idea is |
5953 |
-checking newly arrived requests against the next request of the active queue |
5954 |
-both in case of actual request insert and in case of request merge. By doing |
5955 |
-so, both the types of processes can be handled by just merging their queues. |
5956 |
-EQM is then simpler and more compact than the pair of mechanisms used in |
5957 |
-CFQ. |
5958 |
- |
5959 |
-Finally, EQM also preserves the typical low-latency properties of BFQ, by |
5960 |
-properly restoring the weight-raising state of a queue when it gets back to |
5961 |
-a non-merged state. |
5962 |
- |
5963 |
-Signed-off-by: Mauro Andreolini <mauro.andreolini@×××××××.it> |
5964 |
-Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com> |
5965 |
-Reviewed-by: Paolo Valente <paolo.valente@×××××××.it> |
5966 |
---- |
5967 |
- block/bfq-iosched.c | 653 ++++++++++++++++++++++++++++++++++++---------------- |
5968 |
- block/bfq-sched.c | 28 --- |
5969 |
- block/bfq.h | 16 ++ |
5970 |
- 3 files changed, 466 insertions(+), 231 deletions(-) |
5971 |
- |
5972 |
-diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c |
5973 |
-index b230927..bc57923 100644 |
5974 |
---- a/block/bfq-iosched.c |
5975 |
-+++ b/block/bfq-iosched.c |
5976 |
-@@ -444,6 +444,43 @@ static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) |
5977 |
- return dur; |
5978 |
- } |
5979 |
- |
5980 |
-+static inline void |
5981 |
-+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) |
5982 |
-+{ |
5983 |
-+ if (bic->saved_idle_window) |
5984 |
-+ bfq_mark_bfqq_idle_window(bfqq); |
5985 |
-+ else |
5986 |
-+ bfq_clear_bfqq_idle_window(bfqq); |
5987 |
-+ if (bic->raising_time_left && bfqq->bfqd->low_latency) { |
5988 |
-+ /* |
5989 |
-+ * Start a weight raising period with the duration given by |
5990 |
-+ * the raising_time_left snapshot. |
5991 |
-+ */ |
5992 |
-+ bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff; |
5993 |
-+ bfqq->raising_cur_max_time = bic->raising_time_left; |
5994 |
-+ bfqq->last_rais_start_finish = jiffies; |
5995 |
-+ } |
5996 |
-+ /* |
5997 |
-+ * Clear raising_time_left to prevent bfq_bfqq_save_state() from |
5998 |
-+ * getting confused about the queue's need of a weight-raising |
5999 |
-+ * period. |
6000 |
-+ */ |
6001 |
-+ bic->raising_time_left = 0; |
6002 |
-+} |
6003 |
-+ |
6004 |
-+/* |
6005 |
-+ * Must be called with the queue_lock held. |
6006 |
-+ */ |
6007 |
-+static int bfqq_process_refs(struct bfq_queue *bfqq) |
6008 |
-+{ |
6009 |
-+ int process_refs, io_refs; |
6010 |
-+ |
6011 |
-+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; |
6012 |
-+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; |
6013 |
-+ BUG_ON(process_refs < 0); |
6014 |
-+ return process_refs; |
6015 |
-+} |
6016 |
-+ |
6017 |
- static void bfq_add_rq_rb(struct request *rq) |
6018 |
- { |
6019 |
- struct bfq_queue *bfqq = RQ_BFQQ(rq); |
6020 |
-@@ -483,11 +520,20 @@ static void bfq_add_rq_rb(struct request *rq) |
6021 |
- if (! bfqd->low_latency) |
6022 |
- goto add_bfqq_busy; |
6023 |
- |
6024 |
-+ if (bfq_bfqq_just_split(bfqq)) |
6025 |
-+ goto set_ioprio_changed; |
6026 |
-+ |
6027 |
- /* |
6028 |
-- * If the queue is not being boosted and has been idle |
6029 |
-- * for enough time, start a weight-raising period |
6030 |
-+ * If the queue: |
6031 |
-+ * - is not being boosted, |
6032 |
-+ * - has been idle for enough time, |
6033 |
-+ * - is not a sync queue or is linked to a bfq_io_cq (it is |
6034 |
-+ * shared "for its nature" or it is not shared and its |
6035 |
-+ * requests have not been redirected to a shared queue) |
6036 |
-+ * start a weight-raising period. |
6037 |
- */ |
6038 |
-- if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) { |
6039 |
-+ if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt) && |
6040 |
-+ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) { |
6041 |
- bfqq->raising_coeff = bfqd->bfq_raising_coeff; |
6042 |
- if (idle_for_long_time) |
6043 |
- bfqq->raising_cur_max_time = |
6044 |
-@@ -517,6 +563,7 @@ static void bfq_add_rq_rb(struct request *rq) |
6045 |
- raising_cur_max_time)); |
6046 |
- } |
6047 |
- } |
6048 |
-+set_ioprio_changed: |
6049 |
- if (old_raising_coeff != bfqq->raising_coeff) |
6050 |
- entity->ioprio_changed = 1; |
6051 |
- add_bfqq_busy: |
6052 |
-@@ -695,89 +742,35 @@ static void bfq_end_raising(struct bfq_data *bfqd) |
6053 |
- spin_unlock_irq(bfqd->queue->queue_lock); |
6054 |
- } |
6055 |
- |
6056 |
--static int bfq_allow_merge(struct request_queue *q, struct request *rq, |
6057 |
-- struct bio *bio) |
6058 |
-+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) |
6059 |
- { |
6060 |
-- struct bfq_data *bfqd = q->elevator->elevator_data; |
6061 |
-- struct bfq_io_cq *bic; |
6062 |
-- struct bfq_queue *bfqq; |
6063 |
-- |
6064 |
-- /* |
6065 |
-- * Disallow merge of a sync bio into an async request. |
6066 |
-- */ |
6067 |
-- if (bfq_bio_sync(bio) && !rq_is_sync(rq)) |
6068 |
-- return 0; |
6069 |
-- |
6070 |
-- /* |
6071 |
-- * Lookup the bfqq that this bio will be queued with. Allow |
6072 |
-- * merge only if rq is queued there. |
6073 |
-- * Queue lock is held here. |
6074 |
-- */ |
6075 |
-- bic = bfq_bic_lookup(bfqd, current->io_context); |
6076 |
-- if (bic == NULL) |
6077 |
-- return 0; |
6078 |
-- |
6079 |
-- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); |
6080 |
-- return bfqq == RQ_BFQQ(rq); |
6081 |
--} |
6082 |
-- |
6083 |
--static void __bfq_set_active_queue(struct bfq_data *bfqd, |
6084 |
-- struct bfq_queue *bfqq) |
6085 |
--{ |
6086 |
-- if (bfqq != NULL) { |
6087 |
-- bfq_mark_bfqq_must_alloc(bfqq); |
6088 |
-- bfq_mark_bfqq_budget_new(bfqq); |
6089 |
-- bfq_clear_bfqq_fifo_expire(bfqq); |
6090 |
-- |
6091 |
-- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; |
6092 |
-- |
6093 |
-- bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", |
6094 |
-- bfqq->entity.budget); |
6095 |
-- } |
6096 |
-- |
6097 |
-- bfqd->active_queue = bfqq; |
6098 |
--} |
6099 |
-- |
6100 |
--/* |
6101 |
-- * Get and set a new active queue for service. |
6102 |
-- */ |
6103 |
--static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd, |
6104 |
-- struct bfq_queue *bfqq) |
6105 |
--{ |
6106 |
-- if (!bfqq) |
6107 |
-- bfqq = bfq_get_next_queue(bfqd); |
6108 |
-+ if (request) |
6109 |
-+ return blk_rq_pos(io_struct); |
6110 |
- else |
6111 |
-- bfq_get_next_queue_forced(bfqd, bfqq); |
6112 |
-- |
6113 |
-- __bfq_set_active_queue(bfqd, bfqq); |
6114 |
-- return bfqq; |
6115 |
-+ return ((struct bio *)io_struct)->bi_sector; |
6116 |
- } |
6117 |
- |
6118 |
--static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, |
6119 |
-- struct request *rq) |
6120 |
-+static inline sector_t bfq_dist_from(sector_t pos1, |
6121 |
-+ sector_t pos2) |
6122 |
- { |
6123 |
-- if (blk_rq_pos(rq) >= bfqd->last_position) |
6124 |
-- return blk_rq_pos(rq) - bfqd->last_position; |
6125 |
-+ if (pos1 >= pos2) |
6126 |
-+ return pos1 - pos2; |
6127 |
- else |
6128 |
-- return bfqd->last_position - blk_rq_pos(rq); |
6129 |
-+ return pos2 - pos1; |
6130 |
- } |
6131 |
- |
6132 |
--/* |
6133 |
-- * Return true if bfqq has no request pending and rq is close enough to |
6134 |
-- * bfqd->last_position, or if rq is closer to bfqd->last_position than |
6135 |
-- * bfqq->next_rq |
6136 |
-- */ |
6137 |
--static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) |
6138 |
-+static inline int bfq_rq_close_to_sector(void *io_struct, bool request, |
6139 |
-+ sector_t sector) |
6140 |
- { |
6141 |
-- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; |
6142 |
-+ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= |
6143 |
-+ BFQQ_SEEK_THR; |
6144 |
- } |
6145 |
- |
6146 |
--static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) |
6147 |
-+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) |
6148 |
- { |
6149 |
- struct rb_root *root = &bfqd->rq_pos_tree; |
6150 |
- struct rb_node *parent, *node; |
6151 |
- struct bfq_queue *__bfqq; |
6152 |
-- sector_t sector = bfqd->last_position; |
6153 |
- |
6154 |
- if (RB_EMPTY_ROOT(root)) |
6155 |
- return NULL; |
6156 |
-@@ -796,7 +789,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) |
6157 |
- * position). |
6158 |
- */ |
6159 |
- __bfqq = rb_entry(parent, struct bfq_queue, pos_node); |
6160 |
-- if (bfq_rq_close(bfqd, __bfqq->next_rq)) |
6161 |
-+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) |
6162 |
- return __bfqq; |
6163 |
- |
6164 |
- if (blk_rq_pos(__bfqq->next_rq) < sector) |
6165 |
-@@ -807,7 +800,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) |
6166 |
- return NULL; |
6167 |
- |
6168 |
- __bfqq = rb_entry(node, struct bfq_queue, pos_node); |
6169 |
-- if (bfq_rq_close(bfqd, __bfqq->next_rq)) |
6170 |
-+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) |
6171 |
- return __bfqq; |
6172 |
- |
6173 |
- return NULL; |
6174 |
-@@ -816,14 +809,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) |
6175 |
- /* |
6176 |
- * bfqd - obvious |
6177 |
- * cur_bfqq - passed in so that we don't decide that the current queue |
6178 |
-- * is closely cooperating with itself. |
6179 |
-- * |
6180 |
-- * We are assuming that cur_bfqq has dispatched at least one request, |
6181 |
-- * and that bfqd->last_position reflects a position on the disk associated |
6182 |
-- * with the I/O issued by cur_bfqq. |
6183 |
-+ * is closely cooperating with itself |
6184 |
-+ * sector - used as a reference point to search for a close queue |
6185 |
- */ |
6186 |
- static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, |
6187 |
-- struct bfq_queue *cur_bfqq) |
6188 |
-+ struct bfq_queue *cur_bfqq, |
6189 |
-+ sector_t sector) |
6190 |
- { |
6191 |
- struct bfq_queue *bfqq; |
6192 |
- |
6193 |
-@@ -843,7 +834,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, |
6194 |
- * working closely on the same area of the disk. In that case, |
6195 |
- * we can group them together and don't waste time idling. |
6196 |
- */ |
6197 |
-- bfqq = bfqq_close(bfqd); |
6198 |
-+ bfqq = bfqq_close(bfqd, sector); |
6199 |
- if (bfqq == NULL || bfqq == cur_bfqq) |
6200 |
- return NULL; |
6201 |
- |
6202 |
-@@ -870,6 +861,275 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, |
6203 |
- return bfqq; |
6204 |
- } |
6205 |
- |
6206 |
-+static struct bfq_queue * |
6207 |
-+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) |
6208 |
-+{ |
6209 |
-+ int process_refs, new_process_refs; |
6210 |
-+ struct bfq_queue *__bfqq; |
6211 |
-+ |
6212 |
-+ /* |
6213 |
-+ * If there are no process references on the new_bfqq, then it is |
6214 |
-+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain |
6215 |
-+ * may have dropped their last reference (not just their last process |
6216 |
-+ * reference). |
6217 |
-+ */ |
6218 |
-+ if (!bfqq_process_refs(new_bfqq)) |
6219 |
-+ return NULL; |
6220 |
-+ |
6221 |
-+ /* Avoid a circular list and skip interim queue merges. */ |
6222 |
-+ while ((__bfqq = new_bfqq->new_bfqq)) { |
6223 |
-+ if (__bfqq == bfqq) |
6224 |
-+ return NULL; |
6225 |
-+ new_bfqq = __bfqq; |
6226 |
-+ } |
6227 |
-+ |
6228 |
-+ process_refs = bfqq_process_refs(bfqq); |
6229 |
-+ new_process_refs = bfqq_process_refs(new_bfqq); |
6230 |
-+ /* |
6231 |
-+ * If the process for the bfqq has gone away, there is no |
6232 |
-+ * sense in merging the queues. |
6233 |
-+ */ |
6234 |
-+ if (process_refs == 0 || new_process_refs == 0) |
6235 |
-+ return NULL; |
6236 |
-+ |
6237 |
-+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", |
6238 |
-+ new_bfqq->pid); |
6239 |
-+ |
6240 |
-+ /* |
6241 |
-+ * Merging is just a redirection: the requests of the process owning |
6242 |
-+ * one of the two queues are redirected to the other queue. The latter |
6243 |
-+ * queue, in its turn, is set as shared if this is the first time that |
6244 |
-+ * the requests of some process are redirected to it. |
6245 |
-+ * |
6246 |
-+ * We redirect bfqq to new_bfqq and not the opposite, because we |
6247 |
-+ * are in the context of the process owning bfqq, hence we have the |
6248 |
-+ * io_cq of this process. So we can immediately configure this io_cq |
6249 |
-+ * to redirect the requests of the process to new_bfqq. |
6250 |
-+ * |
6251 |
-+ * NOTE, even if new_bfqq coincides with the active queue, the io_cq of |
6252 |
-+ * new_bfqq is not available, because, if the active queue is shared, |
6253 |
-+ * bfqd->active_bic may not point to the io_cq of the active queue. |
6254 |
-+ * Redirecting the requests of the process owning bfqq to the currently |
6255 |
-+ * active queue is in any case the best option, as we feed the active queue |
6256 |
-+ * with new requests close to the last request served and, by doing so, |
6257 |
-+ * hopefully increase the throughput. |
6258 |
-+ */ |
6259 |
-+ bfqq->new_bfqq = new_bfqq; |
6260 |
-+ atomic_add(process_refs, &new_bfqq->ref); |
6261 |
-+ return new_bfqq; |
6262 |
-+} |
6263 |
-+ |
6264 |
-+/* |
6265 |
-+ * Attempt to schedule a merge of bfqq with the currently active queue or |
6266 |
-+ * with a close queue among the scheduled queues. |
6267 |
-+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue |
6268 |
-+ * structure otherwise. |
6269 |
-+ */ |
6270 |
-+static struct bfq_queue * |
6271 |
-+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
6272 |
-+ void *io_struct, bool request) |
6273 |
-+{ |
6274 |
-+ struct bfq_queue *active_bfqq, *new_bfqq; |
6275 |
-+ |
6276 |
-+ if (bfqq->new_bfqq) |
6277 |
-+ return bfqq->new_bfqq; |
6278 |
-+ |
6279 |
-+ if (!io_struct) |
6280 |
-+ return NULL; |
6281 |
-+ |
6282 |
-+ active_bfqq = bfqd->active_queue; |
6283 |
-+ |
6284 |
-+ if (active_bfqq == NULL || active_bfqq == bfqq || !bfqd->active_bic) |
6285 |
-+ goto check_scheduled; |
6286 |
-+ |
6287 |
-+ if (bfq_class_idle(active_bfqq) || bfq_class_idle(bfqq)) |
6288 |
-+ goto check_scheduled; |
6289 |
-+ |
6290 |
-+ if (bfq_class_rt(active_bfqq) != bfq_class_rt(bfqq)) |
6291 |
-+ goto check_scheduled; |
6292 |
-+ |
6293 |
-+ if (active_bfqq->entity.parent != bfqq->entity.parent) |
6294 |
-+ goto check_scheduled; |
6295 |
-+ |
6296 |
-+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && |
6297 |
-+ bfq_bfqq_sync(active_bfqq) && bfq_bfqq_sync(bfqq)) |
6298 |
-+ if ((new_bfqq = bfq_setup_merge(bfqq, active_bfqq))) |
6299 |
-+ return new_bfqq; /* Merge with the active queue */ |
6300 |
-+ |
6301 |
-+ /* |
6302 |
-+ * Check whether there is a cooperator among currently scheduled |
6303 |
-+ * queues. The only thing we need is that the bio/request is not |
6304 |
-+ * NULL, as we need it to establish whether a cooperator exists. |
6305 |
-+ */ |
6306 |
-+check_scheduled: |
6307 |
-+ new_bfqq = bfq_close_cooperator(bfqd, bfqq, |
6308 |
-+ bfq_io_struct_pos(io_struct, request)); |
6309 |
-+ if (new_bfqq) |
6310 |
-+ return bfq_setup_merge(bfqq, new_bfqq); |
6311 |
-+ |
6312 |
-+ return NULL; |
6313 |
-+} |
6314 |
-+ |
6315 |
-+static inline void |
6316 |
-+bfq_bfqq_save_state(struct bfq_queue *bfqq) |
6317 |
-+{ |
6318 |
-+ /* |
6319 |
-+ * If bfqq->bic == NULL, the queue is already shared or its requests |
6320 |
-+ * have already been redirected to a shared queue; both idle window |
6321 |
-+ * and weight raising state have already been saved. Do nothing. |
6322 |
-+ */ |
6323 |
-+ if (bfqq->bic == NULL) |
6324 |
-+ return; |
6325 |
-+ if (bfqq->bic->raising_time_left) |
6326 |
-+ /* |
6327 |
-+ * This is the queue of a just-started process, and would |
6328 |
-+ * deserve weight raising: we set raising_time_left to the full |
6329 |
-+ * weight-raising duration to trigger weight-raising when and |
6330 |
-+ * if the queue is split and the first request of the queue |
6331 |
-+ * is enqueued. |
6332 |
-+ */ |
6333 |
-+ bfqq->bic->raising_time_left = bfq_wrais_duration(bfqq->bfqd); |
6334 |
-+ else if (bfqq->raising_coeff > 1) { |
6335 |
-+ unsigned long wrais_duration = |
6336 |
-+ jiffies - bfqq->last_rais_start_finish; |
6337 |
-+ /* |
6338 |
-+ * It may happen that a queue's weight raising period lasts |
6339 |
-+ * longer than its raising_cur_max_time, as weight raising is |
6340 |
-+ * handled only when a request is enqueued or dispatched (it |
6341 |
-+ * does not use any timer). If the weight raising period is |
6342 |
-+ * about to end, don't save it. |
6343 |
-+ */ |
6344 |
-+ if (bfqq->raising_cur_max_time <= wrais_duration) |
6345 |
-+ bfqq->bic->raising_time_left = 0; |
6346 |
-+ else |
6347 |
-+ bfqq->bic->raising_time_left = |
6348 |
-+ bfqq->raising_cur_max_time - wrais_duration; |
6349 |
-+ /* |
6350 |
-+ * The bfq_queue is becoming shared or the requests of the |
6351 |
-+ * process owning the queue are being redirected to a shared |
6352 |
-+ * queue. Stop the weight raising period of the queue, as in |
6353 |
-+ * both cases it should not be owned by an interactive or soft |
6354 |
-+ * real-time application. |
6355 |
-+ */ |
6356 |
-+ bfq_bfqq_end_raising(bfqq); |
6357 |
-+ } else |
6358 |
-+ bfqq->bic->raising_time_left = 0; |
6359 |
-+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); |
6360 |
-+} |
6361 |
-+ |
6362 |
-+static inline void |
6363 |
-+bfq_get_bic_reference(struct bfq_queue *bfqq) |
6364 |
-+{ |
6365 |
-+ /* |
6366 |
-+ * If bfqq->bic has a non-NULL value, the bic to which it belongs |
6367 |
-+ * is about to begin using a shared bfq_queue. |
6368 |
-+ */ |
6369 |
-+ if (bfqq->bic) |
6370 |
-+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount); |
6371 |
-+} |
6372 |
-+ |
6373 |
-+static void |
6374 |
-+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, |
6375 |
-+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) |
6376 |
-+{ |
6377 |
-+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", |
6378 |
-+ (long unsigned)new_bfqq->pid); |
6379 |
-+ /* Save weight raising and idle window of the merged queues */ |
6380 |
-+ bfq_bfqq_save_state(bfqq); |
6381 |
-+ bfq_bfqq_save_state(new_bfqq); |
6382 |
-+ /* |
6383 |
-+ * Grab a reference to the bic, to prevent it from being destroyed |
6384 |
-+ * before being possibly touched by a bfq_split_bfqq(). |
6385 |
-+ */ |
6386 |
-+ bfq_get_bic_reference(bfqq); |
6387 |
-+ bfq_get_bic_reference(new_bfqq); |
6388 |
-+ /* Merge queues (that is, let bic redirect its requests to new_bfqq) */ |
6389 |
-+ bic_set_bfqq(bic, new_bfqq, 1); |
6390 |
-+ bfq_mark_bfqq_coop(new_bfqq); |
6391 |
-+ /* |
6392 |
-+ * new_bfqq now belongs to at least two bics (it is a shared queue): set |
6393 |
-+ * new_bfqq->bic to NULL. bfqq either: |
6394 |
-+ * - does not belong to any bic any more, and hence bfqq->bic must |
6395 |
-+ * be set to NULL, or |
6396 |
-+ * - is a queue whose owning bics have already been redirected to a |
6397 |
-+ * different queue, hence the queue is destined to not belong to any |
6398 |
-+ * bic soon and bfqq->bic is already NULL (therefore the next |
6399 |
-+ * assignment causes no harm). |
6400 |
-+ */ |
6401 |
-+ new_bfqq->bic = NULL; |
6402 |
-+ bfqq->bic = NULL; |
6403 |
-+ bfq_put_queue(bfqq); |
6404 |
-+} |
6405 |
-+ |
6406 |
-+static int bfq_allow_merge(struct request_queue *q, struct request *rq, |
6407 |
-+ struct bio *bio) |
6408 |
-+{ |
6409 |
-+ struct bfq_data *bfqd = q->elevator->elevator_data; |
6410 |
-+ struct bfq_io_cq *bic; |
6411 |
-+ struct bfq_queue *bfqq, *new_bfqq; |
6412 |
-+ |
6413 |
-+ /* |
6414 |
-+ * Disallow merge of a sync bio into an async request. |
6415 |
-+ */ |
6416 |
-+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) |
6417 |
-+ return 0; |
6418 |
-+ |
6419 |
-+ /* |
6420 |
-+ * Lookup the bfqq that this bio will be queued with. Allow |
6421 |
-+ * merge only if rq is queued there. |
6422 |
-+ * Queue lock is held here. |
6423 |
-+ */ |
6424 |
-+ bic = bfq_bic_lookup(bfqd, current->io_context); |
6425 |
-+ if (bic == NULL) |
6426 |
-+ return 0; |
6427 |
-+ |
6428 |
-+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); |
6429 |
-+ /* |
6430 |
-+ * We take advantage of this function to perform an early merge |
6431 |
-+ * of the queues of possible cooperating processes. |
6432 |
-+ */ |
6433 |
-+ if (bfqq != NULL && |
6434 |
-+ (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false))) { |
6435 |
-+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); |
6436 |
-+ /* |
6437 |
-+ * If we get here, the bio will be queued in the shared queue, |
6438 |
-+ * i.e., new_bfqq, so use new_bfqq to decide whether bio and |
6439 |
-+ * rq can be merged. |
6440 |
-+ */ |
6441 |
-+ bfqq = new_bfqq; |
6442 |
-+ } |
6443 |
-+ |
6444 |
-+ return bfqq == RQ_BFQQ(rq); |
6445 |
-+} |
6446 |
-+ |
6447 |
-+static void __bfq_set_active_queue(struct bfq_data *bfqd, |
6448 |
-+ struct bfq_queue *bfqq) |
6449 |
-+{ |
6450 |
-+ if (bfqq != NULL) { |
6451 |
-+ bfq_mark_bfqq_must_alloc(bfqq); |
6452 |
-+ bfq_mark_bfqq_budget_new(bfqq); |
6453 |
-+ bfq_clear_bfqq_fifo_expire(bfqq); |
6454 |
-+ |
6455 |
-+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; |
6456 |
-+ |
6457 |
-+ bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", |
6458 |
-+ bfqq->entity.budget); |
6459 |
-+ } |
6460 |
-+ |
6461 |
-+ bfqd->active_queue = bfqq; |
6462 |
-+} |
6463 |
-+ |
6464 |
-+/* |
6465 |
-+ * Get and set a new active queue for service. |
6466 |
-+ */ |
6467 |
-+static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd) |
6468 |
-+{ |
6469 |
-+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); |
6470 |
-+ |
6471 |
-+ __bfq_set_active_queue(bfqd, bfqq); |
6472 |
-+ return bfqq; |
6473 |
-+} |
6474 |
-+ |
6475 |
- /* |
6476 |
- * If enough samples have been computed, return the current max budget |
6477 |
- * stored in bfqd, which is dynamically updated according to the |
6478 |
-@@ -1017,63 +1277,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) |
6479 |
- return rq; |
6480 |
- } |
6481 |
- |
6482 |
--/* |
6483 |
-- * Must be called with the queue_lock held. |
6484 |
-- */ |
6485 |
--static int bfqq_process_refs(struct bfq_queue *bfqq) |
6486 |
--{ |
6487 |
-- int process_refs, io_refs; |
6488 |
-- |
6489 |
-- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; |
6490 |
-- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; |
6491 |
-- BUG_ON(process_refs < 0); |
6492 |
-- return process_refs; |
6493 |
--} |
6494 |
-- |
6495 |
--static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) |
6496 |
--{ |
6497 |
-- int process_refs, new_process_refs; |
6498 |
-- struct bfq_queue *__bfqq; |
6499 |
-- |
6500 |
-- /* |
6501 |
-- * If there are no process references on the new_bfqq, then it is |
6502 |
-- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain |
6503 |
-- * may have dropped their last reference (not just their last process |
6504 |
-- * reference). |
6505 |
-- */ |
6506 |
-- if (!bfqq_process_refs(new_bfqq)) |
6507 |
-- return; |
6508 |
-- |
6509 |
-- /* Avoid a circular list and skip interim queue merges. */ |
6510 |
-- while ((__bfqq = new_bfqq->new_bfqq)) { |
6511 |
-- if (__bfqq == bfqq) |
6512 |
-- return; |
6513 |
-- new_bfqq = __bfqq; |
6514 |
-- } |
6515 |
-- |
6516 |
-- process_refs = bfqq_process_refs(bfqq); |
6517 |
-- new_process_refs = bfqq_process_refs(new_bfqq); |
6518 |
-- /* |
6519 |
-- * If the process for the bfqq has gone away, there is no |
6520 |
-- * sense in merging the queues. |
6521 |
-- */ |
6522 |
-- if (process_refs == 0 || new_process_refs == 0) |
6523 |
-- return; |
6524 |
-- |
6525 |
-- /* |
6526 |
-- * Merge in the direction of the lesser amount of work. |
6527 |
-- */ |
6528 |
-- if (new_process_refs >= process_refs) { |
6529 |
-- bfqq->new_bfqq = new_bfqq; |
6530 |
-- atomic_add(process_refs, &new_bfqq->ref); |
6531 |
-- } else { |
6532 |
-- new_bfqq->new_bfqq = bfqq; |
6533 |
-- atomic_add(new_process_refs, &bfqq->ref); |
6534 |
-- } |
6535 |
-- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", |
6536 |
-- new_bfqq->pid); |
6537 |
--} |
6538 |
-- |
6539 |
- static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) |
6540 |
- { |
6541 |
- struct bfq_entity *entity = &bfqq->entity; |
6542 |
-@@ -1493,6 +1696,14 @@ static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) |
6543 |
- * is likely to boost the disk throughput); |
6544 |
- * - the queue is weight-raised (waiting for the request is necessary for |
6545 |
- * providing the queue with fairness and latency guarantees). |
6546 |
-+ * |
6547 |
-+ * In any case, idling can be disabled for cooperation issues, if |
6548 |
-+ * 1) there is a close cooperator for the queue, or |
6549 |
-+ * 2) the queue is shared and some cooperator is likely to be idle (in this |
6550 |
-+ * case, by not arming the idle timer, we try to slow down the queue, to |
6551 |
-+ * prevent the zones of the disk accessed by the active cooperators to |
6552 |
-+ * become too distant from the zone that will be accessed by the currently |
6553 |
-+ * idle cooperators). |
6554 |
- */ |
6555 |
- static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq, |
6556 |
- int budg_timeout) |
6557 |
-@@ -1507,7 +1718,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq, |
6558 |
- (bfqd->rq_in_driver == 0 || |
6559 |
- budg_timeout || |
6560 |
- bfqq->raising_coeff > 1) && |
6561 |
-- !bfq_close_cooperator(bfqd, bfqq) && |
6562 |
-+ !bfq_close_cooperator(bfqd, bfqq, bfqd->last_position) && |
6563 |
- (!bfq_bfqq_coop(bfqq) || |
6564 |
- !bfq_bfqq_some_coop_idle(bfqq)) && |
6565 |
- !bfq_queue_nonrot_noidle(bfqd, bfqq)); |
6566 |
-@@ -1519,7 +1730,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq, |
6567 |
- */ |
6568 |
- static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) |
6569 |
- { |
6570 |
-- struct bfq_queue *bfqq, *new_bfqq = NULL; |
6571 |
-+ struct bfq_queue *bfqq; |
6572 |
- struct request *next_rq; |
6573 |
- enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; |
6574 |
- int budg_timeout; |
6575 |
-@@ -1530,17 +1741,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) |
6576 |
- |
6577 |
- bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); |
6578 |
- |
6579 |
-- /* |
6580 |
-- * If another queue has a request waiting within our mean seek |
6581 |
-- * distance, let it run. The expire code will check for close |
6582 |
-- * cooperators and put the close queue at the front of the |
6583 |
-- * service tree. If possible, merge the expiring queue with the |
6584 |
-- * new bfqq. |
6585 |
-- */ |
6586 |
-- new_bfqq = bfq_close_cooperator(bfqd, bfqq); |
6587 |
-- if (new_bfqq != NULL && bfqq->new_bfqq == NULL) |
6588 |
-- bfq_setup_merge(bfqq, new_bfqq); |
6589 |
-- |
6590 |
- budg_timeout = bfq_may_expire_for_budg_timeout(bfqq); |
6591 |
- if (budg_timeout && |
6592 |
- !bfq_bfqq_must_idle(bfqq, budg_timeout)) |
6593 |
-@@ -1577,10 +1777,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) |
6594 |
- bfq_clear_bfqq_wait_request(bfqq); |
6595 |
- del_timer(&bfqd->idle_slice_timer); |
6596 |
- } |
6597 |
-- if (new_bfqq == NULL) |
6598 |
-- goto keep_queue; |
6599 |
-- else |
6600 |
-- goto expire; |
6601 |
-+ goto keep_queue; |
6602 |
- } |
6603 |
- } |
6604 |
- |
6605 |
-@@ -1589,26 +1786,19 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) |
6606 |
- * queue still has requests in flight or is idling for a new request, |
6607 |
- * then keep it. |
6608 |
- */ |
6609 |
-- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || |
6610 |
-+ if (timer_pending(&bfqd->idle_slice_timer) || |
6611 |
- (bfqq->dispatched != 0 && |
6612 |
- (bfq_bfqq_idle_window(bfqq) || bfqq->raising_coeff > 1) && |
6613 |
-- !bfq_queue_nonrot_noidle(bfqd, bfqq)))) { |
6614 |
-+ !bfq_queue_nonrot_noidle(bfqd, bfqq))) { |
6615 |
- bfqq = NULL; |
6616 |
- goto keep_queue; |
6617 |
-- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { |
6618 |
-- /* |
6619 |
-- * Expiring the queue because there is a close cooperator, |
6620 |
-- * cancel timer. |
6621 |
-- */ |
6622 |
-- bfq_clear_bfqq_wait_request(bfqq); |
6623 |
-- del_timer(&bfqd->idle_slice_timer); |
6624 |
- } |
6625 |
- |
6626 |
- reason = BFQ_BFQQ_NO_MORE_REQUESTS; |
6627 |
- expire: |
6628 |
- bfq_bfqq_expire(bfqd, bfqq, 0, reason); |
6629 |
- new_queue: |
6630 |
-- bfqq = bfq_set_active_queue(bfqd, new_bfqq); |
6631 |
-+ bfqq = bfq_set_active_queue(bfqd); |
6632 |
- bfq_log(bfqd, "select_queue: new queue %d returned", |
6633 |
- bfqq != NULL ? bfqq->pid : 0); |
6634 |
- keep_queue: |
6635 |
-@@ -1617,9 +1807,8 @@ keep_queue: |
6636 |
- |
6637 |
- static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
6638 |
- { |
6639 |
-+ struct bfq_entity *entity = &bfqq->entity; |
6640 |
- if (bfqq->raising_coeff > 1) { /* queue is being boosted */ |
6641 |
-- struct bfq_entity *entity = &bfqq->entity; |
6642 |
-- |
6643 |
- bfq_log_bfqq(bfqd, bfqq, |
6644 |
- "raising period dur %u/%u msec, " |
6645 |
- "old raising coeff %u, w %d(%d)", |
6646 |
-@@ -1656,12 +1845,14 @@ static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
6647 |
- jiffies_to_msecs(bfqq-> |
6648 |
- raising_cur_max_time)); |
6649 |
- bfq_bfqq_end_raising(bfqq); |
6650 |
-- __bfq_entity_update_weight_prio( |
6651 |
-- bfq_entity_service_tree(entity), |
6652 |
-- entity); |
6653 |
- } |
6654 |
- } |
6655 |
- } |
6656 |
-+ /* Update weight both if it must be raised and if it must be lowered */ |
6657 |
-+ if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1)) |
6658 |
-+ __bfq_entity_update_weight_prio( |
6659 |
-+ bfq_entity_service_tree(entity), |
6660 |
-+ entity); |
6661 |
- } |
6662 |
- |
6663 |
- /* |
6664 |
-@@ -1901,6 +2092,25 @@ static void bfq_init_icq(struct io_cq *icq) |
6665 |
- struct bfq_io_cq *bic = icq_to_bic(icq); |
6666 |
- |
6667 |
- bic->ttime.last_end_request = jiffies; |
6668 |
-+ /* |
6669 |
-+ * A newly created bic indicates that the process has just |
6670 |
-+ * started doing I/O, and is probably mapping into memory its |
6671 |
-+ * executable and libraries: it definitely needs weight raising. |
6672 |
-+ * There is however the possibility that the process performs, |
6673 |
-+ * for a while, I/O close to some other process. EQM intercepts |
6674 |
-+ * this behavior and may merge the queue corresponding to the |
6675 |
-+ * process with some other queue, BEFORE the weight of the queue |
6676 |
-+ * is raised. Merged queues are not weight-raised (they are assumed |
6677 |
-+ * to belong to processes that benefit only from high throughput). |
6678 |
-+ * If the merge is basically the consequence of an accident, then |
6679 |
-+ * the queue will be split soon and will get back its old weight. |
6680 |
-+ * It is then important to write down somewhere that this queue |
6681 |
-+ * does need weight raising, even if it did not make it to get its |
6682 |
-+ * weight raised before being merged. To this purpose, we overload |
6683 |
-+ * the field raising_time_left and assign 1 to it, to mark the queue |
6684 |
-+ * as needing weight raising. |
6685 |
-+ */ |
6686 |
-+ bic->raising_time_left = 1; |
6687 |
- } |
6688 |
- |
6689 |
- static void bfq_exit_icq(struct io_cq *icq) |
6690 |
-@@ -1914,6 +2124,13 @@ static void bfq_exit_icq(struct io_cq *icq) |
6691 |
- } |
6692 |
- |
6693 |
- if (bic->bfqq[BLK_RW_SYNC]) { |
6694 |
-+ /* |
6695 |
-+ * If the bic is using a shared queue, put the reference |
6696 |
-+ * taken on the io_context when the bic started using a |
6697 |
-+ * shared bfq_queue. |
6698 |
-+ */ |
6699 |
-+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC])) |
6700 |
-+ put_io_context(icq->ioc); |
6701 |
- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); |
6702 |
- bic->bfqq[BLK_RW_SYNC] = NULL; |
6703 |
- } |
6704 |
-@@ -2211,6 +2428,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, |
6705 |
- if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) |
6706 |
- return; |
6707 |
- |
6708 |
-+ /* Idle window just restored, statistics are meaningless. */ |
6709 |
-+ if (bfq_bfqq_just_split(bfqq)) |
6710 |
-+ return; |
6711 |
-+ |
6712 |
- enable_idle = bfq_bfqq_idle_window(bfqq); |
6713 |
- |
6714 |
- if (atomic_read(&bic->icq.ioc->active_ref) == 0 || |
6715 |
-@@ -2251,6 +2472,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
6716 |
- if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || |
6717 |
- !BFQQ_SEEKY(bfqq)) |
6718 |
- bfq_update_idle_window(bfqd, bfqq, bic); |
6719 |
-+ bfq_clear_bfqq_just_split(bfqq); |
6720 |
- |
6721 |
- bfq_log_bfqq(bfqd, bfqq, |
6722 |
- "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", |
6723 |
-@@ -2302,13 +2524,45 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
6724 |
- static void bfq_insert_request(struct request_queue *q, struct request *rq) |
6725 |
- { |
6726 |
- struct bfq_data *bfqd = q->elevator->elevator_data; |
6727 |
-- struct bfq_queue *bfqq = RQ_BFQQ(rq); |
6728 |
-+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; |
6729 |
- |
6730 |
- assert_spin_locked(bfqd->queue->queue_lock); |
6731 |
-+ |
6732 |
-+ /* |
6733 |
-+ * An unplug may trigger a requeue of a request from the device |
6734 |
-+ * driver: make sure we are in process context while trying to |
6735 |
-+ * merge two bfq_queues. |
6736 |
-+ */ |
6737 |
-+ if (!in_interrupt() && |
6738 |
-+ (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true))) { |
6739 |
-+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) |
6740 |
-+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1); |
6741 |
-+ /* |
6742 |
-+ * Release the request's reference to the old bfqq |
6743 |
-+ * and make sure one is taken to the shared queue. |
6744 |
-+ */ |
6745 |
-+ new_bfqq->allocated[rq_data_dir(rq)]++; |
6746 |
-+ bfqq->allocated[rq_data_dir(rq)]--; |
6747 |
-+ atomic_inc(&new_bfqq->ref); |
6748 |
-+ bfq_put_queue(bfqq); |
6749 |
-+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) |
6750 |
-+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq, new_bfqq); |
6751 |
-+ rq->elv.priv[1] = new_bfqq; |
6752 |
-+ bfqq = new_bfqq; |
6753 |
-+ } |
6754 |
-+ |
6755 |
- bfq_init_prio_data(bfqq, RQ_BIC(rq)); |
6756 |
- |
6757 |
- bfq_add_rq_rb(rq); |
6758 |
- |
6759 |
-+ /* |
6760 |
-+ * Here a newly-created bfq_queue has already started a weight-raising |
6761 |
-+ * period: clear raising_time_left to prevent bfq_bfqq_save_state() |
6762 |
-+ * from assigning it a full weight-raising period. See the detailed |
6763 |
-+ * comments about this field in bfq_init_icq(). |
6764 |
-+ */ |
6765 |
-+ if (bfqq->bic != NULL) |
6766 |
-+ bfqq->bic->raising_time_left = 0; |
6767 |
- rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); |
6768 |
- list_add_tail(&rq->queuelist, &bfqq->fifo); |
6769 |
- |
6770 |
-@@ -2371,15 +2625,6 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) |
6771 |
- if (bfq_bfqq_budget_new(bfqq)) |
6772 |
- bfq_set_budget_timeout(bfqd); |
6773 |
- |
6774 |
-- /* Idling is disabled also for cooperation issues: |
6775 |
-- * 1) there is a close cooperator for the queue, or |
6776 |
-- * 2) the queue is shared and some cooperator is likely |
6777 |
-- * to be idle (in this case, by not arming the idle timer, |
6778 |
-- * we try to slow down the queue, to prevent the zones |
6779 |
-- * of the disk accessed by the active cooperators to become |
6780 |
-- * too distant from the zone that will be accessed by the |
6781 |
-- * currently idle cooperators) |
6782 |
-- */ |
6783 |
- if (bfq_bfqq_must_idle(bfqq, budg_timeout)) |
6784 |
- bfq_arm_slice_timer(bfqd); |
6785 |
- else if (budg_timeout) |
6786 |
-@@ -2449,18 +2694,6 @@ static void bfq_put_request(struct request *rq) |
6787 |
- } |
6788 |
- } |
6789 |
- |
6790 |
--static struct bfq_queue * |
6791 |
--bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, |
6792 |
-- struct bfq_queue *bfqq) |
6793 |
--{ |
6794 |
-- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", |
6795 |
-- (long unsigned)bfqq->new_bfqq->pid); |
6796 |
-- bic_set_bfqq(bic, bfqq->new_bfqq, 1); |
6797 |
-- bfq_mark_bfqq_coop(bfqq->new_bfqq); |
6798 |
-- bfq_put_queue(bfqq); |
6799 |
-- return bic_to_bfqq(bic, 1); |
6800 |
--} |
6801 |
-- |
6802 |
- /* |
6803 |
- * Returns NULL if a new bfqq should be allocated, or the old bfqq if this |
6804 |
- * was the last process referring to said bfqq. |
6805 |
-@@ -2469,6 +2702,9 @@ static struct bfq_queue * |
6806 |
- bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) |
6807 |
- { |
6808 |
- bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); |
6809 |
-+ |
6810 |
-+ put_io_context(bic->icq.ioc); |
6811 |
-+ |
6812 |
- if (bfqq_process_refs(bfqq) == 1) { |
6813 |
- bfqq->pid = current->pid; |
6814 |
- bfq_clear_bfqq_some_coop_idle(bfqq); |
6815 |
-@@ -2498,6 +2734,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, |
6816 |
- struct bfq_queue *bfqq; |
6817 |
- struct bfq_group *bfqg; |
6818 |
- unsigned long flags; |
6819 |
-+ bool split = false; |
6820 |
- |
6821 |
- might_sleep_if(gfp_mask & __GFP_WAIT); |
6822 |
- |
6823 |
-@@ -2516,24 +2753,14 @@ new_queue: |
6824 |
- bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); |
6825 |
- bic_set_bfqq(bic, bfqq, is_sync); |
6826 |
- } else { |
6827 |
-- /* |
6828 |
-- * If the queue was seeky for too long, break it apart. |
6829 |
-- */ |
6830 |
-+ /* If the queue was seeky for too long, break it apart. */ |
6831 |
- if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { |
6832 |
- bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); |
6833 |
- bfqq = bfq_split_bfqq(bic, bfqq); |
6834 |
-+ split = true; |
6835 |
- if (!bfqq) |
6836 |
- goto new_queue; |
6837 |
- } |
6838 |
-- |
6839 |
-- /* |
6840 |
-- * Check to see if this queue is scheduled to merge with |
6841 |
-- * another closely cooperating queue. The merging of queues |
6842 |
-- * happens here as it must be done in process context. |
6843 |
-- * The reference on new_bfqq was taken in merge_bfqqs. |
6844 |
-- */ |
6845 |
-- if (bfqq->new_bfqq != NULL) |
6846 |
-- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); |
6847 |
- } |
6848 |
- |
6849 |
- bfqq->allocated[rw]++; |
6850 |
-@@ -2544,6 +2771,26 @@ new_queue: |
6851 |
- rq->elv.priv[0] = bic; |
6852 |
- rq->elv.priv[1] = bfqq; |
6853 |
- |
6854 |
-+ /* |
6855 |
-+ * If a bfq_queue has only one process reference, it is owned |
6856 |
-+ * by only one bfq_io_cq: we can set the bic field of the |
6857 |
-+ * bfq_queue to the address of that structure. Also, if the |
6858 |
-+ * queue has just been split, mark a flag so that the |
6859 |
-+ * information is available to the other scheduler hooks. |
6860 |
-+ */ |
6861 |
-+ if (bfqq_process_refs(bfqq) == 1) { |
6862 |
-+ bfqq->bic = bic; |
6863 |
-+ if (split) { |
6864 |
-+ bfq_mark_bfqq_just_split(bfqq); |
6865 |
-+ /* |
6866 |
-+ * If the queue has just been split from a shared queue, |
6867 |
-+ * restore the idle window and the possible weight |
6868 |
-+ * raising period. |
6869 |
-+ */ |
6870 |
-+ bfq_bfqq_resume_state(bfqq, bic); |
6871 |
-+ } |
6872 |
-+ } |
6873 |
-+ |
6874 |
- spin_unlock_irqrestore(q->queue_lock, flags); |
6875 |
- |
6876 |
- return 0; |
6877 |
-diff --git a/block/bfq-sched.c b/block/bfq-sched.c |
6878 |
-index 03f8061..a0edaa2 100644 |
6879 |
---- a/block/bfq-sched.c |
6880 |
-+++ b/block/bfq-sched.c |
6881 |
-@@ -978,34 +978,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) |
6882 |
- return bfqq; |
6883 |
- } |
6884 |
- |
6885 |
--/* |
6886 |
-- * Forced extraction of the given queue. |
6887 |
-- */ |
6888 |
--static void bfq_get_next_queue_forced(struct bfq_data *bfqd, |
6889 |
-- struct bfq_queue *bfqq) |
6890 |
--{ |
6891 |
-- struct bfq_entity *entity; |
6892 |
-- struct bfq_sched_data *sd; |
6893 |
-- |
6894 |
-- BUG_ON(bfqd->active_queue != NULL); |
6895 |
-- |
6896 |
-- entity = &bfqq->entity; |
6897 |
-- /* |
6898 |
-- * Bubble up extraction/update from the leaf to the root. |
6899 |
-- */ |
6900 |
-- for_each_entity(entity) { |
6901 |
-- sd = entity->sched_data; |
6902 |
-- bfq_update_budget(entity); |
6903 |
-- bfq_update_vtime(bfq_entity_service_tree(entity)); |
6904 |
-- bfq_active_extract(bfq_entity_service_tree(entity), entity); |
6905 |
-- sd->active_entity = entity; |
6906 |
-- sd->next_active = NULL; |
6907 |
-- entity->service = 0; |
6908 |
-- } |
6909 |
-- |
6910 |
-- return; |
6911 |
--} |
6912 |
-- |
6913 |
- static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) |
6914 |
- { |
6915 |
- if (bfqd->active_bic != NULL) { |
6916 |
-diff --git a/block/bfq.h b/block/bfq.h |
6917 |
-index 48ecde9..bb52975 100644 |
6918 |
---- a/block/bfq.h |
6919 |
-+++ b/block/bfq.h |
6920 |
-@@ -188,6 +188,8 @@ struct bfq_group; |
6921 |
- * @pid: pid of the process owning the queue, used for logging purposes. |
6922 |
- * @last_rais_start_time: last (idle -> weight-raised) transition attempt |
6923 |
- * @raising_cur_max_time: current max raising time for this queue |
6924 |
-+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the |
6925 |
-+ * queue is shared |
6926 |
- * |
6927 |
- * A bfq_queue is a leaf request queue; it can be associated to an io_context |
6928 |
- * or more (if it is an async one). @cgroup holds a reference to the |
6929 |
-@@ -231,6 +233,7 @@ struct bfq_queue { |
6930 |
- sector_t last_request_pos; |
6931 |
- |
6932 |
- pid_t pid; |
6933 |
-+ struct bfq_io_cq *bic; |
6934 |
- |
6935 |
- /* weight-raising fields */ |
6936 |
- unsigned int raising_cur_max_time; |
6937 |
-@@ -257,12 +260,23 @@ struct bfq_ttime { |
6938 |
- * @icq: associated io_cq structure |
6939 |
- * @bfqq: array of two process queues, the sync and the async |
6940 |
- * @ttime: associated @bfq_ttime struct |
6941 |
-+ * @raising_time_left: snapshot of the time left before weight raising ends |
6942 |
-+ * for the sync queue associated to this process; this |
6943 |
-+ * snapshot is taken to remember this value while the weight |
6944 |
-+ * raising is suspended because the queue is merged with a |
6945 |
-+ * shared queue, and is used to set @raising_cur_max_time |
6946 |
-+ * when the queue is split from the shared queue and its |
6947 |
-+ * weight is raised again |
6948 |
-+ * @saved_idle_window: same purpose as the previous field for the idle window |
6949 |
- */ |
6950 |
- struct bfq_io_cq { |
6951 |
- struct io_cq icq; /* must be the first member */ |
6952 |
- struct bfq_queue *bfqq[2]; |
6953 |
- struct bfq_ttime ttime; |
6954 |
- int ioprio; |
6955 |
-+ |
6956 |
-+ unsigned int raising_time_left; |
6957 |
-+ unsigned int saved_idle_window; |
6958 |
- }; |
6959 |
- |
6960 |
- /** |
6961 |
-@@ -403,6 +417,7 @@ enum bfqq_state_flags { |
6962 |
- BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ |
6963 |
- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ |
6964 |
- BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ |
6965 |
-+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ |
6966 |
- }; |
6967 |
- |
6968 |
- #define BFQ_BFQQ_FNS(name) \ |
6969 |
-@@ -430,6 +445,7 @@ BFQ_BFQQ_FNS(budget_new); |
6970 |
- BFQ_BFQQ_FNS(coop); |
6971 |
- BFQ_BFQQ_FNS(split_coop); |
6972 |
- BFQ_BFQQ_FNS(some_coop_idle); |
6973 |
-+BFQ_BFQQ_FNS(just_split); |
6974 |
- #undef BFQ_BFQQ_FNS |
6975 |
- |
6976 |
- /* Logging facilities. */ |
6977 |
--- |
6978 |
-1.8.1.4 |
6979 |
- |