From: "Mike Pagano (mpagano)" <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] linux-patches r2772 - genpatches-2.6/trunk/3.15
Date: Mon, 05 May 2014 14:28:54
Message-Id: 20140505142849.483CA2004C@flycatcher.gentoo.org
1 Author: mpagano
2 Date: 2014-05-05 14:28:48 +0000 (Mon, 05 May 2014)
3 New Revision: 2772
4
5 Removed:
6 genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch
7 genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch
8 genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch
9 genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch
10 genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch
11 genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1
12 genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch
13 Modified:
14 genpatches-2.6/trunk/3.15/0000_README
15 Log:
16 Remove patches incompatible with 3.15 until they can be worked on.
17
18 Modified: genpatches-2.6/trunk/3.15/0000_README
19 ===================================================================
20 --- genpatches-2.6/trunk/3.15/0000_README 2014-05-05 14:22:27 UTC (rev 2771)
21 +++ genpatches-2.6/trunk/3.15/0000_README 2014-05-05 14:28:48 UTC (rev 2772)
22 @@ -42,14 +42,6 @@
23
24 Individual Patch Descriptions:
25 --------------------------------------------------------------------------
26 -Patch: 1000_linux-3.14.1.patch
27 -From: http://www.kernel.org
28 -Desc: Linux 3.14.1
29 -
30 -Patch: 1001_linux-3.14.2.patch
31 -From: http://www.kernel.org
32 -Desc: Linux 3.14.2
33 -
34 Patch: 1500_XATTR_USER_PREFIX.patch
35 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
36 Desc: Support for namespace user.pax.* on tmpfs.
37 @@ -74,10 +66,6 @@
38 From: Al Viro <viro <at> ZenIV.linux.org.uk>
39 Desc: Do not lock when UMH is waiting on current thread spawned by linuxrc. (bug #481344)
40
41 -Patch: 4200_fbcondecor-0.9.6.patch
42 -From: http://dev.gentoo.org/~spock
43 -Desc: Bootsplash successor by Michal Januszewski ported by Jeremy (bug #452574)
44 -
45 Patch: 4500_support-for-pogoplug-e02.patch
46 From: Cristoph Junghans <ottxor@g.o>
47 Desc: Support for Pogoplug e02 (bug #460350), adjusted to be opt-in by TomWij.
48 @@ -85,20 +73,3 @@
49 Patch: 4567_distro-Gentoo-Kconfig.patch
50 From: Tom Wijsman <TomWij@g.o>
51 Desc: Add Gentoo Linux support config settings and defaults.
52 -
53 -Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
54 -From: https://github.com/graysky2/kernel_gcc_patch/
55 -Desc: Kernel patch enables gcc optimizations for additional CPUs.
56 -
57 -Patch: 5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r2-3.14.patch
58 -From: http://algo.ing.unimo.it/people/paolo/disk_sched/
59 -Desc: BFQ v7r2 patch 1 for 3.14: Build, cgroups and kconfig bits
60 -
61 -Patch: 5002_BFQ-2-block-introduce-the-v7r2-I-O-sched-for-3.14.patch1
62 -From: http://algo.ing.unimo.it/people/paolo/disk_sched/
63 -Desc: BFQ v7r2 patch 2 for 3.14: BFQ Scheduler
64 -
65 -Patch: 5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r2-for-3.14.0.patch
66 -From: http://algo.ing.unimo.it/people/paolo/disk_sched/
67 -Desc: BFQ v7r2 patch 3 for 3.14: Early Queue Merge (EQM)
68 -
69
70 Deleted: genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch
71 ===================================================================
72 --- genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch 2014-05-05 14:22:27 UTC (rev 2771)
73 +++ genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch 2014-05-05 14:28:48 UTC (rev 2772)
74 @@ -1,1221 +0,0 @@
75 -diff --git a/Makefile b/Makefile
76 -index e5ac8a62e6e5..7d0b6992d9ed 100644
77 ---- a/Makefile
78 -+++ b/Makefile
79 -@@ -1,6 +1,6 @@
80 - VERSION = 3
81 - PATCHLEVEL = 14
82 --SUBLEVEL = 0
83 -+SUBLEVEL = 1
84 - EXTRAVERSION =
85 - NAME = Shuffling Zombie Juror
86 -
87 -diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
88 -index ea16d782af58..4f31b2eb5cdf 100644
89 ---- a/arch/arc/boot/dts/nsimosci.dts
90 -+++ b/arch/arc/boot/dts/nsimosci.dts
91 -@@ -11,13 +11,16 @@
92 -
93 - / {
94 - compatible = "snps,nsimosci";
95 -- clock-frequency = <80000000>; /* 80 MHZ */
96 -+ clock-frequency = <20000000>; /* 20 MHZ */
97 - #address-cells = <1>;
98 - #size-cells = <1>;
99 - interrupt-parent = <&intc>;
100 -
101 - chosen {
102 -- bootargs = "console=tty0 consoleblank=0";
103 -+ /* this is for console on PGU */
104 -+ /* bootargs = "console=tty0 consoleblank=0"; */
105 -+ /* this is for console on serial */
106 -+ bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug";
107 - };
108 -
109 - aliases {
110 -@@ -44,15 +47,14 @@
111 - };
112 -
113 - uart0: serial@c0000000 {
114 -- compatible = "snps,dw-apb-uart";
115 -+ compatible = "ns8250";
116 - reg = <0xc0000000 0x2000>;
117 - interrupts = <11>;
118 -- #clock-frequency = <80000000>;
119 - clock-frequency = <3686400>;
120 - baud = <115200>;
121 - reg-shift = <2>;
122 - reg-io-width = <4>;
123 -- status = "okay";
124 -+ no-loopback-test = <1>;
125 - };
126 -
127 - pgu0: pgu@c9000000 {
128 -diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
129 -index 451af30914f6..c01ba35a4eff 100644
130 ---- a/arch/arc/configs/nsimosci_defconfig
131 -+++ b/arch/arc/configs/nsimosci_defconfig
132 -@@ -54,6 +54,7 @@ CONFIG_SERIO_ARC_PS2=y
133 - CONFIG_SERIAL_8250=y
134 - CONFIG_SERIAL_8250_CONSOLE=y
135 - CONFIG_SERIAL_8250_DW=y
136 -+CONFIG_SERIAL_OF_PLATFORM=y
137 - CONFIG_SERIAL_ARC=y
138 - CONFIG_SERIAL_ARC_CONSOLE=y
139 - # CONFIG_HW_RANDOM is not set
140 -diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
141 -index dbdd2231c75d..b2e322939256 100644
142 ---- a/arch/m68k/Kconfig
143 -+++ b/arch/m68k/Kconfig
144 -@@ -17,6 +17,7 @@ config M68K
145 - select FPU if MMU
146 - select ARCH_WANT_IPC_PARSE_VERSION
147 - select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
148 -+ select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
149 - select HAVE_MOD_ARCH_SPECIFIC
150 - select MODULES_USE_ELF_REL
151 - select MODULES_USE_ELF_RELA
152 -diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
153 -index 65a07750f4f9..bb74b21f007a 100644
154 ---- a/arch/s390/Kconfig
155 -+++ b/arch/s390/Kconfig
156 -@@ -117,6 +117,7 @@ config S390
157 - select HAVE_FUNCTION_GRAPH_TRACER
158 - select HAVE_FUNCTION_TRACER
159 - select HAVE_FUNCTION_TRACE_MCOUNT_TEST
160 -+ select HAVE_FUTEX_CMPXCHG if FUTEX
161 - select HAVE_KERNEL_BZIP2
162 - select HAVE_KERNEL_GZIP
163 - select HAVE_KERNEL_LZ4
164 -diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
165 -index 586f41aac361..185fad49d86f 100644
166 ---- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
167 -+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
168 -@@ -24,10 +24,6 @@
169 - .align 16
170 - .Lbswap_mask:
171 - .octa 0x000102030405060708090a0b0c0d0e0f
172 --.Lpoly:
173 -- .octa 0xc2000000000000000000000000000001
174 --.Ltwo_one:
175 -- .octa 0x00000001000000000000000000000001
176 -
177 - #define DATA %xmm0
178 - #define SHASH %xmm1
179 -@@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update)
180 - .Lupdate_just_ret:
181 - ret
182 - ENDPROC(clmul_ghash_update)
183 --
184 --/*
185 -- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
186 -- *
187 -- * Calculate hash_key << 1 mod poly
188 -- */
189 --ENTRY(clmul_ghash_setkey)
190 -- movaps .Lbswap_mask, BSWAP
191 -- movups (%rsi), %xmm0
192 -- PSHUFB_XMM BSWAP %xmm0
193 -- movaps %xmm0, %xmm1
194 -- psllq $1, %xmm0
195 -- psrlq $63, %xmm1
196 -- movaps %xmm1, %xmm2
197 -- pslldq $8, %xmm1
198 -- psrldq $8, %xmm2
199 -- por %xmm1, %xmm0
200 -- # reduction
201 -- pshufd $0b00100100, %xmm2, %xmm1
202 -- pcmpeqd .Ltwo_one, %xmm1
203 -- pand .Lpoly, %xmm1
204 -- pxor %xmm1, %xmm0
205 -- movups %xmm0, (%rdi)
206 -- ret
207 --ENDPROC(clmul_ghash_setkey)
208 -diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
209 -index 6759dd1135be..d785cf2c529c 100644
210 ---- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
211 -+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
212 -@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
213 - void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
214 - const be128 *shash);
215 -
216 --void clmul_ghash_setkey(be128 *shash, const u8 *key);
217 --
218 - struct ghash_async_ctx {
219 - struct cryptd_ahash *cryptd_tfm;
220 - };
221 -@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
222 - const u8 *key, unsigned int keylen)
223 - {
224 - struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
225 -+ be128 *x = (be128 *)key;
226 -+ u64 a, b;
227 -
228 - if (keylen != GHASH_BLOCK_SIZE) {
229 - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
230 - return -EINVAL;
231 - }
232 -
233 -- clmul_ghash_setkey(&ctx->shash, key);
234 -+ /* perform multiplication by 'x' in GF(2^128) */
235 -+ a = be64_to_cpu(x->a);
236 -+ b = be64_to_cpu(x->b);
237 -+
238 -+ ctx->shash.a = (__be64)((b << 1) | (a >> 63));
239 -+ ctx->shash.b = (__be64)((a << 1) | (b >> 63));
240 -+
241 -+ if (a >> 63)
242 -+ ctx->shash.b ^= cpu_to_be64(0xc2);
243 -
244 - return 0;
245 - }
246 -diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
247 -index acd86c850414..f949715e3957 100644
248 ---- a/arch/x86/include/asm/efi.h
249 -+++ b/arch/x86/include/asm/efi.h
250 -@@ -130,7 +130,8 @@ extern void efi_memory_uc(u64 addr, unsigned long size);
251 - extern void __init efi_map_region(efi_memory_desc_t *md);
252 - extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
253 - extern void efi_sync_low_kernel_mappings(void);
254 --extern void efi_setup_page_tables(void);
255 -+extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
256 -+extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
257 - extern void __init old_map_region(efi_memory_desc_t *md);
258 - extern void __init runtime_code_page_mkexec(void);
259 - extern void __init efi_runtime_mkexec(void);
260 -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
261 -index 1aa9ccd43223..94e40f1efdfd 100644
262 ---- a/arch/x86/include/asm/pgtable_types.h
263 -+++ b/arch/x86/include/asm/pgtable_types.h
264 -@@ -385,6 +385,8 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level);
265 - extern phys_addr_t slow_virt_to_phys(void *__address);
266 - extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
267 - unsigned numpages, unsigned long page_flags);
268 -+void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
269 -+ unsigned numpages);
270 - #endif /* !__ASSEMBLY__ */
271 -
272 - #endif /* _ASM_X86_PGTABLE_DEFS_H */
273 -diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
274 -index b3b19f46c016..a3488689e301 100644
275 ---- a/arch/x86/mm/pageattr.c
276 -+++ b/arch/x86/mm/pageattr.c
277 -@@ -692,6 +692,18 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
278 - return true;
279 - }
280 -
281 -+static bool try_to_free_pud_page(pud_t *pud)
282 -+{
283 -+ int i;
284 -+
285 -+ for (i = 0; i < PTRS_PER_PUD; i++)
286 -+ if (!pud_none(pud[i]))
287 -+ return false;
288 -+
289 -+ free_page((unsigned long)pud);
290 -+ return true;
291 -+}
292 -+
293 - static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
294 - {
295 - pte_t *pte = pte_offset_kernel(pmd, start);
296 -@@ -805,6 +817,16 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
297 - */
298 - }
299 -
300 -+static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
301 -+{
302 -+ pgd_t *pgd_entry = root + pgd_index(addr);
303 -+
304 -+ unmap_pud_range(pgd_entry, addr, end);
305 -+
306 -+ if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
307 -+ pgd_clear(pgd_entry);
308 -+}
309 -+
310 - static int alloc_pte_page(pmd_t *pmd)
311 - {
312 - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
313 -@@ -999,9 +1021,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
314 - static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
315 - {
316 - pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
317 -- bool allocd_pgd = false;
318 -- pgd_t *pgd_entry;
319 - pud_t *pud = NULL; /* shut up gcc */
320 -+ pgd_t *pgd_entry;
321 - int ret;
322 -
323 - pgd_entry = cpa->pgd + pgd_index(addr);
324 -@@ -1015,7 +1036,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
325 - return -1;
326 -
327 - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
328 -- allocd_pgd = true;
329 - }
330 -
331 - pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
332 -@@ -1023,19 +1043,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
333 -
334 - ret = populate_pud(cpa, addr, pgd_entry, pgprot);
335 - if (ret < 0) {
336 -- unmap_pud_range(pgd_entry, addr,
337 -+ unmap_pgd_range(cpa->pgd, addr,
338 - addr + (cpa->numpages << PAGE_SHIFT));
339 --
340 -- if (allocd_pgd) {
341 -- /*
342 -- * If I allocated this PUD page, I can just as well
343 -- * free it in this error path.
344 -- */
345 -- pgd_clear(pgd_entry);
346 -- free_page((unsigned long)pud);
347 -- }
348 - return ret;
349 - }
350 -+
351 - cpa->numpages = ret;
352 - return 0;
353 - }
354 -@@ -1861,6 +1873,12 @@ out:
355 - return retval;
356 - }
357 -
358 -+void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
359 -+ unsigned numpages)
360 -+{
361 -+ unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
362 -+}
363 -+
364 - /*
365 - * The testcases use internal knowledge of the implementation that shouldn't
366 - * be exposed to the rest of the kernel. Include these directly here.
367 -diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
368 -index b97acecf3fd9..abb81b0ad83f 100644
369 ---- a/arch/x86/platform/efi/efi.c
370 -+++ b/arch/x86/platform/efi/efi.c
371 -@@ -939,14 +939,36 @@ static void __init efi_map_regions_fixed(void)
372 -
373 - }
374 -
375 -+static void *realloc_pages(void *old_memmap, int old_shift)
376 -+{
377 -+ void *ret;
378 -+
379 -+ ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
380 -+ if (!ret)
381 -+ goto out;
382 -+
383 -+ /*
384 -+ * A first-time allocation doesn't have anything to copy.
385 -+ */
386 -+ if (!old_memmap)
387 -+ return ret;
388 -+
389 -+ memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
390 -+
391 -+out:
392 -+ free_pages((unsigned long)old_memmap, old_shift);
393 -+ return ret;
394 -+}
395 -+
396 - /*
397 -- * Map efi memory ranges for runtime serivce and update new_memmap with virtual
398 -- * addresses.
399 -+ * Map the efi memory ranges of the runtime services and update new_mmap with
400 -+ * virtual addresses.
401 - */
402 --static void * __init efi_map_regions(int *count)
403 -+static void * __init efi_map_regions(int *count, int *pg_shift)
404 - {
405 -+ void *p, *new_memmap = NULL;
406 -+ unsigned long left = 0;
407 - efi_memory_desc_t *md;
408 -- void *p, *tmp, *new_memmap = NULL;
409 -
410 - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
411 - md = p;
412 -@@ -961,20 +983,23 @@ static void * __init efi_map_regions(int *count)
413 - efi_map_region(md);
414 - get_systab_virt_addr(md);
415 -
416 -- tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
417 -- GFP_KERNEL);
418 -- if (!tmp)
419 -- goto out;
420 -- new_memmap = tmp;
421 -+ if (left < memmap.desc_size) {
422 -+ new_memmap = realloc_pages(new_memmap, *pg_shift);
423 -+ if (!new_memmap)
424 -+ return NULL;
425 -+
426 -+ left += PAGE_SIZE << *pg_shift;
427 -+ (*pg_shift)++;
428 -+ }
429 -+
430 - memcpy(new_memmap + (*count * memmap.desc_size), md,
431 - memmap.desc_size);
432 -+
433 -+ left -= memmap.desc_size;
434 - (*count)++;
435 - }
436 -
437 - return new_memmap;
438 --out:
439 -- kfree(new_memmap);
440 -- return NULL;
441 - }
442 -
443 - /*
444 -@@ -1000,9 +1025,9 @@ out:
445 - */
446 - void __init efi_enter_virtual_mode(void)
447 - {
448 -- efi_status_t status;
449 -+ int err, count = 0, pg_shift = 0;
450 - void *new_memmap = NULL;
451 -- int err, count = 0;
452 -+ efi_status_t status;
453 -
454 - efi.systab = NULL;
455 -
456 -@@ -1019,20 +1044,24 @@ void __init efi_enter_virtual_mode(void)
457 - efi_map_regions_fixed();
458 - } else {
459 - efi_merge_regions();
460 -- new_memmap = efi_map_regions(&count);
461 -+ new_memmap = efi_map_regions(&count, &pg_shift);
462 - if (!new_memmap) {
463 - pr_err("Error reallocating memory, EFI runtime non-functional!\n");
464 - return;
465 - }
466 -- }
467 -
468 -- err = save_runtime_map();
469 -- if (err)
470 -- pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
471 -+ err = save_runtime_map();
472 -+ if (err)
473 -+ pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
474 -+ }
475 -
476 - BUG_ON(!efi.systab);
477 -
478 -- efi_setup_page_tables();
479 -+ if (!efi_setup) {
480 -+ if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
481 -+ return;
482 -+ }
483 -+
484 - efi_sync_low_kernel_mappings();
485 -
486 - if (!efi_setup) {
487 -@@ -1072,7 +1101,35 @@ void __init efi_enter_virtual_mode(void)
488 -
489 - efi_runtime_mkexec();
490 -
491 -- kfree(new_memmap);
492 -+
493 -+ /*
494 -+ * We mapped the descriptor array into the EFI pagetable above but we're
495 -+ * not unmapping it here. Here's why:
496 -+ *
497 -+ * We're copying select PGDs from the kernel page table to the EFI page
498 -+ * table and when we do so and make changes to those PGDs like unmapping
499 -+ * stuff from them, those changes appear in the kernel page table and we
500 -+ * go boom.
501 -+ *
502 -+ * From setup_real_mode():
503 -+ *
504 -+ * ...
505 -+ * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
506 -+ *
507 -+ * In this particular case, our allocation is in PGD 0 of the EFI page
508 -+ * table but we've copied that PGD from PGD[272] of the EFI page table:
509 -+ *
510 -+ * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
511 -+ *
512 -+ * where the direct memory mapping in kernel space is.
513 -+ *
514 -+ * new_memmap's VA comes from that direct mapping and thus clearing it,
515 -+ * it would get cleared in the kernel page table too.
516 -+ *
517 -+ * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
518 -+ */
519 -+ if (!efi_setup)
520 -+ free_pages((unsigned long)new_memmap, pg_shift);
521 -
522 - /* clean DUMMY object */
523 - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
524 -diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
525 -index 0b74cdf7f816..9ee3491e31fb 100644
526 ---- a/arch/x86/platform/efi/efi_32.c
527 -+++ b/arch/x86/platform/efi/efi_32.c
528 -@@ -40,7 +40,12 @@
529 - static unsigned long efi_rt_eflags;
530 -
531 - void efi_sync_low_kernel_mappings(void) {}
532 --void efi_setup_page_tables(void) {}
533 -+void __init efi_dump_pagetable(void) {}
534 -+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
535 -+{
536 -+ return 0;
537 -+}
538 -+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
539 -
540 - void __init efi_map_region(efi_memory_desc_t *md)
541 - {
542 -diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
543 -index 0c2a234fef1e..666b74a09092 100644
544 ---- a/arch/x86/platform/efi/efi_64.c
545 -+++ b/arch/x86/platform/efi/efi_64.c
546 -@@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void)
547 - sizeof(pgd_t) * num_pgds);
548 - }
549 -
550 --void efi_setup_page_tables(void)
551 -+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
552 - {
553 -+ pgd_t *pgd;
554 -+
555 -+ if (efi_enabled(EFI_OLD_MEMMAP))
556 -+ return 0;
557 -+
558 - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
559 -+ pgd = __va(efi_scratch.efi_pgt);
560 -
561 -- if (!efi_enabled(EFI_OLD_MEMMAP))
562 -- efi_scratch.use_pgd = true;
563 -+ /*
564 -+ * It can happen that the physical address of new_memmap lands in memory
565 -+ * which is not mapped in the EFI page table. Therefore we need to go
566 -+ * and ident-map those pages containing the map before calling
567 -+ * phys_efi_set_virtual_address_map().
568 -+ */
569 -+ if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
570 -+ pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
571 -+ return 1;
572 -+ }
573 -+
574 -+ efi_scratch.use_pgd = true;
575 -+
576 -+
577 -+ return 0;
578 -+}
579 -+
580 -+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
581 -+{
582 -+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
583 -+
584 -+ kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
585 - }
586 -
587 - static void __init __map_region(efi_memory_desc_t *md, u64 va)
588 -diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
589 -index 02125e6a9109..5a4da94aefb0 100644
590 ---- a/drivers/isdn/isdnloop/isdnloop.c
591 -+++ b/drivers/isdn/isdnloop/isdnloop.c
592 -@@ -518,9 +518,9 @@ static isdnloop_stat isdnloop_cmd_table[] =
593 - static void
594 - isdnloop_fake_err(isdnloop_card *card)
595 - {
596 -- char buf[60];
597 -+ char buf[64];
598 -
599 -- sprintf(buf, "E%s", card->omsg);
600 -+ snprintf(buf, sizeof(buf), "E%s", card->omsg);
601 - isdnloop_fake(card, buf, -1);
602 - isdnloop_fake(card, "NAK", -1);
603 - }
604 -@@ -903,6 +903,8 @@ isdnloop_parse_cmd(isdnloop_card *card)
605 - case 7:
606 - /* 0x;EAZ */
607 - p += 3;
608 -+ if (strlen(p) >= sizeof(card->eazlist[0]))
609 -+ break;
610 - strcpy(card->eazlist[ch - 1], p);
611 - break;
612 - case 8:
613 -@@ -1070,6 +1072,12 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp)
614 - return -EBUSY;
615 - if (copy_from_user((char *) &sdef, (char *) sdefp, sizeof(sdef)))
616 - return -EFAULT;
617 -+
618 -+ for (i = 0; i < 3; i++) {
619 -+ if (!memchr(sdef.num[i], 0, sizeof(sdef.num[i])))
620 -+ return -EINVAL;
621 -+ }
622 -+
623 - spin_lock_irqsave(&card->isdnloop_lock, flags);
624 - switch (sdef.ptype) {
625 - case ISDN_PTYPE_EURO:
626 -@@ -1127,7 +1135,7 @@ isdnloop_command(isdn_ctrl *c, isdnloop_card *card)
627 - {
628 - ulong a;
629 - int i;
630 -- char cbuf[60];
631 -+ char cbuf[80];
632 - isdn_ctrl cmd;
633 - isdnloop_cdef cdef;
634 -
635 -@@ -1192,7 +1200,6 @@ isdnloop_command(isdn_ctrl *c, isdnloop_card *card)
636 - break;
637 - if ((c->arg & 255) < ISDNLOOP_BCH) {
638 - char *p;
639 -- char dial[50];
640 - char dcode[4];
641 -
642 - a = c->arg;
643 -@@ -1204,10 +1211,10 @@ isdnloop_command(isdn_ctrl *c, isdnloop_card *card)
644 - } else
645 - /* Normal Dial */
646 - strcpy(dcode, "CAL");
647 -- strcpy(dial, p);
648 -- sprintf(cbuf, "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1),
649 -- dcode, dial, c->parm.setup.si1,
650 -- c->parm.setup.si2, c->parm.setup.eazmsn);
651 -+ snprintf(cbuf, sizeof(cbuf),
652 -+ "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1),
653 -+ dcode, p, c->parm.setup.si1,
654 -+ c->parm.setup.si2, c->parm.setup.eazmsn);
655 - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card);
656 - }
657 - break;
658 -diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
659 -index ce75de9bae9e..4a79edaf3885 100644
660 ---- a/drivers/net/ethernet/cadence/at91_ether.c
661 -+++ b/drivers/net/ethernet/cadence/at91_ether.c
662 -@@ -342,6 +342,9 @@ static int __init at91ether_probe(struct platform_device *pdev)
663 - }
664 - clk_enable(lp->pclk);
665 -
666 -+ lp->hclk = ERR_PTR(-ENOENT);
667 -+ lp->tx_clk = ERR_PTR(-ENOENT);
668 -+
669 - /* Install the interrupt handler */
670 - dev->irq = platform_get_irq(pdev, 0);
671 - res = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt, 0, dev->name, dev);
672 -diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
673 -index 174a92f5fe51..7645a3ce3854 100644
674 ---- a/drivers/net/ethernet/sfc/ef10.c
675 -+++ b/drivers/net/ethernet/sfc/ef10.c
676 -@@ -565,10 +565,17 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
677 - * several of each (in fact that's the only option if host
678 - * page size is >4K). So we may allocate some extra VIs just
679 - * for writing PIO buffers through.
680 -+ *
681 -+ * The UC mapping contains (min_vis - 1) complete VIs and the
682 -+ * first half of the next VI. Then the WC mapping begins with
683 -+ * the second half of this last VI.
684 - */
685 - uc_mem_map_size = PAGE_ALIGN((min_vis - 1) * EFX_VI_PAGE_SIZE +
686 - ER_DZ_TX_PIOBUF);
687 - if (nic_data->n_piobufs) {
688 -+ /* pio_write_vi_base rounds down to give the number of complete
689 -+ * VIs inside the UC mapping.
690 -+ */
691 - pio_write_vi_base = uc_mem_map_size / EFX_VI_PAGE_SIZE;
692 - wc_mem_map_size = (PAGE_ALIGN((pio_write_vi_base +
693 - nic_data->n_piobufs) *
694 -diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
695 -index 83d464347021..f06c790fba5a 100644
696 ---- a/drivers/net/ethernet/sfc/efx.c
697 -+++ b/drivers/net/ethernet/sfc/efx.c
698 -@@ -1603,6 +1603,8 @@ static int efx_probe_nic(struct efx_nic *efx)
699 - if (rc)
700 - goto fail1;
701 -
702 -+ efx_set_channels(efx);
703 -+
704 - rc = efx->type->dimension_resources(efx);
705 - if (rc)
706 - goto fail2;
707 -@@ -1613,7 +1615,6 @@ static int efx_probe_nic(struct efx_nic *efx)
708 - efx->rx_indir_table[i] =
709 - ethtool_rxfh_indir_default(i, efx->rss_spread);
710 -
711 -- efx_set_channels(efx);
712 - netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
713 - netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
714 -
715 -diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
716 -index 1236812c7be6..d091e52b00e1 100644
717 ---- a/drivers/net/vxlan.c
718 -+++ b/drivers/net/vxlan.c
719 -@@ -871,6 +871,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
720 - if (err)
721 - return err;
722 -
723 -+ if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
724 -+ return -EAFNOSUPPORT;
725 -+
726 - spin_lock_bh(&vxlan->hash_lock);
727 - err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
728 - port, vni, ifindex, ndm->ndm_flags);
729 -@@ -2612,9 +2615,10 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
730 - vni = nla_get_u32(data[IFLA_VXLAN_ID]);
731 - dst->remote_vni = vni;
732 -
733 -+ /* Unless IPv6 is explicitly requested, assume IPv4 */
734 -+ dst->remote_ip.sa.sa_family = AF_INET;
735 - if (data[IFLA_VXLAN_GROUP]) {
736 - dst->remote_ip.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
737 -- dst->remote_ip.sa.sa_family = AF_INET;
738 - } else if (data[IFLA_VXLAN_GROUP6]) {
739 - if (!IS_ENABLED(CONFIG_IPV6))
740 - return -EPFNOSUPPORT;
741 -diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c
742 -index 6abf74e1351f..5bc871513505 100644
743 ---- a/drivers/net/wireless/iwlwifi/mvm/rs.c
744 -+++ b/drivers/net/wireless/iwlwifi/mvm/rs.c
745 -@@ -211,9 +211,9 @@ static const struct rs_tx_column rs_tx_columns[] = {
746 - .next_columns = {
747 - RS_COLUMN_LEGACY_ANT_B,
748 - RS_COLUMN_SISO_ANT_A,
749 -+ RS_COLUMN_SISO_ANT_B,
750 - RS_COLUMN_MIMO2,
751 -- RS_COLUMN_INVALID,
752 -- RS_COLUMN_INVALID,
753 -+ RS_COLUMN_MIMO2_SGI,
754 - },
755 - },
756 - [RS_COLUMN_LEGACY_ANT_B] = {
757 -@@ -221,10 +221,10 @@ static const struct rs_tx_column rs_tx_columns[] = {
758 - .ant = ANT_B,
759 - .next_columns = {
760 - RS_COLUMN_LEGACY_ANT_A,
761 -+ RS_COLUMN_SISO_ANT_A,
762 - RS_COLUMN_SISO_ANT_B,
763 - RS_COLUMN_MIMO2,
764 -- RS_COLUMN_INVALID,
765 -- RS_COLUMN_INVALID,
766 -+ RS_COLUMN_MIMO2_SGI,
767 - },
768 - },
769 - [RS_COLUMN_SISO_ANT_A] = {
770 -@@ -234,8 +234,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
771 - RS_COLUMN_SISO_ANT_B,
772 - RS_COLUMN_MIMO2,
773 - RS_COLUMN_SISO_ANT_A_SGI,
774 -- RS_COLUMN_INVALID,
775 -- RS_COLUMN_INVALID,
776 -+ RS_COLUMN_SISO_ANT_B_SGI,
777 -+ RS_COLUMN_MIMO2_SGI,
778 - },
779 - .checks = {
780 - rs_siso_allow,
781 -@@ -248,8 +248,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
782 - RS_COLUMN_SISO_ANT_A,
783 - RS_COLUMN_MIMO2,
784 - RS_COLUMN_SISO_ANT_B_SGI,
785 -- RS_COLUMN_INVALID,
786 -- RS_COLUMN_INVALID,
787 -+ RS_COLUMN_SISO_ANT_A_SGI,
788 -+ RS_COLUMN_MIMO2_SGI,
789 - },
790 - .checks = {
791 - rs_siso_allow,
792 -@@ -263,8 +263,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
793 - RS_COLUMN_SISO_ANT_B_SGI,
794 - RS_COLUMN_MIMO2_SGI,
795 - RS_COLUMN_SISO_ANT_A,
796 -- RS_COLUMN_INVALID,
797 -- RS_COLUMN_INVALID,
798 -+ RS_COLUMN_SISO_ANT_B,
799 -+ RS_COLUMN_MIMO2,
800 - },
801 - .checks = {
802 - rs_siso_allow,
803 -@@ -279,8 +279,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
804 - RS_COLUMN_SISO_ANT_A_SGI,
805 - RS_COLUMN_MIMO2_SGI,
806 - RS_COLUMN_SISO_ANT_B,
807 -- RS_COLUMN_INVALID,
808 -- RS_COLUMN_INVALID,
809 -+ RS_COLUMN_SISO_ANT_A,
810 -+ RS_COLUMN_MIMO2,
811 - },
812 - .checks = {
813 - rs_siso_allow,
814 -@@ -292,10 +292,10 @@ static const struct rs_tx_column rs_tx_columns[] = {
815 - .ant = ANT_AB,
816 - .next_columns = {
817 - RS_COLUMN_SISO_ANT_A,
818 -+ RS_COLUMN_SISO_ANT_B,
819 -+ RS_COLUMN_SISO_ANT_A_SGI,
820 -+ RS_COLUMN_SISO_ANT_B_SGI,
821 - RS_COLUMN_MIMO2_SGI,
822 -- RS_COLUMN_INVALID,
823 -- RS_COLUMN_INVALID,
824 -- RS_COLUMN_INVALID,
825 - },
826 - .checks = {
827 - rs_mimo_allow,
828 -@@ -307,10 +307,10 @@ static const struct rs_tx_column rs_tx_columns[] = {
829 - .sgi = true,
830 - .next_columns = {
831 - RS_COLUMN_SISO_ANT_A_SGI,
832 -+ RS_COLUMN_SISO_ANT_B_SGI,
833 -+ RS_COLUMN_SISO_ANT_A,
834 -+ RS_COLUMN_SISO_ANT_B,
835 - RS_COLUMN_MIMO2,
836 -- RS_COLUMN_INVALID,
837 -- RS_COLUMN_INVALID,
838 -- RS_COLUMN_INVALID,
839 - },
840 - .checks = {
841 - rs_mimo_allow,
842 -diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
843 -index ae413a2cbee7..4bf5b334664e 100644
844 ---- a/drivers/net/xen-netback/common.h
845 -+++ b/drivers/net/xen-netback/common.h
846 -@@ -113,6 +113,11 @@ struct xenvif {
847 - domid_t domid;
848 - unsigned int handle;
849 -
850 -+ /* Is this interface disabled? True when backend discovers
851 -+ * frontend is rogue.
852 -+ */
853 -+ bool disabled;
854 -+
855 - /* Use NAPI for guest TX */
856 - struct napi_struct napi;
857 - /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
858 -diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
859 -index 301cc037fda8..2e92d52c0a6d 100644
860 ---- a/drivers/net/xen-netback/interface.c
861 -+++ b/drivers/net/xen-netback/interface.c
862 -@@ -62,6 +62,15 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
863 - struct xenvif *vif = container_of(napi, struct xenvif, napi);
864 - int work_done;
865 -
866 -+ /* This vif is rogue, we pretend we've there is nothing to do
867 -+ * for this vif to deschedule it from NAPI. But this interface
868 -+ * will be turned off in thread context later.
869 -+ */
870 -+ if (unlikely(vif->disabled)) {
871 -+ napi_complete(napi);
872 -+ return 0;
873 -+ }
874 -+
875 - work_done = xenvif_tx_action(vif, budget);
876 -
877 - if (work_done < budget) {
878 -@@ -321,6 +330,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
879 - vif->ip_csum = 1;
880 - vif->dev = dev;
881 -
882 -+ vif->disabled = false;
883 -+
884 - vif->credit_bytes = vif->remaining_credit = ~0UL;
885 - vif->credit_usec = 0UL;
886 - init_timer(&vif->credit_timeout);
887 -diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
888 -index 438d0c09b7e6..97030c193afd 100644
889 ---- a/drivers/net/xen-netback/netback.c
890 -+++ b/drivers/net/xen-netback/netback.c
891 -@@ -192,8 +192,8 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
892 - * into multiple copies tend to give large frags their
893 - * own buffers as before.
894 - */
895 -- if ((offset + size > MAX_BUFFER_OFFSET) &&
896 -- (size <= MAX_BUFFER_OFFSET) && offset && !head)
897 -+ BUG_ON(size > MAX_BUFFER_OFFSET);
898 -+ if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
899 - return true;
900 -
901 - return false;
902 -@@ -482,6 +482,8 @@ static void xenvif_rx_action(struct xenvif *vif)
903 -
904 - while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
905 - RING_IDX max_slots_needed;
906 -+ RING_IDX old_req_cons;
907 -+ RING_IDX ring_slots_used;
908 - int i;
909 -
910 - /* We need a cheap worse case estimate for the number of
911 -@@ -493,9 +495,28 @@ static void xenvif_rx_action(struct xenvif *vif)
912 - PAGE_SIZE);
913 - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
914 - unsigned int size;
915 -+ unsigned int offset;
916 -+
917 - size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
918 -- max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
919 -+ offset = skb_shinfo(skb)->frags[i].page_offset;
920 -+
921 -+ /* For a worse-case estimate we need to factor in
922 -+ * the fragment page offset as this will affect the
923 -+ * number of times xenvif_gop_frag_copy() will
924 -+ * call start_new_rx_buffer().
925 -+ */
926 -+ max_slots_needed += DIV_ROUND_UP(offset + size,
927 -+ PAGE_SIZE);
928 - }
929 -+
930 -+ /* To avoid the estimate becoming too pessimal for some
931 -+ * frontends that limit posted rx requests, cap the estimate
932 -+ * at MAX_SKB_FRAGS.
933 -+ */
934 -+ if (max_slots_needed > MAX_SKB_FRAGS)
935 -+ max_slots_needed = MAX_SKB_FRAGS;
936 -+
937 -+ /* We may need one more slot for GSO metadata */
938 - if (skb_is_gso(skb) &&
939 - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
940 - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
941 -@@ -511,8 +532,12 @@ static void xenvif_rx_action(struct xenvif *vif)
942 - vif->rx_last_skb_slots = 0;
943 -
944 - sco = (struct skb_cb_overlay *)skb->cb;
945 -+
946 -+ old_req_cons = vif->rx.req_cons;
947 - sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
948 -- BUG_ON(sco->meta_slots_used > max_slots_needed);
949 -+ ring_slots_used = vif->rx.req_cons - old_req_cons;
950 -+
951 -+ BUG_ON(ring_slots_used > max_slots_needed);
952 -
953 - __skb_queue_tail(&rxq, skb);
954 - }
955 -@@ -655,7 +680,8 @@ static void xenvif_tx_err(struct xenvif *vif,
956 - static void xenvif_fatal_tx_err(struct xenvif *vif)
957 - {
958 - netdev_err(vif->dev, "fatal error; disabling device\n");
959 -- xenvif_carrier_off(vif);
960 -+ vif->disabled = true;
961 -+ xenvif_kick_thread(vif);
962 - }
963 -
964 - static int xenvif_count_requests(struct xenvif *vif,
965 -@@ -1126,7 +1152,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
966 - vif->tx.sring->req_prod, vif->tx.req_cons,
967 - XEN_NETIF_TX_RING_SIZE);
968 - xenvif_fatal_tx_err(vif);
969 -- continue;
970 -+ break;
971 - }
972 -
973 - work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
974 -@@ -1548,7 +1574,18 @@ int xenvif_kthread(void *data)
975 - while (!kthread_should_stop()) {
976 - wait_event_interruptible(vif->wq,
977 - rx_work_todo(vif) ||
978 -+ vif->disabled ||
979 - kthread_should_stop());
980 -+
981 -+ /* This frontend is found to be rogue, disable it in
982 -+ * kthread context. Currently this is only set when
983 -+ * netback finds out frontend sends malformed packet,
984 -+ * but we cannot disable the interface in softirq
985 -+ * context so we defer it here.
986 -+ */
987 -+ if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
988 -+ xenvif_carrier_off(vif);
989 -+
990 - if (kthread_should_stop())
991 - break;
992 -
993 -diff --git a/include/linux/futex.h b/include/linux/futex.h
994 -index b0d95cac826e..6435f46d6e13 100644
995 ---- a/include/linux/futex.h
996 -+++ b/include/linux/futex.h
997 -@@ -55,7 +55,11 @@ union futex_key {
998 - #ifdef CONFIG_FUTEX
999 - extern void exit_robust_list(struct task_struct *curr);
1000 - extern void exit_pi_state_list(struct task_struct *curr);
1001 -+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
1002 -+#define futex_cmpxchg_enabled 1
1003 -+#else
1004 - extern int futex_cmpxchg_enabled;
1005 -+#endif
1006 - #else
1007 - static inline void exit_robust_list(struct task_struct *curr)
1008 - {
1009 -diff --git a/init/Kconfig b/init/Kconfig
1010 -index 009a797dd242..d56cb03c1b49 100644
1011 ---- a/init/Kconfig
1012 -+++ b/init/Kconfig
1013 -@@ -1387,6 +1387,13 @@ config FUTEX
1014 - support for "fast userspace mutexes". The resulting kernel may not
1015 - run glibc-based applications correctly.
1016 -
1017 -+config HAVE_FUTEX_CMPXCHG
1018 -+ bool
1019 -+ help
1020 -+ Architectures should select this if futex_atomic_cmpxchg_inatomic()
1021 -+ is implemented and always working. This removes a couple of runtime
1022 -+ checks.
1023 -+
1024 - config EPOLL
1025 - bool "Enable eventpoll support" if EXPERT
1026 - default y
1027 -diff --git a/kernel/futex.c b/kernel/futex.c
1028 -index 08ec814ad9d2..6801b3751a95 100644
1029 ---- a/kernel/futex.c
1030 -+++ b/kernel/futex.c
1031 -@@ -157,7 +157,9 @@
1032 - * enqueue.
1033 - */
1034 -
1035 -+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
1036 - int __read_mostly futex_cmpxchg_enabled;
1037 -+#endif
1038 -
1039 - /*
1040 - * Futex flags used to encode options to functions and preserve them across
1041 -@@ -1450,6 +1452,7 @@ retry:
1042 - hb2 = hash_futex(&key2);
1043 -
1044 - retry_private:
1045 -+ hb_waiters_inc(hb2);
1046 - double_lock_hb(hb1, hb2);
1047 -
1048 - if (likely(cmpval != NULL)) {
1049 -@@ -1459,6 +1462,7 @@ retry_private:
1050 -
1051 - if (unlikely(ret)) {
1052 - double_unlock_hb(hb1, hb2);
1053 -+ hb_waiters_dec(hb2);
1054 -
1055 - ret = get_user(curval, uaddr1);
1056 - if (ret)
1057 -@@ -1508,6 +1512,7 @@ retry_private:
1058 - break;
1059 - case -EFAULT:
1060 - double_unlock_hb(hb1, hb2);
1061 -+ hb_waiters_dec(hb2);
1062 - put_futex_key(&key2);
1063 - put_futex_key(&key1);
1064 - ret = fault_in_user_writeable(uaddr2);
1065 -@@ -1517,6 +1522,7 @@ retry_private:
1066 - case -EAGAIN:
1067 - /* The owner was exiting, try again. */
1068 - double_unlock_hb(hb1, hb2);
1069 -+ hb_waiters_dec(hb2);
1070 - put_futex_key(&key2);
1071 - put_futex_key(&key1);
1072 - cond_resched();
1073 -@@ -1592,6 +1598,7 @@ retry_private:
1074 -
1075 - out_unlock:
1076 - double_unlock_hb(hb1, hb2);
1077 -+ hb_waiters_dec(hb2);
1078 -
1079 - /*
1080 - * drop_futex_key_refs() must be called outside the spinlocks. During
1081 -@@ -2875,9 +2882,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
1082 - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
1083 - }
1084 -
1085 --static int __init futex_init(void)
1086 -+static void __init futex_detect_cmpxchg(void)
1087 - {
1088 -+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
1089 - u32 curval;
1090 -+
1091 -+ /*
1092 -+ * This will fail and we want it. Some arch implementations do
1093 -+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
1094 -+ * functionality. We want to know that before we call in any
1095 -+ * of the complex code paths. Also we want to prevent
1096 -+ * registration of robust lists in that case. NULL is
1097 -+ * guaranteed to fault and we get -EFAULT on functional
1098 -+ * implementation, the non-functional ones will return
1099 -+ * -ENOSYS.
1100 -+ */
1101 -+ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
1102 -+ futex_cmpxchg_enabled = 1;
1103 -+#endif
1104 -+}
1105 -+
1106 -+static int __init futex_init(void)
1107 -+{
1108 - unsigned int futex_shift;
1109 - unsigned long i;
1110 -
1111 -@@ -2893,18 +2919,8 @@ static int __init futex_init(void)
1112 - &futex_shift, NULL,
1113 - futex_hashsize, futex_hashsize);
1114 - futex_hashsize = 1UL << futex_shift;
1115 -- /*
1116 -- * This will fail and we want it. Some arch implementations do
1117 -- * runtime detection of the futex_atomic_cmpxchg_inatomic()
1118 -- * functionality. We want to know that before we call in any
1119 -- * of the complex code paths. Also we want to prevent
1120 -- * registration of robust lists in that case. NULL is
1121 -- * guaranteed to fault and we get -EFAULT on functional
1122 -- * implementation, the non-functional ones will return
1123 -- * -ENOSYS.
1124 -- */
1125 -- if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
1126 -- futex_cmpxchg_enabled = 1;
1127 -+
1128 -+ futex_detect_cmpxchg();
1129 -
1130 - for (i = 0; i < futex_hashsize; i++) {
1131 - atomic_set(&futex_queues[i].waiters, 0);
1132 -diff --git a/lib/nlattr.c b/lib/nlattr.c
1133 -index 18eca7809b08..fc6754720ced 100644
1134 ---- a/lib/nlattr.c
1135 -+++ b/lib/nlattr.c
1136 -@@ -303,9 +303,15 @@ int nla_memcmp(const struct nlattr *nla, const void *data,
1137 - */
1138 - int nla_strcmp(const struct nlattr *nla, const char *str)
1139 - {
1140 -- int len = strlen(str) + 1;
1141 -- int d = nla_len(nla) - len;
1142 -+ int len = strlen(str);
1143 -+ char *buf = nla_data(nla);
1144 -+ int attrlen = nla_len(nla);
1145 -+ int d;
1146 -
1147 -+ if (attrlen > 0 && buf[attrlen - 1] == '\0')
1148 -+ attrlen--;
1149 -+
1150 -+ d = attrlen - len;
1151 - if (d == 0)
1152 - d = memcmp(nla_data(nla), str, len);
1153 -
1154 -diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
1155 -index f2610e157660..7b326529e6a2 100644
1156 ---- a/net/ipv6/icmp.c
1157 -+++ b/net/ipv6/icmp.c
1158 -@@ -520,7 +520,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
1159 - np->tclass, NULL, &fl6, (struct rt6_info *)dst,
1160 - MSG_DONTWAIT, np->dontfrag);
1161 - if (err) {
1162 -- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
1163 -+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
1164 - ip6_flush_pending_frames(sk);
1165 - } else {
1166 - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
1167 -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
1168 -index 64d6073731d3..3702d179506d 100644
1169 ---- a/net/ipv6/ip6_output.c
1170 -+++ b/net/ipv6/ip6_output.c
1171 -@@ -1566,8 +1566,8 @@ int ip6_push_pending_frames(struct sock *sk)
1172 - if (proto == IPPROTO_ICMPV6) {
1173 - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1174 -
1175 -- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1176 -- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1177 -+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1178 -+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1179 - }
1180 -
1181 - err = ip6_local_out(skb);
1182 -diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
1183 -index e1e47350784b..08b367c6b9cf 100644
1184 ---- a/net/ipv6/mcast.c
1185 -+++ b/net/ipv6/mcast.c
1186 -@@ -1620,11 +1620,12 @@ static void mld_sendpack(struct sk_buff *skb)
1187 - dst_output);
1188 - out:
1189 - if (!err) {
1190 -- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
1191 -- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1192 -- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1193 -- } else
1194 -- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
1195 -+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
1196 -+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1197 -+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1198 -+ } else {
1199 -+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1200 -+ }
1201 -
1202 - rcu_read_unlock();
1203 - return;
1204 -diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
1205 -index 587bbdcb22b4..bda74291c3e0 100644
1206 ---- a/net/ipv6/ping.c
1207 -+++ b/net/ipv6/ping.c
1208 -@@ -182,8 +182,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1209 - MSG_DONTWAIT, np->dontfrag);
1210 -
1211 - if (err) {
1212 -- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
1213 -- ICMP6_MIB_OUTERRORS);
1214 -+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
1215 -+ ICMP6_MIB_OUTERRORS);
1216 - ip6_flush_pending_frames(sk);
1217 - } else {
1218 - err = icmpv6_push_pending_frames(sk, &fl6,
1219 -diff --git a/net/rds/iw.c b/net/rds/iw.c
1220 -index 7826d46baa70..589935661d66 100644
1221 ---- a/net/rds/iw.c
1222 -+++ b/net/rds/iw.c
1223 -@@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr)
1224 - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
1225 - /* due to this, we will claim to support IB devices unless we
1226 - check node_type. */
1227 -- if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
1228 -+ if (ret || !cm_id->device ||
1229 -+ cm_id->device->node_type != RDMA_NODE_RNIC)
1230 - ret = -EADDRNOTAVAIL;
1231 -
1232 - rdsdebug("addr %pI4 ret %d node type %d\n",
1233 -diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
1234 -index b332e2cc0954..e294b86c8d88 100644
1235 ---- a/security/selinux/hooks.c
1236 -+++ b/security/selinux/hooks.c
1237 -@@ -1418,15 +1418,33 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1238 - isec->sid = sbsec->sid;
1239 -
1240 - if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) {
1241 -- if (opt_dentry) {
1242 -- isec->sclass = inode_mode_to_security_class(inode->i_mode);
1243 -- rc = selinux_proc_get_sid(opt_dentry,
1244 -- isec->sclass,
1245 -- &sid);
1246 -- if (rc)
1247 -- goto out_unlock;
1248 -- isec->sid = sid;
1249 -- }
1250 -+ /* We must have a dentry to determine the label on
1251 -+ * procfs inodes */
1252 -+ if (opt_dentry)
1253 -+ /* Called from d_instantiate or
1254 -+ * d_splice_alias. */
1255 -+ dentry = dget(opt_dentry);
1256 -+ else
1257 -+ /* Called from selinux_complete_init, try to
1258 -+ * find a dentry. */
1259 -+ dentry = d_find_alias(inode);
1260 -+ /*
1261 -+ * This can be hit on boot when a file is accessed
1262 -+ * before the policy is loaded. When we load policy we
1263 -+ * may find inodes that have no dentry on the
1264 -+ * sbsec->isec_head list. No reason to complain as
1265 -+ * these will get fixed up the next time we go through
1266 -+ * inode_doinit() with a dentry, before these inodes
1267 -+ * could be used again by userspace.
1268 -+ */
1269 -+ if (!dentry)
1270 -+ goto out_unlock;
1271 -+ isec->sclass = inode_mode_to_security_class(inode->i_mode);
1272 -+ rc = selinux_proc_get_sid(dentry, isec->sclass, &sid);
1273 -+ dput(dentry);
1274 -+ if (rc)
1275 -+ goto out_unlock;
1276 -+ isec->sid = sid;
1277 - }
1278 - break;
1279 - }
1280 -diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
1281 -index e354ab1ec20f..a8dec9e9e876 100644
1282 ---- a/sound/pci/hda/hda_intel.c
1283 -+++ b/sound/pci/hda/hda_intel.c
1284 -@@ -297,9 +297,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
1285 - #define ULI_NUM_CAPTURE 5
1286 - #define ULI_NUM_PLAYBACK 6
1287 -
1288 --/* ATI HDMI may have up to 8 playbacks and 0 capture */
1289 -+/* ATI HDMI has 1 playback and 0 capture */
1290 - #define ATIHDMI_NUM_CAPTURE 0
1291 --#define ATIHDMI_NUM_PLAYBACK 8
1292 -+#define ATIHDMI_NUM_PLAYBACK 1
1293 -
1294 - /* TERA has 4 playback and 3 capture */
1295 - #define TERA_NUM_CAPTURE 3
1296
1297 Deleted: genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch
1298 ===================================================================
1299 --- genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch 2014-05-05 14:22:27 UTC (rev 2771)
1300 +++ genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch 2014-05-05 14:28:48 UTC (rev 2772)
1301 @@ -1,1201 +0,0 @@
1302 -diff --git a/Makefile b/Makefile
1303 -index 7d0b6992d9ed..b2f7de81e9a2 100644
1304 ---- a/Makefile
1305 -+++ b/Makefile
1306 -@@ -1,6 +1,6 @@
1307 - VERSION = 3
1308 - PATCHLEVEL = 14
1309 --SUBLEVEL = 1
1310 -+SUBLEVEL = 2
1311 - EXTRAVERSION =
1312 - NAME = Shuffling Zombie Juror
1313 -
1314 -diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
1315 -index 9f7ca266864a..832d05a914ba 100644
1316 ---- a/arch/x86/kernel/cpu/mshyperv.c
1317 -+++ b/arch/x86/kernel/cpu/mshyperv.c
1318 -@@ -26,6 +26,7 @@
1319 - #include <asm/irq_regs.h>
1320 - #include <asm/i8259.h>
1321 - #include <asm/apic.h>
1322 -+#include <asm/timer.h>
1323 -
1324 - struct ms_hyperv_info ms_hyperv;
1325 - EXPORT_SYMBOL_GPL(ms_hyperv);
1326 -@@ -105,6 +106,11 @@ static void __init ms_hyperv_init_platform(void)
1327 -
1328 - if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
1329 - clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
1330 -+
1331 -+#ifdef CONFIG_X86_IO_APIC
1332 -+ no_timer_check = 1;
1333 -+#endif
1334 -+
1335 - }
1336 -
1337 - const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
1338 -diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
1339 -index bc4a088f9023..6d7d5a1260a6 100644
1340 ---- a/arch/x86/kernel/early-quirks.c
1341 -+++ b/arch/x86/kernel/early-quirks.c
1342 -@@ -203,18 +203,15 @@ static void __init intel_remapping_check(int num, int slot, int func)
1343 - revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
1344 -
1345 - /*
1346 -- * Revision 13 of all triggering devices id in this quirk have
1347 -- * a problem draining interrupts when irq remapping is enabled,
1348 -- * and should be flagged as broken. Additionally revisions 0x12
1349 -- * and 0x22 of device id 0x3405 has this problem.
1350 -+ * Revision <= 13 of all triggering devices id in this quirk
1351 -+ * have a problem draining interrupts when irq remapping is
1352 -+ * enabled, and should be flagged as broken. Additionally
1353 -+ * revision 0x22 of device id 0x3405 has this problem.
1354 - */
1355 -- if (revision == 0x13)
1356 -+ if (revision <= 0x13)
1357 - set_irq_remapping_broken();
1358 -- else if ((device == 0x3405) &&
1359 -- ((revision == 0x12) ||
1360 -- (revision == 0x22)))
1361 -+ else if (device == 0x3405 && revision == 0x22)
1362 - set_irq_remapping_broken();
1363 --
1364 - }
1365 -
1366 - /*
1367 -diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
1368 -index 714e957a871a..db35594d4df7 100644
1369 ---- a/drivers/acpi/button.c
1370 -+++ b/drivers/acpi/button.c
1371 -@@ -302,6 +302,10 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
1372 - input_sync(input);
1373 -
1374 - pm_wakeup_event(&device->dev, 0);
1375 -+ acpi_bus_generate_netlink_event(
1376 -+ device->pnp.device_class,
1377 -+ dev_name(&device->dev),
1378 -+ event, ++button->pushed);
1379 - }
1380 - break;
1381 - default:
1382 -diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
1383 -index f5e4cd7617f6..61e71616689b 100644
1384 ---- a/drivers/char/ipmi/ipmi_bt_sm.c
1385 -+++ b/drivers/char/ipmi/ipmi_bt_sm.c
1386 -@@ -352,7 +352,7 @@ static inline void write_all_bytes(struct si_sm_data *bt)
1387 -
1388 - static inline int read_all_bytes(struct si_sm_data *bt)
1389 - {
1390 -- unsigned char i;
1391 -+ unsigned int i;
1392 -
1393 - /*
1394 - * length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode.
1395 -diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
1396 -index 17ce88f79d2b..f173dd09fce4 100644
1397 ---- a/drivers/pci/host/pcie-designware.c
1398 -+++ b/drivers/pci/host/pcie-designware.c
1399 -@@ -522,13 +522,13 @@ static void dw_pcie_prog_viewport_cfg1(struct pcie_port *pp, u32 busdev)
1400 - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
1401 - PCIE_ATU_VIEWPORT);
1402 - dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_CFG1, PCIE_ATU_CR1);
1403 -- dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1404 - dw_pcie_writel_rc(pp, pp->cfg1_base, PCIE_ATU_LOWER_BASE);
1405 - dw_pcie_writel_rc(pp, (pp->cfg1_base >> 32), PCIE_ATU_UPPER_BASE);
1406 - dw_pcie_writel_rc(pp, pp->cfg1_base + pp->config.cfg1_size - 1,
1407 - PCIE_ATU_LIMIT);
1408 - dw_pcie_writel_rc(pp, busdev, PCIE_ATU_LOWER_TARGET);
1409 - dw_pcie_writel_rc(pp, 0, PCIE_ATU_UPPER_TARGET);
1410 -+ dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1411 - }
1412 -
1413 - static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
1414 -@@ -537,7 +537,6 @@ static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
1415 - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0,
1416 - PCIE_ATU_VIEWPORT);
1417 - dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_MEM, PCIE_ATU_CR1);
1418 -- dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1419 - dw_pcie_writel_rc(pp, pp->mem_base, PCIE_ATU_LOWER_BASE);
1420 - dw_pcie_writel_rc(pp, (pp->mem_base >> 32), PCIE_ATU_UPPER_BASE);
1421 - dw_pcie_writel_rc(pp, pp->mem_base + pp->config.mem_size - 1,
1422 -@@ -545,6 +544,7 @@ static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
1423 - dw_pcie_writel_rc(pp, pp->config.mem_bus_addr, PCIE_ATU_LOWER_TARGET);
1424 - dw_pcie_writel_rc(pp, upper_32_bits(pp->config.mem_bus_addr),
1425 - PCIE_ATU_UPPER_TARGET);
1426 -+ dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1427 - }
1428 -
1429 - static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
1430 -@@ -553,7 +553,6 @@ static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
1431 - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
1432 - PCIE_ATU_VIEWPORT);
1433 - dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_IO, PCIE_ATU_CR1);
1434 -- dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1435 - dw_pcie_writel_rc(pp, pp->io_base, PCIE_ATU_LOWER_BASE);
1436 - dw_pcie_writel_rc(pp, (pp->io_base >> 32), PCIE_ATU_UPPER_BASE);
1437 - dw_pcie_writel_rc(pp, pp->io_base + pp->config.io_size - 1,
1438 -@@ -561,6 +560,7 @@ static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
1439 - dw_pcie_writel_rc(pp, pp->config.io_bus_addr, PCIE_ATU_LOWER_TARGET);
1440 - dw_pcie_writel_rc(pp, upper_32_bits(pp->config.io_bus_addr),
1441 - PCIE_ATU_UPPER_TARGET);
1442 -+ dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1443 - }
1444 -
1445 - static int dw_pcie_rd_other_conf(struct pcie_port *pp, struct pci_bus *bus,
1446 -@@ -800,7 +800,7 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
1447 -
1448 - /* setup RC BARs */
1449 - dw_pcie_writel_rc(pp, 0x00000004, PCI_BASE_ADDRESS_0);
1450 -- dw_pcie_writel_rc(pp, 0x00000004, PCI_BASE_ADDRESS_1);
1451 -+ dw_pcie_writel_rc(pp, 0x00000000, PCI_BASE_ADDRESS_1);
1452 -
1453 - /* setup interrupt pins */
1454 - dw_pcie_readl_rc(pp, PCI_INTERRUPT_LINE, &val);
1455 -diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
1456 -index 470954aba728..36d1a23f14be 100644
1457 ---- a/drivers/scsi/sd.c
1458 -+++ b/drivers/scsi/sd.c
1459 -@@ -1463,8 +1463,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
1460 - sd_print_sense_hdr(sdkp, &sshdr);
1461 - /* we need to evaluate the error return */
1462 - if (scsi_sense_valid(&sshdr) &&
1463 -- /* 0x3a is medium not present */
1464 -- sshdr.asc == 0x3a)
1465 -+ (sshdr.asc == 0x3a || /* medium not present */
1466 -+ sshdr.asc == 0x20)) /* invalid command */
1467 - /* this is no error here */
1468 - return 0;
1469 -
1470 -diff --git a/drivers/staging/comedi/comedi_buf.c b/drivers/staging/comedi/comedi_buf.c
1471 -index 924fce977985..257595016161 100644
1472 ---- a/drivers/staging/comedi/comedi_buf.c
1473 -+++ b/drivers/staging/comedi/comedi_buf.c
1474 -@@ -61,6 +61,8 @@ static void __comedi_buf_free(struct comedi_device *dev,
1475 - struct comedi_subdevice *s)
1476 - {
1477 - struct comedi_async *async = s->async;
1478 -+ struct comedi_buf_map *bm;
1479 -+ unsigned long flags;
1480 -
1481 - if (async->prealloc_buf) {
1482 - vunmap(async->prealloc_buf);
1483 -@@ -68,8 +70,11 @@ static void __comedi_buf_free(struct comedi_device *dev,
1484 - async->prealloc_bufsz = 0;
1485 - }
1486 -
1487 -- comedi_buf_map_put(async->buf_map);
1488 -+ spin_lock_irqsave(&s->spin_lock, flags);
1489 -+ bm = async->buf_map;
1490 - async->buf_map = NULL;
1491 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1492 -+ comedi_buf_map_put(bm);
1493 - }
1494 -
1495 - static void __comedi_buf_alloc(struct comedi_device *dev,
1496 -@@ -80,6 +85,7 @@ static void __comedi_buf_alloc(struct comedi_device *dev,
1497 - struct page **pages = NULL;
1498 - struct comedi_buf_map *bm;
1499 - struct comedi_buf_page *buf;
1500 -+ unsigned long flags;
1501 - unsigned i;
1502 -
1503 - if (!IS_ENABLED(CONFIG_HAS_DMA) && s->async_dma_dir != DMA_NONE) {
1504 -@@ -92,8 +98,10 @@ static void __comedi_buf_alloc(struct comedi_device *dev,
1505 - if (!bm)
1506 - return;
1507 -
1508 -- async->buf_map = bm;
1509 - kref_init(&bm->refcount);
1510 -+ spin_lock_irqsave(&s->spin_lock, flags);
1511 -+ async->buf_map = bm;
1512 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1513 - bm->dma_dir = s->async_dma_dir;
1514 - if (bm->dma_dir != DMA_NONE)
1515 - /* Need ref to hardware device to free buffer later. */
1516 -@@ -127,7 +135,9 @@ static void __comedi_buf_alloc(struct comedi_device *dev,
1517 -
1518 - pages[i] = virt_to_page(buf->virt_addr);
1519 - }
1520 -+ spin_lock_irqsave(&s->spin_lock, flags);
1521 - bm->n_pages = i;
1522 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1523 -
1524 - /* vmap the prealloc_buf if all the pages were allocated */
1525 - if (i == n_pages)
1526 -@@ -150,6 +160,29 @@ int comedi_buf_map_put(struct comedi_buf_map *bm)
1527 - return 1;
1528 - }
1529 -
1530 -+/* returns s->async->buf_map and increments its kref refcount */
1531 -+struct comedi_buf_map *
1532 -+comedi_buf_map_from_subdev_get(struct comedi_subdevice *s)
1533 -+{
1534 -+ struct comedi_async *async = s->async;
1535 -+ struct comedi_buf_map *bm = NULL;
1536 -+ unsigned long flags;
1537 -+
1538 -+ if (!async)
1539 -+ return NULL;
1540 -+
1541 -+ spin_lock_irqsave(&s->spin_lock, flags);
1542 -+ bm = async->buf_map;
1543 -+ /* only want it if buffer pages allocated */
1544 -+ if (bm && bm->n_pages)
1545 -+ comedi_buf_map_get(bm);
1546 -+ else
1547 -+ bm = NULL;
1548 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1549 -+
1550 -+ return bm;
1551 -+}
1552 -+
1553 - bool comedi_buf_is_mmapped(struct comedi_async *async)
1554 - {
1555 - struct comedi_buf_map *bm = async->buf_map;
1556 -diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
1557 -index c22c617b0da1..eae3ee139330 100644
1558 ---- a/drivers/staging/comedi/comedi_fops.c
1559 -+++ b/drivers/staging/comedi/comedi_fops.c
1560 -@@ -1923,14 +1923,21 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
1561 - struct comedi_device *dev = file->private_data;
1562 - struct comedi_subdevice *s;
1563 - struct comedi_async *async;
1564 -- struct comedi_buf_map *bm;
1565 -+ struct comedi_buf_map *bm = NULL;
1566 - unsigned long start = vma->vm_start;
1567 - unsigned long size;
1568 - int n_pages;
1569 - int i;
1570 - int retval;
1571 -
1572 -- mutex_lock(&dev->mutex);
1573 -+ /*
1574 -+ * 'trylock' avoids circular dependency with current->mm->mmap_sem
1575 -+ * and down-reading &dev->attach_lock should normally succeed without
1576 -+ * contention unless the device is in the process of being attached
1577 -+ * or detached.
1578 -+ */
1579 -+ if (!down_read_trylock(&dev->attach_lock))
1580 -+ return -EAGAIN;
1581 -
1582 - if (!dev->attached) {
1583 - dev_dbg(dev->class_dev, "no driver attached\n");
1584 -@@ -1970,7 +1977,9 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
1585 - }
1586 -
1587 - n_pages = size >> PAGE_SHIFT;
1588 -- bm = async->buf_map;
1589 -+
1590 -+ /* get reference to current buf map (if any) */
1591 -+ bm = comedi_buf_map_from_subdev_get(s);
1592 - if (!bm || n_pages > bm->n_pages) {
1593 - retval = -EINVAL;
1594 - goto done;
1595 -@@ -1994,7 +2003,8 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
1596 -
1597 - retval = 0;
1598 - done:
1599 -- mutex_unlock(&dev->mutex);
1600 -+ up_read(&dev->attach_lock);
1601 -+ comedi_buf_map_put(bm); /* put reference to buf map - okay if NULL */
1602 - return retval;
1603 - }
1604 -
1605 -diff --git a/drivers/staging/comedi/comedi_internal.h b/drivers/staging/comedi/comedi_internal.h
1606 -index 9a746570f161..a492f2d2436e 100644
1607 ---- a/drivers/staging/comedi/comedi_internal.h
1608 -+++ b/drivers/staging/comedi/comedi_internal.h
1609 -@@ -19,6 +19,8 @@ void comedi_buf_reset(struct comedi_async *async);
1610 - bool comedi_buf_is_mmapped(struct comedi_async *async);
1611 - void comedi_buf_map_get(struct comedi_buf_map *bm);
1612 - int comedi_buf_map_put(struct comedi_buf_map *bm);
1613 -+struct comedi_buf_map *comedi_buf_map_from_subdev_get(
1614 -+ struct comedi_subdevice *s);
1615 - unsigned int comedi_buf_write_n_allocated(struct comedi_async *async);
1616 - void comedi_device_cancel_all(struct comedi_device *dev);
1617 -
1618 -diff --git a/drivers/staging/comedi/drivers/8255_pci.c b/drivers/staging/comedi/drivers/8255_pci.c
1619 -index 8a57c3c1ade0..1097dc6a3086 100644
1620 ---- a/drivers/staging/comedi/drivers/8255_pci.c
1621 -+++ b/drivers/staging/comedi/drivers/8255_pci.c
1622 -@@ -56,6 +56,7 @@ Configuration Options: not applicable, uses PCI auto config
1623 - #include "../comedidev.h"
1624 -
1625 - #include "8255.h"
1626 -+#include "mite.h"
1627 -
1628 - enum pci_8255_boardid {
1629 - BOARD_ADLINK_PCI7224,
1630 -@@ -79,6 +80,7 @@ struct pci_8255_boardinfo {
1631 - const char *name;
1632 - int dio_badr;
1633 - int n_8255;
1634 -+ unsigned int has_mite:1;
1635 - };
1636 -
1637 - static const struct pci_8255_boardinfo pci_8255_boards[] = {
1638 -@@ -126,36 +128,43 @@ static const struct pci_8255_boardinfo pci_8255_boards[] = {
1639 - .name = "ni_pci-dio-96",
1640 - .dio_badr = 1,
1641 - .n_8255 = 4,
1642 -+ .has_mite = 1,
1643 - },
1644 - [BOARD_NI_PCIDIO96B] = {
1645 - .name = "ni_pci-dio-96b",
1646 - .dio_badr = 1,
1647 - .n_8255 = 4,
1648 -+ .has_mite = 1,
1649 - },
1650 - [BOARD_NI_PXI6508] = {
1651 - .name = "ni_pxi-6508",
1652 - .dio_badr = 1,
1653 - .n_8255 = 4,
1654 -+ .has_mite = 1,
1655 - },
1656 - [BOARD_NI_PCI6503] = {
1657 - .name = "ni_pci-6503",
1658 - .dio_badr = 1,
1659 - .n_8255 = 1,
1660 -+ .has_mite = 1,
1661 - },
1662 - [BOARD_NI_PCI6503B] = {
1663 - .name = "ni_pci-6503b",
1664 - .dio_badr = 1,
1665 - .n_8255 = 1,
1666 -+ .has_mite = 1,
1667 - },
1668 - [BOARD_NI_PCI6503X] = {
1669 - .name = "ni_pci-6503x",
1670 - .dio_badr = 1,
1671 - .n_8255 = 1,
1672 -+ .has_mite = 1,
1673 - },
1674 - [BOARD_NI_PXI_6503] = {
1675 - .name = "ni_pxi-6503",
1676 - .dio_badr = 1,
1677 - .n_8255 = 1,
1678 -+ .has_mite = 1,
1679 - },
1680 - };
1681 -
1682 -@@ -163,6 +172,25 @@ struct pci_8255_private {
1683 - void __iomem *mmio_base;
1684 - };
1685 -
1686 -+static int pci_8255_mite_init(struct pci_dev *pcidev)
1687 -+{
1688 -+ void __iomem *mite_base;
1689 -+ u32 main_phys_addr;
1690 -+
1691 -+ /* ioremap the MITE registers (BAR 0) temporarily */
1692 -+ mite_base = pci_ioremap_bar(pcidev, 0);
1693 -+ if (!mite_base)
1694 -+ return -ENOMEM;
1695 -+
1696 -+ /* set data window to main registers (BAR 1) */
1697 -+ main_phys_addr = pci_resource_start(pcidev, 1);
1698 -+ writel(main_phys_addr | WENAB, mite_base + MITE_IODWBSR);
1699 -+
1700 -+ /* finished with MITE registers */
1701 -+ iounmap(mite_base);
1702 -+ return 0;
1703 -+}
1704 -+
1705 - static int pci_8255_mmio(int dir, int port, int data, unsigned long iobase)
1706 - {
1707 - void __iomem *mmio_base = (void __iomem *)iobase;
1708 -@@ -201,6 +229,12 @@ static int pci_8255_auto_attach(struct comedi_device *dev,
1709 - if (ret)
1710 - return ret;
1711 -
1712 -+ if (board->has_mite) {
1713 -+ ret = pci_8255_mite_init(pcidev);
1714 -+ if (ret)
1715 -+ return ret;
1716 -+ }
1717 -+
1718 - is_mmio = (pci_resource_flags(pcidev, board->dio_badr) &
1719 - IORESOURCE_MEM) != 0;
1720 - if (is_mmio) {
1721 -diff --git a/drivers/tty/ipwireless/tty.c b/drivers/tty/ipwireless/tty.c
1722 -index ebd5bff0f5c1..17ee3bf0926b 100644
1723 ---- a/drivers/tty/ipwireless/tty.c
1724 -+++ b/drivers/tty/ipwireless/tty.c
1725 -@@ -176,9 +176,6 @@ void ipwireless_tty_received(struct ipw_tty *tty, unsigned char *data,
1726 - ": %d chars not inserted to flip buffer!\n",
1727 - length - work);
1728 -
1729 -- /*
1730 -- * This may sleep if ->low_latency is set
1731 -- */
1732 - if (work)
1733 - tty_flip_buffer_push(&tty->port);
1734 - }
1735 -diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
1736 -index 765125dff20e..8ebd9f88a6f6 100644
1737 ---- a/drivers/tty/tty_buffer.c
1738 -+++ b/drivers/tty/tty_buffer.c
1739 -@@ -351,14 +351,11 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags);
1740 - * Takes any pending buffers and transfers their ownership to the
1741 - * ldisc side of the queue. It then schedules those characters for
1742 - * processing by the line discipline.
1743 -- * Note that this function can only be used when the low_latency flag
1744 -- * is unset. Otherwise the workqueue won't be flushed.
1745 - */
1746 -
1747 - void tty_schedule_flip(struct tty_port *port)
1748 - {
1749 - struct tty_bufhead *buf = &port->buf;
1750 -- WARN_ON(port->low_latency);
1751 -
1752 - buf->tail->commit = buf->tail->used;
1753 - schedule_work(&buf->work);
1754 -@@ -482,17 +479,15 @@ static void flush_to_ldisc(struct work_struct *work)
1755 - */
1756 - void tty_flush_to_ldisc(struct tty_struct *tty)
1757 - {
1758 -- if (!tty->port->low_latency)
1759 -- flush_work(&tty->port->buf.work);
1760 -+ flush_work(&tty->port->buf.work);
1761 - }
1762 -
1763 - /**
1764 - * tty_flip_buffer_push - terminal
1765 - * @port: tty port to push
1766 - *
1767 -- * Queue a push of the terminal flip buffers to the line discipline. This
1768 -- * function must not be called from IRQ context if port->low_latency is
1769 -- * set.
1770 -+ * Queue a push of the terminal flip buffers to the line discipline.
1771 -+ * Can be called from IRQ/atomic context.
1772 - *
1773 - * In the event of the queue being busy for flipping the work will be
1774 - * held off and retried later.
1775 -@@ -500,14 +495,7 @@ void tty_flush_to_ldisc(struct tty_struct *tty)
1776 -
1777 - void tty_flip_buffer_push(struct tty_port *port)
1778 - {
1779 -- struct tty_bufhead *buf = &port->buf;
1780 --
1781 -- buf->tail->commit = buf->tail->used;
1782 --
1783 -- if (port->low_latency)
1784 -- flush_to_ldisc(&buf->work);
1785 -- else
1786 -- schedule_work(&buf->work);
1787 -+ tty_schedule_flip(port);
1788 - }
1789 - EXPORT_SYMBOL(tty_flip_buffer_push);
1790 -
1791 -diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
1792 -index c74a00ad7add..d3448a90f0f9 100644
1793 ---- a/drivers/tty/tty_io.c
1794 -+++ b/drivers/tty/tty_io.c
1795 -@@ -1271,12 +1271,13 @@ static void pty_line_name(struct tty_driver *driver, int index, char *p)
1796 - *
1797 - * Locking: None
1798 - */
1799 --static void tty_line_name(struct tty_driver *driver, int index, char *p)
1800 -+static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p)
1801 - {
1802 - if (driver->flags & TTY_DRIVER_UNNUMBERED_NODE)
1803 -- strcpy(p, driver->name);
1804 -+ return sprintf(p, "%s", driver->name);
1805 - else
1806 -- sprintf(p, "%s%d", driver->name, index + driver->name_base);
1807 -+ return sprintf(p, "%s%d", driver->name,
1808 -+ index + driver->name_base);
1809 - }
1810 -
1811 - /**
1812 -@@ -3545,9 +3546,19 @@ static ssize_t show_cons_active(struct device *dev,
1813 - if (i >= ARRAY_SIZE(cs))
1814 - break;
1815 - }
1816 -- while (i--)
1817 -- count += sprintf(buf + count, "%s%d%c",
1818 -- cs[i]->name, cs[i]->index, i ? ' ':'\n');
1819 -+ while (i--) {
1820 -+ int index = cs[i]->index;
1821 -+ struct tty_driver *drv = cs[i]->device(cs[i], &index);
1822 -+
1823 -+ /* don't resolve tty0 as some programs depend on it */
1824 -+ if (drv && (cs[i]->index > 0 || drv->major != TTY_MAJOR))
1825 -+ count += tty_line_name(drv, index, buf + count);
1826 -+ else
1827 -+ count += sprintf(buf + count, "%s%d",
1828 -+ cs[i]->name, cs[i]->index);
1829 -+
1830 -+ count += sprintf(buf + count, "%c", i ? ' ':'\n');
1831 -+ }
1832 - console_unlock();
1833 -
1834 - return count;
1835 -diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c
1836 -index b369292d4b90..ad0aca812002 100644
1837 ---- a/drivers/usb/gadget/u_serial.c
1838 -+++ b/drivers/usb/gadget/u_serial.c
1839 -@@ -549,8 +549,8 @@ static void gs_rx_push(unsigned long _port)
1840 - port->read_started--;
1841 - }
1842 -
1843 -- /* Push from tty to ldisc; without low_latency set this is handled by
1844 -- * a workqueue, so we won't get callbacks and can hold port_lock
1845 -+ /* Push from tty to ldisc; this is handled by a workqueue,
1846 -+ * so we won't get callbacks and can hold port_lock
1847 - */
1848 - if (do_push)
1849 - tty_flip_buffer_push(&port->port);
1850 -diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
1851 -index 81ea55314b1f..9a527a1826df 100644
1852 ---- a/fs/btrfs/disk-io.c
1853 -+++ b/fs/btrfs/disk-io.c
1854 -@@ -3244,6 +3244,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
1855 - /* send down all the barriers */
1856 - head = &info->fs_devices->devices;
1857 - list_for_each_entry_rcu(dev, head, dev_list) {
1858 -+ if (dev->missing)
1859 -+ continue;
1860 - if (!dev->bdev) {
1861 - errors_send++;
1862 - continue;
1863 -@@ -3258,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
1864 -
1865 - /* wait for all the barriers */
1866 - list_for_each_entry_rcu(dev, head, dev_list) {
1867 -+ if (dev->missing)
1868 -+ continue;
1869 - if (!dev->bdev) {
1870 - errors_wait++;
1871 - continue;
1872 -diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
1873 -index 32312e09f0f5..3c8e68da9ef8 100644
1874 ---- a/fs/btrfs/extent-tree.c
1875 -+++ b/fs/btrfs/extent-tree.c
1876 -@@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1877 - spin_unlock(&locked_ref->lock);
1878 - spin_lock(&delayed_refs->lock);
1879 - spin_lock(&locked_ref->lock);
1880 -- if (rb_first(&locked_ref->ref_root)) {
1881 -+ if (rb_first(&locked_ref->ref_root) ||
1882 -+ locked_ref->extent_op) {
1883 - spin_unlock(&locked_ref->lock);
1884 - spin_unlock(&delayed_refs->lock);
1885 - continue;
1886 -diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
1887 -index 34cd83184c4a..b05bf58b9395 100644
1888 ---- a/fs/btrfs/transaction.c
1889 -+++ b/fs/btrfs/transaction.c
1890 -@@ -683,7 +683,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
1891 - int lock = (trans->type != TRANS_JOIN_NOLOCK);
1892 - int err = 0;
1893 -
1894 -- if (--trans->use_count) {
1895 -+ if (trans->use_count > 1) {
1896 -+ trans->use_count--;
1897 - trans->block_rsv = trans->orig_rsv;
1898 - return 0;
1899 - }
1900 -@@ -731,17 +732,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
1901 - }
1902 -
1903 - if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
1904 -- if (throttle) {
1905 -- /*
1906 -- * We may race with somebody else here so end up having
1907 -- * to call end_transaction on ourselves again, so inc
1908 -- * our use_count.
1909 -- */
1910 -- trans->use_count++;
1911 -+ if (throttle)
1912 - return btrfs_commit_transaction(trans, root);
1913 -- } else {
1914 -+ else
1915 - wake_up_process(info->transaction_kthread);
1916 -- }
1917 - }
1918 -
1919 - if (trans->type & __TRANS_FREEZABLE)
1920 -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
1921 -index 74bc2d549c58..47188916dd8d 100644
1922 ---- a/fs/ext4/extents.c
1923 -+++ b/fs/ext4/extents.c
1924 -@@ -2585,6 +2585,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1925 - ex_ee_block = le32_to_cpu(ex->ee_block);
1926 - ex_ee_len = ext4_ext_get_actual_len(ex);
1927 -
1928 -+ /*
1929 -+ * If we're starting with an extent other than the last one in the
1930 -+ * node, we need to see if it shares a cluster with the extent to
1931 -+ * the right (towards the end of the file). If its leftmost cluster
1932 -+ * is this extent's rightmost cluster and it is not cluster aligned,
1933 -+ * we'll mark it as a partial that is not to be deallocated.
1934 -+ */
1935 -+
1936 -+ if (ex != EXT_LAST_EXTENT(eh)) {
1937 -+ ext4_fsblk_t current_pblk, right_pblk;
1938 -+ long long current_cluster, right_cluster;
1939 -+
1940 -+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
1941 -+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
1942 -+ right_pblk = ext4_ext_pblock(ex + 1);
1943 -+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
1944 -+ if (current_cluster == right_cluster &&
1945 -+ EXT4_PBLK_COFF(sbi, right_pblk))
1946 -+ *partial_cluster = -right_cluster;
1947 -+ }
1948 -+
1949 - trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
1950 -
1951 - while (ex >= EXT_FIRST_EXTENT(eh) &&
1952 -@@ -2710,10 +2731,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1953 - err = ext4_ext_correct_indexes(handle, inode, path);
1954 -
1955 - /*
1956 -- * Free the partial cluster only if the current extent does not
1957 -- * reference it. Otherwise we might free used cluster.
1958 -+ * If there's a partial cluster and at least one extent remains in
1959 -+ * the leaf, free the partial cluster if it isn't shared with the
1960 -+ * current extent. If there's a partial cluster and no extents
1961 -+ * remain in the leaf, it can't be freed here. It can only be
1962 -+ * freed when it's possible to determine if it's not shared with
1963 -+ * any other extent - when the next leaf is processed or when space
1964 -+ * removal is complete.
1965 - */
1966 -- if (*partial_cluster > 0 &&
1967 -+ if (*partial_cluster > 0 && eh->eh_entries &&
1968 - (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
1969 - *partial_cluster)) {
1970 - int flags = get_default_free_blocks_flags(inode);
1971 -@@ -4128,7 +4154,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
1972 - struct ext4_extent newex, *ex, *ex2;
1973 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1974 - ext4_fsblk_t newblock = 0;
1975 -- int free_on_err = 0, err = 0, depth;
1976 -+ int free_on_err = 0, err = 0, depth, ret;
1977 - unsigned int allocated = 0, offset = 0;
1978 - unsigned int allocated_clusters = 0;
1979 - struct ext4_allocation_request ar;
1980 -@@ -4189,9 +4215,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
1981 - if (!ext4_ext_is_uninitialized(ex))
1982 - goto out;
1983 -
1984 -- allocated = ext4_ext_handle_uninitialized_extents(
1985 -+ ret = ext4_ext_handle_uninitialized_extents(
1986 - handle, inode, map, path, flags,
1987 - allocated, newblock);
1988 -+ if (ret < 0)
1989 -+ err = ret;
1990 -+ else
1991 -+ allocated = ret;
1992 - goto out3;
1993 - }
1994 - }
1995 -diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
1996 -index d754e3cf99a8..a16315957ef3 100644
1997 ---- a/fs/fs-writeback.c
1998 -+++ b/fs/fs-writeback.c
1999 -@@ -89,16 +89,29 @@ static inline struct inode *wb_inode(struct list_head *head)
2000 - #define CREATE_TRACE_POINTS
2001 - #include <trace/events/writeback.h>
2002 -
2003 -+static void bdi_wakeup_thread(struct backing_dev_info *bdi)
2004 -+{
2005 -+ spin_lock_bh(&bdi->wb_lock);
2006 -+ if (test_bit(BDI_registered, &bdi->state))
2007 -+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2008 -+ spin_unlock_bh(&bdi->wb_lock);
2009 -+}
2010 -+
2011 - static void bdi_queue_work(struct backing_dev_info *bdi,
2012 - struct wb_writeback_work *work)
2013 - {
2014 - trace_writeback_queue(bdi, work);
2015 -
2016 - spin_lock_bh(&bdi->wb_lock);
2017 -+ if (!test_bit(BDI_registered, &bdi->state)) {
2018 -+ if (work->done)
2019 -+ complete(work->done);
2020 -+ goto out_unlock;
2021 -+ }
2022 - list_add_tail(&work->list, &bdi->work_list);
2023 -- spin_unlock_bh(&bdi->wb_lock);
2024 --
2025 - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2026 -+out_unlock:
2027 -+ spin_unlock_bh(&bdi->wb_lock);
2028 - }
2029 -
2030 - static void
2031 -@@ -114,7 +127,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
2032 - work = kzalloc(sizeof(*work), GFP_ATOMIC);
2033 - if (!work) {
2034 - trace_writeback_nowork(bdi);
2035 -- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2036 -+ bdi_wakeup_thread(bdi);
2037 - return;
2038 - }
2039 -
2040 -@@ -161,7 +174,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
2041 - * writeback as soon as there is no other work to do.
2042 - */
2043 - trace_writeback_wake_background(bdi);
2044 -- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2045 -+ bdi_wakeup_thread(bdi);
2046 - }
2047 -
2048 - /*
2049 -@@ -1017,7 +1030,7 @@ void bdi_writeback_workfn(struct work_struct *work)
2050 - current->flags |= PF_SWAPWRITE;
2051 -
2052 - if (likely(!current_is_workqueue_rescuer() ||
2053 -- list_empty(&bdi->bdi_list))) {
2054 -+ !test_bit(BDI_registered, &bdi->state))) {
2055 - /*
2056 - * The normal path. Keep writing back @bdi until its
2057 - * work_list is empty. Note that this path is also taken
2058 -@@ -1039,10 +1052,10 @@ void bdi_writeback_workfn(struct work_struct *work)
2059 - trace_writeback_pages_written(pages_written);
2060 - }
2061 -
2062 -- if (!list_empty(&bdi->work_list) ||
2063 -- (wb_has_dirty_io(wb) && dirty_writeback_interval))
2064 -- queue_delayed_work(bdi_wq, &wb->dwork,
2065 -- msecs_to_jiffies(dirty_writeback_interval * 10));
2066 -+ if (!list_empty(&bdi->work_list))
2067 -+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
2068 -+ else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
2069 -+ bdi_wakeup_thread_delayed(bdi);
2070 -
2071 - current->flags &= ~PF_SWAPWRITE;
2072 - }
2073 -diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
2074 -index 16a5047903a6..406d9cc84ba8 100644
2075 ---- a/fs/jffs2/compr_rtime.c
2076 -+++ b/fs/jffs2/compr_rtime.c
2077 -@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
2078 - unsigned char *cpage_out,
2079 - uint32_t *sourcelen, uint32_t *dstlen)
2080 - {
2081 -- short positions[256];
2082 -+ unsigned short positions[256];
2083 - int outpos = 0;
2084 - int pos=0;
2085 -
2086 -@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
2087 - unsigned char *cpage_out,
2088 - uint32_t srclen, uint32_t destlen)
2089 - {
2090 -- short positions[256];
2091 -+ unsigned short positions[256];
2092 - int outpos = 0;
2093 - int pos=0;
2094 -
2095 -diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
2096 -index e4619b00f7c5..fa35ff79ab35 100644
2097 ---- a/fs/jffs2/nodelist.h
2098 -+++ b/fs/jffs2/nodelist.h
2099 -@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
2100 - uint32_t version;
2101 - uint32_t data_crc;
2102 - uint32_t partial_crc;
2103 -- uint16_t csize;
2104 -+ uint32_t csize;
2105 - uint16_t overlapped;
2106 - };
2107 -
2108 -diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
2109 -index 03310721712f..b6bd4affd9ad 100644
2110 ---- a/fs/jffs2/nodemgmt.c
2111 -+++ b/fs/jffs2/nodemgmt.c
2112 -@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
2113 - spin_unlock(&c->erase_completion_lock);
2114 -
2115 - schedule();
2116 -+ remove_wait_queue(&c->erase_wait, &wait);
2117 - } else
2118 - spin_unlock(&c->erase_completion_lock);
2119 - } else if (ret)
2120 -@@ -211,20 +212,25 @@ out:
2121 - int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
2122 - uint32_t *len, uint32_t sumsize)
2123 - {
2124 -- int ret = -EAGAIN;
2125 -+ int ret;
2126 - minsize = PAD(minsize);
2127 -
2128 - jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
2129 -
2130 -- spin_lock(&c->erase_completion_lock);
2131 -- while(ret == -EAGAIN) {
2132 -+ while (true) {
2133 -+ spin_lock(&c->erase_completion_lock);
2134 - ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
2135 - if (ret) {
2136 - jffs2_dbg(1, "%s(): looping, ret is %d\n",
2137 - __func__, ret);
2138 - }
2139 -+ spin_unlock(&c->erase_completion_lock);
2140 -+
2141 -+ if (ret == -EAGAIN)
2142 -+ cond_resched();
2143 -+ else
2144 -+ break;
2145 - }
2146 -- spin_unlock(&c->erase_completion_lock);
2147 - if (!ret)
2148 - ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
2149 -
2150 -diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
2151 -index bd6e18be6e1a..39c0143fb3af 100644
2152 ---- a/fs/kernfs/dir.c
2153 -+++ b/fs/kernfs/dir.c
2154 -@@ -37,7 +37,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns)
2155 - hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
2156 - hash &= 0x7fffffffU;
2157 - /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
2158 -- if (hash < 1)
2159 -+ if (hash < 2)
2160 - hash += 2;
2161 - if (hash >= INT_MAX)
2162 - hash = INT_MAX - 1;
2163 -diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
2164 -index e55126f85bd2..553946c9d952 100644
2165 ---- a/fs/kernfs/inode.c
2166 -+++ b/fs/kernfs/inode.c
2167 -@@ -48,14 +48,18 @@ void __init kernfs_inode_init(void)
2168 -
2169 - static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
2170 - {
2171 -+ static DEFINE_MUTEX(iattr_mutex);
2172 -+ struct kernfs_iattrs *ret;
2173 - struct iattr *iattrs;
2174 -
2175 -+ mutex_lock(&iattr_mutex);
2176 -+
2177 - if (kn->iattr)
2178 -- return kn->iattr;
2179 -+ goto out_unlock;
2180 -
2181 - kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
2182 - if (!kn->iattr)
2183 -- return NULL;
2184 -+ goto out_unlock;
2185 - iattrs = &kn->iattr->ia_iattr;
2186 -
2187 - /* assign default attributes */
2188 -@@ -65,8 +69,10 @@ static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
2189 - iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
2190 -
2191 - simple_xattrs_init(&kn->iattr->xattrs);
2192 --
2193 -- return kn->iattr;
2194 -+out_unlock:
2195 -+ ret = kn->iattr;
2196 -+ mutex_unlock(&iattr_mutex);
2197 -+ return ret;
2198 - }
2199 -
2200 - static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
2201 -diff --git a/fs/posix_acl.c b/fs/posix_acl.c
2202 -index 11c54fd51e16..9e363e41dacc 100644
2203 ---- a/fs/posix_acl.c
2204 -+++ b/fs/posix_acl.c
2205 -@@ -723,7 +723,7 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
2206 - void *buffer, size_t size)
2207 - {
2208 - posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer;
2209 -- posix_acl_xattr_entry *ext_entry = ext_acl->a_entries;
2210 -+ posix_acl_xattr_entry *ext_entry;
2211 - int real_size, n;
2212 -
2213 - real_size = posix_acl_xattr_size(acl->a_count);
2214 -@@ -731,7 +731,8 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
2215 - return real_size;
2216 - if (real_size > size)
2217 - return -ERANGE;
2218 --
2219 -+
2220 -+ ext_entry = ext_acl->a_entries;
2221 - ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
2222 -
2223 - for (n=0; n < acl->a_count; n++, ext_entry++) {
2224 -diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
2225 -index 796272a2e129..e69d57be866b 100644
2226 ---- a/fs/xfs/xfs_da_btree.c
2227 -+++ b/fs/xfs/xfs_da_btree.c
2228 -@@ -1295,7 +1295,7 @@ xfs_da3_fixhashpath(
2229 - node = blk->bp->b_addr;
2230 - dp->d_ops->node_hdr_from_disk(&nodehdr, node);
2231 - btree = dp->d_ops->node_tree_p(node);
2232 -- if (be32_to_cpu(btree->hashval) == lasthash)
2233 -+ if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
2234 - break;
2235 - blk->hashval = lasthash;
2236 - btree[blk->index].hashval = cpu_to_be32(lasthash);
2237 -diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
2238 -index 24819001f5c8..e488e9459a93 100644
2239 ---- a/include/linux/backing-dev.h
2240 -+++ b/include/linux/backing-dev.h
2241 -@@ -95,7 +95,7 @@ struct backing_dev_info {
2242 - unsigned int max_ratio, max_prop_frac;
2243 -
2244 - struct bdi_writeback wb; /* default writeback info for this bdi */
2245 -- spinlock_t wb_lock; /* protects work_list */
2246 -+ spinlock_t wb_lock; /* protects work_list & wb.dwork scheduling */
2247 -
2248 - struct list_head work_list;
2249 -
2250 -diff --git a/include/linux/tty.h b/include/linux/tty.h
2251 -index 90b4fdc8a61f..b90b5c221ff0 100644
2252 ---- a/include/linux/tty.h
2253 -+++ b/include/linux/tty.h
2254 -@@ -208,7 +208,7 @@ struct tty_port {
2255 - wait_queue_head_t delta_msr_wait; /* Modem status change */
2256 - unsigned long flags; /* TTY flags ASY_*/
2257 - unsigned char console:1, /* port is a console */
2258 -- low_latency:1; /* direct buffer flush */
2259 -+ low_latency:1; /* optional: tune for latency */
2260 - struct mutex mutex; /* Locking */
2261 - struct mutex buf_mutex; /* Buffer alloc lock */
2262 - unsigned char *xmit_buf; /* Optional buffer */
2263 -diff --git a/kernel/exit.c b/kernel/exit.c
2264 -index 1e77fc645317..81b3d6789ee8 100644
2265 ---- a/kernel/exit.c
2266 -+++ b/kernel/exit.c
2267 -@@ -560,9 +560,6 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
2268 - struct list_head *dead)
2269 - {
2270 - list_move_tail(&p->sibling, &p->real_parent->children);
2271 --
2272 -- if (p->exit_state == EXIT_DEAD)
2273 -- return;
2274 - /*
2275 - * If this is a threaded reparent there is no need to
2276 - * notify anyone anything has happened.
2277 -@@ -570,9 +567,19 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
2278 - if (same_thread_group(p->real_parent, father))
2279 - return;
2280 -
2281 -- /* We don't want people slaying init. */
2282 -+ /*
2283 -+ * We don't want people slaying init.
2284 -+ *
2285 -+ * Note: we do this even if it is EXIT_DEAD, wait_task_zombie()
2286 -+ * can change ->exit_state to EXIT_ZOMBIE. If this is the final
2287 -+ * state, do_notify_parent() was already called and ->exit_signal
2288 -+ * doesn't matter.
2289 -+ */
2290 - p->exit_signal = SIGCHLD;
2291 -
2292 -+ if (p->exit_state == EXIT_DEAD)
2293 -+ return;
2294 -+
2295 - /* If it has exited notify the new parent about this child's death. */
2296 - if (!p->ptrace &&
2297 - p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
2298 -@@ -784,6 +791,8 @@ void do_exit(long code)
2299 - exit_shm(tsk);
2300 - exit_files(tsk);
2301 - exit_fs(tsk);
2302 -+ if (group_dead)
2303 -+ disassociate_ctty(1);
2304 - exit_task_namespaces(tsk);
2305 - exit_task_work(tsk);
2306 - check_stack_usage();
2307 -@@ -799,13 +808,9 @@ void do_exit(long code)
2308 -
2309 - cgroup_exit(tsk, 1);
2310 -
2311 -- if (group_dead)
2312 -- disassociate_ctty(1);
2313 --
2314 - module_put(task_thread_info(tsk)->exec_domain->module);
2315 -
2316 - proc_exit_connector(tsk);
2317 --
2318 - /*
2319 - * FIXME: do that only when needed, using sched_exit tracepoint
2320 - */
2321 -diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
2322 -index 06c62de9c711..db95d8eb761b 100644
2323 ---- a/kernel/pid_namespace.c
2324 -+++ b/kernel/pid_namespace.c
2325 -@@ -318,7 +318,9 @@ static void *pidns_get(struct task_struct *task)
2326 - struct pid_namespace *ns;
2327 -
2328 - rcu_read_lock();
2329 -- ns = get_pid_ns(task_active_pid_ns(task));
2330 -+ ns = task_active_pid_ns(task);
2331 -+ if (ns)
2332 -+ get_pid_ns(ns);
2333 - rcu_read_unlock();
2334 -
2335 - return ns;
2336 -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
2337 -index dd06439b9c84..80a57afd8647 100644
2338 ---- a/kernel/user_namespace.c
2339 -+++ b/kernel/user_namespace.c
2340 -@@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
2341 -
2342 - /* Find the matching extent */
2343 - extents = map->nr_extents;
2344 -- smp_read_barrier_depends();
2345 -+ smp_rmb();
2346 - for (idx = 0; idx < extents; idx++) {
2347 - first = map->extent[idx].first;
2348 - last = first + map->extent[idx].count - 1;
2349 -@@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id)
2350 -
2351 - /* Find the matching extent */
2352 - extents = map->nr_extents;
2353 -- smp_read_barrier_depends();
2354 -+ smp_rmb();
2355 - for (idx = 0; idx < extents; idx++) {
2356 - first = map->extent[idx].first;
2357 - last = first + map->extent[idx].count - 1;
2358 -@@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id)
2359 -
2360 - /* Find the matching extent */
2361 - extents = map->nr_extents;
2362 -- smp_read_barrier_depends();
2363 -+ smp_rmb();
2364 - for (idx = 0; idx < extents; idx++) {
2365 - first = map->extent[idx].lower_first;
2366 - last = first + map->extent[idx].count - 1;
2367 -@@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf,
2368 - * were written before the count of the extents.
2369 - *
2370 - * To achieve this smp_wmb() is used on guarantee the write
2371 -- * order and smp_read_barrier_depends() is guaranteed that we
2372 -- * don't have crazy architectures returning stale data.
2373 -- *
2374 -+ * order and smp_rmb() is guaranteed that we don't have crazy
2375 -+ * architectures returning stale data.
2376 - */
2377 - mutex_lock(&id_map_mutex);
2378 -
2379 -diff --git a/mm/backing-dev.c b/mm/backing-dev.c
2380 -index ce682f7a4f29..09d9591b7708 100644
2381 ---- a/mm/backing-dev.c
2382 -+++ b/mm/backing-dev.c
2383 -@@ -288,13 +288,19 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
2384 - * Note, we wouldn't bother setting up the timer, but this function is on the
2385 - * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
2386 - * by delaying the wake-up.
2387 -+ *
2388 -+ * We have to be careful not to postpone flush work if it is scheduled for
2389 -+ * earlier. Thus we use queue_delayed_work().
2390 - */
2391 - void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
2392 - {
2393 - unsigned long timeout;
2394 -
2395 - timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
2396 -- mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
2397 -+ spin_lock_bh(&bdi->wb_lock);
2398 -+ if (test_bit(BDI_registered, &bdi->state))
2399 -+ queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
2400 -+ spin_unlock_bh(&bdi->wb_lock);
2401 - }
2402 -
2403 - /*
2404 -@@ -307,9 +313,6 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
2405 - spin_unlock_bh(&bdi_lock);
2406 -
2407 - synchronize_rcu_expedited();
2408 --
2409 -- /* bdi_list is now unused, clear it to mark @bdi dying */
2410 -- INIT_LIST_HEAD(&bdi->bdi_list);
2411 - }
2412 -
2413 - int bdi_register(struct backing_dev_info *bdi, struct device *parent,
2414 -@@ -360,6 +363,11 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
2415 - */
2416 - bdi_remove_from_list(bdi);
2417 -
2418 -+ /* Make sure nobody queues further work */
2419 -+ spin_lock_bh(&bdi->wb_lock);
2420 -+ clear_bit(BDI_registered, &bdi->state);
2421 -+ spin_unlock_bh(&bdi->wb_lock);
2422 -+
2423 - /*
2424 - * Drain work list and shutdown the delayed_work. At this point,
2425 - * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi
2426 -diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
2427 -index 5f812455a450..60828cf02eb8 100644
2428 ---- a/net/bluetooth/hci_event.c
2429 -+++ b/net/bluetooth/hci_event.c
2430 -@@ -3593,7 +3593,13 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
2431 -
2432 - hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
2433 -
2434 -- if (ltk->type & HCI_SMP_STK) {
2435 -+ /* Ref. Bluetooth Core SPEC pages 1975 and 2004. STK is a
2436 -+ * temporary key used to encrypt a connection following
2437 -+ * pairing. It is used during the Encrypted Session Setup to
2438 -+ * distribute the keys. Later, security can be re-established
2439 -+ * using a distributed LTK.
2440 -+ */
2441 -+ if (ltk->type == HCI_SMP_STK_SLAVE) {
2442 - list_del(&ltk->list);
2443 - kfree(ltk);
2444 - }
2445 -diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
2446 -index 0356e1d437ca..f79fa8be203c 100644
2447 ---- a/security/integrity/ima/ima.h
2448 -+++ b/security/integrity/ima/ima.h
2449 -@@ -27,7 +27,7 @@
2450 - #include "../integrity.h"
2451 -
2452 - enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN,
2453 -- IMA_SHOW_ASCII };
2454 -+ IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII };
2455 - enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
2456 -
2457 - /* digest size for IMA, fits SHA1 or MD5 */
2458 -diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
2459 -index db01125926bd..468a3ba3c539 100644
2460 ---- a/security/integrity/ima/ima_fs.c
2461 -+++ b/security/integrity/ima/ima_fs.c
2462 -@@ -160,6 +160,8 @@ static int ima_measurements_show(struct seq_file *m, void *v)
2463 -
2464 - if (is_ima_template && strcmp(field->field_id, "d") == 0)
2465 - show = IMA_SHOW_BINARY_NO_FIELD_LEN;
2466 -+ if (is_ima_template && strcmp(field->field_id, "n") == 0)
2467 -+ show = IMA_SHOW_BINARY_OLD_STRING_FMT;
2468 - field->field_show(m, show, &e->template_data[i]);
2469 - }
2470 - return 0;
2471 -diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
2472 -index 1683bbf289a4..e8592e7bfc21 100644
2473 ---- a/security/integrity/ima/ima_template_lib.c
2474 -+++ b/security/integrity/ima/ima_template_lib.c
2475 -@@ -109,13 +109,16 @@ static void ima_show_template_data_binary(struct seq_file *m,
2476 - enum data_formats datafmt,
2477 - struct ima_field_data *field_data)
2478 - {
2479 -+ u32 len = (show == IMA_SHOW_BINARY_OLD_STRING_FMT) ?
2480 -+ strlen(field_data->data) : field_data->len;
2481 -+
2482 - if (show != IMA_SHOW_BINARY_NO_FIELD_LEN)
2483 -- ima_putc(m, &field_data->len, sizeof(u32));
2484 -+ ima_putc(m, &len, sizeof(len));
2485 -
2486 -- if (!field_data->len)
2487 -+ if (!len)
2488 - return;
2489 -
2490 -- ima_putc(m, field_data->data, field_data->len);
2491 -+ ima_putc(m, field_data->data, len);
2492 - }
2493 -
2494 - static void ima_show_template_field_data(struct seq_file *m,
2495 -@@ -129,6 +132,7 @@ static void ima_show_template_field_data(struct seq_file *m,
2496 - break;
2497 - case IMA_SHOW_BINARY:
2498 - case IMA_SHOW_BINARY_NO_FIELD_LEN:
2499 -+ case IMA_SHOW_BINARY_OLD_STRING_FMT:
2500 - ima_show_template_data_binary(m, show, datafmt, field_data);
2501 - break;
2502 - default:
2503
2504 Deleted: genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch
2505 ===================================================================
2506 --- genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch 2014-05-05 14:22:27 UTC (rev 2771)
2507 +++ genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch 2014-05-05 14:28:48 UTC (rev 2772)
2508 @@ -1,2177 +0,0 @@
2509 -diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
2510 -index 30a7054..9b6a733 100644
2511 ---- a/Documentation/fb/00-INDEX
2512 -+++ b/Documentation/fb/00-INDEX
2513 -@@ -21,6 +21,8 @@ ep93xx-fb.txt
2514 - - info on the driver for EP93xx LCD controller.
2515 - fbcon.txt
2516 - - intro to and usage guide for the framebuffer console (fbcon).
2517 -+fbcondecor.txt
2518 -+ - info on the Framebuffer Console Decoration
2519 - framebuffer.txt
2520 - - introduction to frame buffer devices.
2521 - gxfb.txt
2522 -diff --git a/Documentation/fb/fbcondecor.txt b/Documentation/fb/fbcondecor.txt
2523 -new file mode 100644
2524 -index 0000000..15889f3
2525 ---- /dev/null
2526 -+++ b/Documentation/fb/fbcondecor.txt
2527 -@@ -0,0 +1,207 @@
2528 -+What is it?
2529 -+-----------
2530 -+
2531 -+The framebuffer decorations are a kernel feature which allows displaying a
2532 -+background picture on selected consoles.
2533 -+
2534 -+What do I need to get it to work?
2535 -+---------------------------------
2536 -+
2537 -+To get fbcondecor up-and-running you will have to:
2538 -+ 1) get a copy of splashutils [1] or a similar program
2539 -+ 2) get some fbcondecor themes
2540 -+ 3) build the kernel helper program
2541 -+ 4) build your kernel with the FB_CON_DECOR option enabled.
2542 -+
2543 -+To get fbcondecor operational right after fbcon initialization is finished, you
2544 -+will have to include a theme and the kernel helper into your initramfs image.
2545 -+Please refer to splashutils documentation for instructions on how to do that.
2546 -+
2547 -+[1] The splashutils package can be downloaded from:
2548 -+ http://dev.gentoo.org/~spock/projects/splashutils/
2549 -+
2550 -+The userspace helper
2551 -+--------------------
2552 -+
2553 -+The userspace fbcondecor helper (by default: /sbin/fbcondecor_helper) is called by the
2554 -+kernel whenever an important event occurs and the kernel needs some kind of
2555 -+job to be carried out. Important events include console switches and video
2556 -+mode switches (the kernel requests background images and configuration
2557 -+parameters for the current console). The fbcondecor helper must be accessible at
2558 -+all times. If it's not, fbcondecor will be switched off automatically.
2559 -+
2560 -+It's possible to set path to the fbcondecor helper by writing it to
2561 -+/proc/sys/kernel/fbcondecor.
2562 -+
2563 -+*****************************************************************************
2564 -+
2565 -+The information below is mostly technical stuff. There's probably no need to
2566 -+read it unless you plan to develop a userspace helper.
2567 -+
2568 -+The fbcondecor protocol
2569 -+-----------------------
2570 -+
2571 -+The fbcondecor protocol defines a communication interface between the kernel and
2572 -+the userspace fbcondecor helper.
2573 -+
2574 -+The kernel side is responsible for:
2575 -+
2576 -+ * rendering console text, using an image as a background (instead of a
2577 -+ standard solid color fbcon uses),
2578 -+ * accepting commands from the user via ioctls on the fbcondecor device,
2579 -+ * calling the userspace helper to set things up as soon as the fb subsystem
2580 -+ is initialized.
2581 -+
2582 -+The userspace helper is responsible for everything else, including parsing
2583 -+configuration files, decompressing the image files whenever the kernel needs
2584 -+it, and communicating with the kernel if necessary.
2585 -+
2586 -+The fbcondecor protocol specifies how communication is done in both ways:
2587 -+kernel->userspace and userspace->helper.
2588 -+
2589 -+Kernel -> Userspace
2590 -+-------------------
2591 -+
2592 -+The kernel communicates with the userspace helper by calling it and specifying
2593 -+the task to be done in a series of arguments.
2594 -+
2595 -+The arguments follow the pattern:
2596 -+<fbcondecor protocol version> <command> <parameters>
2597 -+
2598 -+All commands defined in fbcondecor protocol v2 have the following parameters:
2599 -+ virtual console
2600 -+ framebuffer number
2601 -+ theme
2602 -+
2603 -+Fbcondecor protocol v1 specified an additional 'fbcondecor mode' after the
2604 -+framebuffer number. Fbcondecor protocol v1 is deprecated and should not be used.
2605 -+
2606 -+Fbcondecor protocol v2 specifies the following commands:
2607 -+
2608 -+getpic
2609 -+------
2610 -+ The kernel issues this command to request image data. It's up to the
2611 -+ userspace helper to find a background image appropriate for the specified
2612 -+ theme and the current resolution. The userspace helper should respond by
2613 -+ issuing the FBIOCONDECOR_SETPIC ioctl.
2614 -+
2615 -+init
2616 -+----
2617 -+ The kernel issues this command after the fbcondecor device is created and
2618 -+ the fbcondecor interface is initialized. Upon receiving 'init', the userspace
2619 -+ helper should parse the kernel command line (/proc/cmdline) or otherwise
2620 -+ decide whether fbcondecor is to be activated.
2621 -+
2622 -+ To activate fbcondecor on the first console the helper should issue the
2623 -+ FBIOCONDECOR_SETCFG, FBIOCONDECOR_SETPIC and FBIOCONDECOR_SETSTATE commands,
2624 -+ in the above-mentioned order.
2625 -+
2626 -+ When the userspace helper is called in an early phase of the boot process
2627 -+ (right after the initialization of fbcon), no filesystems will be mounted.
2628 -+ The helper program should mount sysfs and then create the appropriate
2629 -+ framebuffer, fbcondecor and tty0 devices (if they don't already exist) to get
2630 -+ current display settings and to be able to communicate with the kernel side.
2631 -+ It should probably also mount the procfs to be able to parse the kernel
2632 -+ command line parameters.
2633 -+
2634 -+ Note that the console sem is not held when the kernel calls fbcondecor_helper
2635 -+ with the 'init' command. The fbcondecor helper should perform all ioctls with
2636 -+ origin set to FBCON_DECOR_IO_ORIG_USER.
2637 -+
2638 -+modechange
2639 -+----------
2640 -+ The kernel issues this command on a mode change. The helper's response should
2641 -+ be similar to the response to the 'init' command. Note that this time the
2642 -+ console sem is held and all ioctls must be performed with origin set to
2643 -+ FBCON_DECOR_IO_ORIG_KERNEL.
2644 -+
2645 -+
2646 -+Userspace -> Kernel
2647 -+-------------------
2648 -+
2649 -+Userspace programs can communicate with fbcondecor via ioctls on the
2650 -+fbcondecor device. These ioctls are to be used by both the userspace helper
2651 -+(called only by the kernel) and userspace configuration tools (run by the users).
2652 -+
2653 -+The fbcondecor helper should set the origin field to FBCON_DECOR_IO_ORIG_KERNEL
2654 -+when doing the appropriate ioctls. All userspace configuration tools should
2655 -+use FBCON_DECOR_IO_ORIG_USER. Failure to set the appropriate value in the origin
2656 -+field when performing ioctls from the kernel helper will most likely result
2657 -+in a console deadlock.
2658 -+
2659 -+FBCON_DECOR_IO_ORIG_KERNEL instructs fbcondecor not to try to acquire the console
2660 -+semaphore. Not surprisingly, FBCON_DECOR_IO_ORIG_USER instructs it to acquire
2661 -+the console sem.
2662 -+
2663 -+The framebuffer console decoration provides the following ioctls (all defined in
2664 -+linux/fb.h):
2665 -+
2666 -+FBIOCONDECOR_SETPIC
2667 -+description: loads a background picture for a virtual console
2668 -+argument: struct fbcon_decor_iowrapper*; data: struct fb_image*
2669 -+notes:
2670 -+If called for consoles other than the current foreground one, the picture data
2671 -+will be ignored.
2672 -+
2673 -+If the current virtual console is running in a 8-bpp mode, the cmap substruct
2674 -+of fb_image has to be filled appropriately: start should be set to 16 (first
2675 -+16 colors are reserved for fbcon), len to a value <= 240 and red, green and
2676 -+blue should point to valid cmap data. The transp field is ingored. The fields
2677 -+dx, dy, bg_color, fg_color in fb_image are ignored as well.
2678 -+
2679 -+FBIOCONDECOR_SETCFG
2680 -+description: sets the fbcondecor config for a virtual console
2681 -+argument: struct fbcon_decor_iowrapper*; data: struct vc_decor*
2682 -+notes: The structure has to be filled with valid data.
2683 -+
2684 -+FBIOCONDECOR_GETCFG
2685 -+description: gets the fbcondecor config for a virtual console
2686 -+argument: struct fbcon_decor_iowrapper*; data: struct vc_decor*
2687 -+
2688 -+FBIOCONDECOR_SETSTATE
2689 -+description: sets the fbcondecor state for a virtual console
2690 -+argument: struct fbcon_decor_iowrapper*; data: unsigned int*
2691 -+ values: 0 = disabled, 1 = enabled.
2692 -+
2693 -+FBIOCONDECOR_GETSTATE
2694 -+description: gets the fbcondecor state for a virtual console
2695 -+argument: struct fbcon_decor_iowrapper*; data: unsigned int*
2696 -+ values: as in FBIOCONDECOR_SETSTATE
2697 -+
2698 -+Info on used structures:
2699 -+
2700 -+Definition of struct vc_decor can be found in linux/console_decor.h. It's
2701 -+heavily commented. Note that the 'theme' field should point to a string
2702 -+no longer than FBCON_DECOR_THEME_LEN. When FBIOCONDECOR_GETCFG call is
2703 -+performed, the theme field should point to a char buffer of length
2704 -+FBCON_DECOR_THEME_LEN.
2705 -+
2706 -+Definition of struct fbcon_decor_iowrapper can be found in linux/fb.h.
2707 -+The fields in this struct have the following meaning:
2708 -+
2709 -+vc:
2710 -+Virtual console number.
2711 -+
2712 -+origin:
2713 -+Specifies if the ioctl is performed as a response to a kernel request. The
2714 -+fbcondecor helper should set this field to FBCON_DECOR_IO_ORIG_KERNEL, userspace
2715 -+programs should set it to FBCON_DECOR_IO_ORIG_USER. This field is necessary to
2716 -+avoid console semaphore deadlocks.
2717 -+
2718 -+data:
2719 -+Pointer to a data structure appropriate for the performed ioctl. Type of
2720 -+the data struct is specified in the ioctls description.
2721 -+
2722 -+*****************************************************************************
2723 -+
2724 -+Credit
2725 -+------
2726 -+
2727 -+Original 'bootsplash' project & implementation by:
2728 -+ Volker Poplawski <volker@×××××××××.de>, Stefan Reinauer <stepan@××××.de>,
2729 -+ Steffen Winterfeldt <snwint@××××.de>, Michael Schroeder <mls@××××.de>,
2730 -+ Ken Wimer <wimer@××××.de>.
2731 -+
2732 -+Fbcondecor, fbcondecor protocol design, current implementation & docs by:
2733 -+ Michal Januszewski <spock@g.o>
2734 -+
2735 -diff --git a/drivers/Makefile b/drivers/Makefile
2736 -index 95952c8..b55db6d 100644
2737 ---- a/drivers/Makefile
2738 -+++ b/drivers/Makefile
2739 -@@ -16,4 +16,8 @@ obj-$(CONFIG_PCI) += pci/
2740 - obj-$(CONFIG_PARISC) += parisc/
2741 - obj-$(CONFIG_RAPIDIO) += rapidio/
2742 -+# tty/ comes before char/ so that the VT console is the boot-time
2743 -+# default.
2744 -+obj-y += tty/
2745 -+obj-y += char/
2746 - obj-y += video/
2747 - obj-y += idle/
2748 -@@ -37,11 +41,6 @@ obj-$(CONFIG_XEN) += xen/
2749 - # regulators early, since some subsystems rely on them to initialize
2750 - obj-$(CONFIG_REGULATOR) += regulator/
2751 -
2752 --# tty/ comes before char/ so that the VT console is the boot-time
2753 --# default.
2754 --obj-y += tty/
2755 --obj-y += char/
2756 --
2757 - # gpu/ comes after char for AGP vs DRM startup
2758 - obj-y += gpu/
2759 -
2760 -diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
2761 -index a290be5..3a4ca32 100644
2762 ---- a/drivers/video/Kconfig
2763 -+++ b/drivers/video/Kconfig
2764 -@@ -1229,7 +1229,6 @@ config FB_MATROX
2765 - select FB_CFB_FILLRECT
2766 - select FB_CFB_COPYAREA
2767 - select FB_CFB_IMAGEBLIT
2768 -- select FB_TILEBLITTING
2769 - select FB_MACMODES if PPC_PMAC
2770 - ---help---
2771 - Say Y here if you have a Matrox Millennium, Matrox Millennium II,
2772 -diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
2773 -index c2d11fe..1be9de4 100644
2774 ---- a/drivers/video/console/Kconfig
2775 -+++ b/drivers/video/console/Kconfig
2776 -@@ -120,6 +120,19 @@ config FRAMEBUFFER_CONSOLE_ROTATION
2777 - such that other users of the framebuffer will remain normally
2778 - oriented.
2779 -
2780 -+config FB_CON_DECOR
2781 -+ bool "Support for the Framebuffer Console Decorations"
2782 -+ depends on FRAMEBUFFER_CONSOLE=y && !FB_TILEBLITTING
2783 -+ default n
2784 -+ ---help---
2785 -+ This option enables support for framebuffer console decorations which
2786 -+ makes it possible to display images in the background of the system
2787 -+ consoles. Note that userspace utilities are necessary in order to take
2788 -+ advantage of these features. Refer to Documentation/fb/fbcondecor.txt
2789 -+ for more information.
2790 -+
2791 -+ If unsure, say N.
2792 -+
2793 - config STI_CONSOLE
2794 - bool "STI text console"
2795 - depends on PARISC
2796 ---- a/drivers/video/console/Makefile 2013-08-26 14:02:39.905817618 -0400
2797 -+++ b/drivers/video/console/Makefile 2013-08-26 14:05:06.258848595 -0400
2798 -@@ -16,4 +16,5 @@ obj-$(CONFIG_FRAMEBUFFER_CONSOLE) +=
2799 - fbcon_ccw.o
2800 - endif
2801 -
2802 -+obj-$(CONFIG_FB_CON_DECOR) += fbcondecor.o cfbcondecor.o
2803 - obj-$(CONFIG_FB_STI) += sticore.o
2804 -diff --git a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c
2805 -index 28b1a83..33712c0 100644
2806 ---- a/drivers/video/console/bitblit.c
2807 -+++ b/drivers/video/console/bitblit.c
2808 -@@ -18,6 +18,7 @@
2809 - #include <linux/console.h>
2810 - #include <asm/types.h>
2811 - #include "fbcon.h"
2812 -+#include "fbcondecor.h"
2813 -
2814 - /*
2815 - * Accelerated handlers.
2816 -@@ -55,6 +56,13 @@ static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
2817 - area.height = height * vc->vc_font.height;
2818 - area.width = width * vc->vc_font.width;
2819 -
2820 -+ if (fbcon_decor_active(info, vc)) {
2821 -+ area.sx += vc->vc_decor.tx;
2822 -+ area.sy += vc->vc_decor.ty;
2823 -+ area.dx += vc->vc_decor.tx;
2824 -+ area.dy += vc->vc_decor.ty;
2825 -+ }
2826 -+
2827 - info->fbops->fb_copyarea(info, &area);
2828 - }
2829 -
2830 -@@ -380,11 +388,15 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
2831 - cursor.image.depth = 1;
2832 - cursor.rop = ROP_XOR;
2833 -
2834 -- if (info->fbops->fb_cursor)
2835 -- err = info->fbops->fb_cursor(info, &cursor);
2836 -+ if (fbcon_decor_active(info, vc)) {
2837 -+ fbcon_decor_cursor(info, &cursor);
2838 -+ } else {
2839 -+ if (info->fbops->fb_cursor)
2840 -+ err = info->fbops->fb_cursor(info, &cursor);
2841 -
2842 -- if (err)
2843 -- soft_cursor(info, &cursor);
2844 -+ if (err)
2845 -+ soft_cursor(info, &cursor);
2846 -+ }
2847 -
2848 - ops->cursor_reset = 0;
2849 - }
2850 -diff --git a/drivers/video/console/cfbcondecor.c b/drivers/video/console/cfbcondecor.c
2851 -new file mode 100644
2852 -index 0000000..09381d3
2853 ---- /dev/null
2854 -+++ b/drivers/video/console/cfbcondecor.c
2855 -@@ -0,0 +1,471 @@
2856 -+/*
2857 -+ * linux/drivers/video/cfbcon_decor.c -- Framebuffer decor render functions
2858 -+ *
2859 -+ * Copyright (C) 2004 Michal Januszewski <spock@g.o>
2860 -+ *
2861 -+ * Code based upon "Bootdecor" (C) 2001-2003
2862 -+ * Volker Poplawski <volker@×××××××××.de>,
2863 -+ * Stefan Reinauer <stepan@××××.de>,
2864 -+ * Steffen Winterfeldt <snwint@××××.de>,
2865 -+ * Michael Schroeder <mls@××××.de>,
2866 -+ * Ken Wimer <wimer@××××.de>.
2867 -+ *
2868 -+ * This file is subject to the terms and conditions of the GNU General Public
2869 -+ * License. See the file COPYING in the main directory of this archive for
2870 -+ * more details.
2871 -+ */
2872 -+#include <linux/module.h>
2873 -+#include <linux/types.h>
2874 -+#include <linux/fb.h>
2875 -+#include <linux/selection.h>
2876 -+#include <linux/slab.h>
2877 -+#include <linux/vt_kern.h>
2878 -+#include <asm/irq.h>
2879 -+
2880 -+#include "fbcon.h"
2881 -+#include "fbcondecor.h"
2882 -+
2883 -+#define parse_pixel(shift,bpp,type) \
2884 -+ do { \
2885 -+ if (d & (0x80 >> (shift))) \
2886 -+ dd2[(shift)] = fgx; \
2887 -+ else \
2888 -+ dd2[(shift)] = transparent ? *(type *)decor_src : bgx; \
2889 -+ decor_src += (bpp); \
2890 -+ } while (0) \
2891 -+
2892 -+extern int get_color(struct vc_data *vc, struct fb_info *info,
2893 -+ u16 c, int is_fg);
2894 -+
2895 -+void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc)
2896 -+{
2897 -+ int i, j, k;
2898 -+ int minlen = min(min(info->var.red.length, info->var.green.length),
2899 -+ info->var.blue.length);
2900 -+ u32 col;
2901 -+
2902 -+ for (j = i = 0; i < 16; i++) {
2903 -+ k = color_table[i];
2904 -+
2905 -+ col = ((vc->vc_palette[j++] >> (8-minlen))
2906 -+ << info->var.red.offset);
2907 -+ col |= ((vc->vc_palette[j++] >> (8-minlen))
2908 -+ << info->var.green.offset);
2909 -+ col |= ((vc->vc_palette[j++] >> (8-minlen))
2910 -+ << info->var.blue.offset);
2911 -+ ((u32 *)info->pseudo_palette)[k] = col;
2912 -+ }
2913 -+}
2914 -+
2915 -+void fbcon_decor_renderc(struct fb_info *info, int ypos, int xpos, int height,
2916 -+ int width, u8* src, u32 fgx, u32 bgx, u8 transparent)
2917 -+{
2918 -+ unsigned int x, y;
2919 -+ u32 dd;
2920 -+ int bytespp = ((info->var.bits_per_pixel + 7) >> 3);
2921 -+ unsigned int d = ypos * info->fix.line_length + xpos * bytespp;
2922 -+ unsigned int ds = (ypos * info->var.xres + xpos) * bytespp;
2923 -+ u16 dd2[4];
2924 -+
2925 -+ u8* decor_src = (u8 *)(info->bgdecor.data + ds);
2926 -+ u8* dst = (u8 *)(info->screen_base + d);
2927 -+
2928 -+ if ((ypos + height) > info->var.yres || (xpos + width) > info->var.xres)
2929 -+ return;
2930 -+
2931 -+ for (y = 0; y < height; y++) {
2932 -+ switch (info->var.bits_per_pixel) {
2933 -+
2934 -+ case 32:
2935 -+ for (x = 0; x < width; x++) {
2936 -+
2937 -+ if ((x & 7) == 0)
2938 -+ d = *src++;
2939 -+ if (d & 0x80)
2940 -+ dd = fgx;
2941 -+ else
2942 -+ dd = transparent ?
2943 -+ *(u32 *)decor_src : bgx;
2944 -+
2945 -+ d <<= 1;
2946 -+ decor_src += 4;
2947 -+ fb_writel(dd, dst);
2948 -+ dst += 4;
2949 -+ }
2950 -+ break;
2951 -+ case 24:
2952 -+ for (x = 0; x < width; x++) {
2953 -+
2954 -+ if ((x & 7) == 0)
2955 -+ d = *src++;
2956 -+ if (d & 0x80)
2957 -+ dd = fgx;
2958 -+ else
2959 -+ dd = transparent ?
2960 -+ (*(u32 *)decor_src & 0xffffff) : bgx;
2961 -+
2962 -+ d <<= 1;
2963 -+ decor_src += 3;
2964 -+#ifdef __LITTLE_ENDIAN
2965 -+ fb_writew(dd & 0xffff, dst);
2966 -+ dst += 2;
2967 -+ fb_writeb((dd >> 16), dst);
2968 -+#else
2969 -+ fb_writew(dd >> 8, dst);
2970 -+ dst += 2;
2971 -+ fb_writeb(dd & 0xff, dst);
2972 -+#endif
2973 -+ dst++;
2974 -+ }
2975 -+ break;
2976 -+ case 16:
2977 -+ for (x = 0; x < width; x += 2) {
2978 -+ if ((x & 7) == 0)
2979 -+ d = *src++;
2980 -+
2981 -+ parse_pixel(0, 2, u16);
2982 -+ parse_pixel(1, 2, u16);
2983 -+#ifdef __LITTLE_ENDIAN
2984 -+ dd = dd2[0] | (dd2[1] << 16);
2985 -+#else
2986 -+ dd = dd2[1] | (dd2[0] << 16);
2987 -+#endif
2988 -+ d <<= 2;
2989 -+ fb_writel(dd, dst);
2990 -+ dst += 4;
2991 -+ }
2992 -+ break;
2993 -+
2994 -+ case 8:
2995 -+ for (x = 0; x < width; x += 4) {
2996 -+ if ((x & 7) == 0)
2997 -+ d = *src++;
2998 -+
2999 -+ parse_pixel(0, 1, u8);
3000 -+ parse_pixel(1, 1, u8);
3001 -+ parse_pixel(2, 1, u8);
3002 -+ parse_pixel(3, 1, u8);
3003 -+
3004 -+#ifdef __LITTLE_ENDIAN
3005 -+ dd = dd2[0] | (dd2[1] << 8) | (dd2[2] << 16) | (dd2[3] << 24);
3006 -+#else
3007 -+ dd = dd2[3] | (dd2[2] << 8) | (dd2[1] << 16) | (dd2[0] << 24);
3008 -+#endif
3009 -+ d <<= 4;
3010 -+ fb_writel(dd, dst);
3011 -+ dst += 4;
3012 -+ }
3013 -+ }
3014 -+
3015 -+ dst += info->fix.line_length - width * bytespp;
3016 -+ decor_src += (info->var.xres - width) * bytespp;
3017 -+ }
3018 -+}
3019 -+
3020 -+#define cc2cx(a) \
3021 -+ ((info->fix.visual == FB_VISUAL_TRUECOLOR || \
3022 -+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) ? \
3023 -+ ((u32*)info->pseudo_palette)[a] : a)
3024 -+
3025 -+void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info,
3026 -+ const unsigned short *s, int count, int yy, int xx)
3027 -+{
3028 -+ unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
3029 -+ struct fbcon_ops *ops = info->fbcon_par;
3030 -+ int fg_color, bg_color, transparent;
3031 -+ u8 *src;
3032 -+ u32 bgx, fgx;
3033 -+ u16 c = scr_readw(s);
3034 -+
3035 -+ fg_color = get_color(vc, info, c, 1);
3036 -+ bg_color = get_color(vc, info, c, 0);
3037 -+
3038 -+ /* Don't paint the background image if console is blanked */
3039 -+ transparent = ops->blank_state ? 0 :
3040 -+ (vc->vc_decor.bg_color == bg_color);
3041 -+
3042 -+ xx = xx * vc->vc_font.width + vc->vc_decor.tx;
3043 -+ yy = yy * vc->vc_font.height + vc->vc_decor.ty;
3044 -+
3045 -+ fgx = cc2cx(fg_color);
3046 -+ bgx = cc2cx(bg_color);
3047 -+
3048 -+ while (count--) {
3049 -+ c = scr_readw(s++);
3050 -+ src = vc->vc_font.data + (c & charmask) * vc->vc_font.height *
3051 -+ ((vc->vc_font.width + 7) >> 3);
3052 -+
3053 -+ fbcon_decor_renderc(info, yy, xx, vc->vc_font.height,
3054 -+ vc->vc_font.width, src, fgx, bgx, transparent);
3055 -+ xx += vc->vc_font.width;
3056 -+ }
3057 -+}
3058 -+
3059 -+void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor)
3060 -+{
3061 -+ int i;
3062 -+ unsigned int dsize, s_pitch;
3063 -+ struct fbcon_ops *ops = info->fbcon_par;
3064 -+ struct vc_data* vc;
3065 -+ u8 *src;
3066 -+
3067 -+ /* we really don't need any cursors while the console is blanked */
3068 -+ if (info->state != FBINFO_STATE_RUNNING || ops->blank_state)
3069 -+ return;
3070 -+
3071 -+ vc = vc_cons[ops->currcon].d;
3072 -+
3073 -+ src = kmalloc(64 + sizeof(struct fb_image), GFP_ATOMIC);
3074 -+ if (!src)
3075 -+ return;
3076 -+
3077 -+ s_pitch = (cursor->image.width + 7) >> 3;
3078 -+ dsize = s_pitch * cursor->image.height;
3079 -+ if (cursor->enable) {
3080 -+ switch (cursor->rop) {
3081 -+ case ROP_XOR:
3082 -+ for (i = 0; i < dsize; i++)
3083 -+ src[i] = cursor->image.data[i] ^ cursor->mask[i];
3084 -+ break;
3085 -+ case ROP_COPY:
3086 -+ default:
3087 -+ for (i = 0; i < dsize; i++)
3088 -+ src[i] = cursor->image.data[i] & cursor->mask[i];
3089 -+ break;
3090 -+ }
3091 -+ } else
3092 -+ memcpy(src, cursor->image.data, dsize);
3093 -+
3094 -+ fbcon_decor_renderc(info,
3095 -+ cursor->image.dy + vc->vc_decor.ty,
3096 -+ cursor->image.dx + vc->vc_decor.tx,
3097 -+ cursor->image.height,
3098 -+ cursor->image.width,
3099 -+ (u8*)src,
3100 -+ cc2cx(cursor->image.fg_color),
3101 -+ cc2cx(cursor->image.bg_color),
3102 -+ cursor->image.bg_color == vc->vc_decor.bg_color);
3103 -+
3104 -+ kfree(src);
3105 -+}
3106 -+
3107 -+static void decorset(u8 *dst, int height, int width, int dstbytes,
3108 -+ u32 bgx, int bpp)
3109 -+{
3110 -+ int i;
3111 -+
3112 -+ if (bpp == 8)
3113 -+ bgx |= bgx << 8;
3114 -+ if (bpp == 16 || bpp == 8)
3115 -+ bgx |= bgx << 16;
3116 -+
3117 -+ while (height-- > 0) {
3118 -+ u8 *p = dst;
3119 -+
3120 -+ switch (bpp) {
3121 -+
3122 -+ case 32:
3123 -+ for (i=0; i < width; i++) {
3124 -+ fb_writel(bgx, p); p += 4;
3125 -+ }
3126 -+ break;
3127 -+ case 24:
3128 -+ for (i=0; i < width; i++) {
3129 -+#ifdef __LITTLE_ENDIAN
3130 -+ fb_writew((bgx & 0xffff),(u16*)p); p += 2;
3131 -+ fb_writeb((bgx >> 16),p++);
3132 -+#else
3133 -+ fb_writew((bgx >> 8),(u16*)p); p += 2;
3134 -+ fb_writeb((bgx & 0xff),p++);
3135 -+#endif
3136 -+ }
3137 -+ case 16:
3138 -+ for (i=0; i < width/4; i++) {
3139 -+ fb_writel(bgx,p); p += 4;
3140 -+ fb_writel(bgx,p); p += 4;
3141 -+ }
3142 -+ if (width & 2) {
3143 -+ fb_writel(bgx,p); p += 4;
3144 -+ }
3145 -+ if (width & 1)
3146 -+ fb_writew(bgx,(u16*)p);
3147 -+ break;
3148 -+ case 8:
3149 -+ for (i=0; i < width/4; i++) {
3150 -+ fb_writel(bgx,p); p += 4;
3151 -+ }
3152 -+
3153 -+ if (width & 2) {
3154 -+ fb_writew(bgx,p); p += 2;
3155 -+ }
3156 -+ if (width & 1)
3157 -+ fb_writeb(bgx,(u8*)p);
3158 -+ break;
3159 -+
3160 -+ }
3161 -+ dst += dstbytes;
3162 -+ }
3163 -+}
3164 -+
3165 -+void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes,
3166 -+ int srclinebytes, int bpp)
3167 -+{
3168 -+ int i;
3169 -+
3170 -+ while (height-- > 0) {
3171 -+ u32 *p = (u32 *)dst;
3172 -+ u32 *q = (u32 *)src;
3173 -+
3174 -+ switch (bpp) {
3175 -+
3176 -+ case 32:
3177 -+ for (i=0; i < width; i++)
3178 -+ fb_writel(*q++, p++);
3179 -+ break;
3180 -+ case 24:
3181 -+ for (i=0; i < (width*3/4); i++)
3182 -+ fb_writel(*q++, p++);
3183 -+ if ((width*3) % 4) {
3184 -+ if (width & 2) {
3185 -+ fb_writeb(*(u8*)q, (u8*)p);
3186 -+ } else if (width & 1) {
3187 -+ fb_writew(*(u16*)q, (u16*)p);
3188 -+ fb_writeb(*(u8*)((u16*)q+1),(u8*)((u16*)p+2));
3189 -+ }
3190 -+ }
3191 -+ break;
3192 -+ case 16:
3193 -+ for (i=0; i < width/4; i++) {
3194 -+ fb_writel(*q++, p++);
3195 -+ fb_writel(*q++, p++);
3196 -+ }
3197 -+ if (width & 2)
3198 -+ fb_writel(*q++, p++);
3199 -+ if (width & 1)
3200 -+ fb_writew(*(u16*)q, (u16*)p);
3201 -+ break;
3202 -+ case 8:
3203 -+ for (i=0; i < width/4; i++)
3204 -+ fb_writel(*q++, p++);
3205 -+
3206 -+ if (width & 2) {
3207 -+ fb_writew(*(u16*)q, (u16*)p);
3208 -+ q = (u32*) ((u16*)q + 1);
3209 -+ p = (u32*) ((u16*)p + 1);
3210 -+ }
3211 -+ if (width & 1)
3212 -+ fb_writeb(*(u8*)q, (u8*)p);
3213 -+ break;
3214 -+ }
3215 -+
3216 -+ dst += linebytes;
3217 -+ src += srclinebytes;
3218 -+ }
3219 -+}
3220 -+
3221 -+static void decorfill(struct fb_info *info, int sy, int sx, int height,
3222 -+ int width)
3223 -+{
3224 -+ int bytespp = ((info->var.bits_per_pixel + 7) >> 3);
3225 -+ int d = sy * info->fix.line_length + sx * bytespp;
3226 -+ int ds = (sy * info->var.xres + sx) * bytespp;
3227 -+
3228 -+ fbcon_decor_copy((u8 *)(info->screen_base + d), (u8 *)(info->bgdecor.data + ds),
3229 -+ height, width, info->fix.line_length, info->var.xres * bytespp,
3230 -+ info->var.bits_per_pixel);
3231 -+}
3232 -+
3233 -+void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx,
3234 -+ int height, int width)
3235 -+{
3236 -+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
3237 -+ struct fbcon_ops *ops = info->fbcon_par;
3238 -+ u8 *dst;
3239 -+ int transparent, bg_color = attr_bgcol_ec(bgshift, vc, info);
3240 -+
3241 -+ transparent = (vc->vc_decor.bg_color == bg_color);
3242 -+ sy = sy * vc->vc_font.height + vc->vc_decor.ty;
3243 -+ sx = sx * vc->vc_font.width + vc->vc_decor.tx;
3244 -+ height *= vc->vc_font.height;
3245 -+ width *= vc->vc_font.width;
3246 -+
3247 -+ /* Don't paint the background image if console is blanked */
3248 -+ if (transparent && !ops->blank_state) {
3249 -+ decorfill(info, sy, sx, height, width);
3250 -+ } else {
3251 -+ dst = (u8 *)(info->screen_base + sy * info->fix.line_length +
3252 -+ sx * ((info->var.bits_per_pixel + 7) >> 3));
3253 -+ decorset(dst, height, width, info->fix.line_length, cc2cx(bg_color),
3254 -+ info->var.bits_per_pixel);
3255 -+ }
3256 -+}
3257 -+
3258 -+void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info,
3259 -+ int bottom_only)
3260 -+{
3261 -+ unsigned int tw = vc->vc_cols*vc->vc_font.width;
3262 -+ unsigned int th = vc->vc_rows*vc->vc_font.height;
3263 -+
3264 -+ if (!bottom_only) {
3265 -+ /* top margin */
3266 -+ decorfill(info, 0, 0, vc->vc_decor.ty, info->var.xres);
3267 -+ /* left margin */
3268 -+ decorfill(info, vc->vc_decor.ty, 0, th, vc->vc_decor.tx);
3269 -+ /* right margin */
3270 -+ decorfill(info, vc->vc_decor.ty, vc->vc_decor.tx + tw, th,
3271 -+ info->var.xres - vc->vc_decor.tx - tw);
3272 -+ }
3273 -+ decorfill(info, vc->vc_decor.ty + th, 0,
3274 -+ info->var.yres - vc->vc_decor.ty - th, info->var.xres);
3275 -+}
3276 -+
3277 -+void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y,
3278 -+ int sx, int dx, int width)
3279 -+{
3280 -+ u16 *d = (u16 *) (vc->vc_origin + vc->vc_size_row * y + dx * 2);
3281 -+ u16 *s = d + (dx - sx);
3282 -+ u16 *start = d;
3283 -+ u16 *ls = d;
3284 -+ u16 *le = d + width;
3285 -+ u16 c;
3286 -+ int x = dx;
3287 -+ u16 attr = 1;
3288 -+
3289 -+ do {
3290 -+ c = scr_readw(d);
3291 -+ if (attr != (c & 0xff00)) {
3292 -+ attr = c & 0xff00;
3293 -+ if (d > start) {
3294 -+ fbcon_decor_putcs(vc, info, start, d - start, y, x);
3295 -+ x += d - start;
3296 -+ start = d;
3297 -+ }
3298 -+ }
3299 -+ if (s >= ls && s < le && c == scr_readw(s)) {
3300 -+ if (d > start) {
3301 -+ fbcon_decor_putcs(vc, info, start, d - start, y, x);
3302 -+ x += d - start + 1;
3303 -+ start = d + 1;
3304 -+ } else {
3305 -+ x++;
3306 -+ start++;
3307 -+ }
3308 -+ }
3309 -+ s++;
3310 -+ d++;
3311 -+ } while (d < le);
3312 -+ if (d > start)
3313 -+ fbcon_decor_putcs(vc, info, start, d - start, y, x);
3314 -+}
3315 -+
3316 -+void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank)
3317 -+{
3318 -+ if (blank) {
3319 -+ decorset((u8 *)info->screen_base, info->var.yres, info->var.xres,
3320 -+ info->fix.line_length, 0, info->var.bits_per_pixel);
3321 -+ } else {
3322 -+ update_screen(vc);
3323 -+ fbcon_decor_clear_margins(vc, info, 0);
3324 -+ }
3325 -+}
3326 -+
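For reference, the rendering loop in fbcon_decor_renderc above expands a monochrome glyph bitmap bit by bit: each source byte supplies eight pixels, a set bit selects the foreground pixel value, and a cleared bit falls back to either the background-decoration pixel or the solid background colour. A minimal user-space sketch of the 32 bpp case follows; the function and buffer names are illustrative only and are not part of the removed patch.

    #include <stdint.h>

    /* Expand one row of a 1-bpp glyph into 32-bpp pixels (sketch).
     * bits:  packed glyph row, most significant bit first
     * decor: matching row of the background picture, or NULL
     * fgx, bgx: foreground/background pixel values (already resolved
     *           through the pseudo palette, as cc2cx() does above)   */
    static void expand_row_32(const uint8_t *bits, unsigned int width,
                              const uint32_t *decor, uint32_t fgx, uint32_t bgx,
                              uint32_t *out)
    {
            unsigned int x;
            uint8_t d = 0;

            for (x = 0; x < width; x++) {
                    if ((x & 7) == 0)       /* refill the shift register every 8 pixels */
                            d = *bits++;
                    out[x] = (d & 0x80) ? fgx : (decor ? decor[x] : bgx);
                    d <<= 1;                /* next glyph bit moves into the MSB */
            }
    }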
3327 -From ea6ca92753106f1e0773acd1f18c71ae79a6f9b0 Mon Sep 17 00:00:00 2001
3328 -From: Mike Pagano <mpagano@g.o>
3329 -Date: Tue, 27 Aug 2013 07:58:05 -0400
3330 -Subject: [PATCH] fbcondecor port
3331 -
3332 ----
3333 - drivers/video/console/fbcon.c | 167 ++++++++++++++++++++++++++++++++++++------
3334 - 1 file changed, 143 insertions(+), 24 deletions(-)
3335 -
3336 -diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
3337 -index cd8a802..666556c 100644
3338 ---- a/drivers/video/console/fbcon.c
3339 -+++ b/drivers/video/console/fbcon.c
3340 -@@ -79,6 +79,7 @@
3341 - #include <asm/irq.h>
3342 -
3343 - #include "fbcon.h"
3344 -+#include "fbcondecor.h"
3345 -
3346 - #ifdef FBCONDEBUG
3347 - # define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __func__ , ## args)
3348 -@@ -94,7 +95,7 @@ enum {
3349 -
3350 - static struct display fb_display[MAX_NR_CONSOLES];
3351 -
3352 --static signed char con2fb_map[MAX_NR_CONSOLES];
3353 -+signed char con2fb_map[MAX_NR_CONSOLES];
3354 - static signed char con2fb_map_boot[MAX_NR_CONSOLES];
3355 -
3356 - static int logo_lines;
3357 -@@ -286,7 +287,7 @@ static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info)
3358 - !vt_force_oops_output(vc);
3359 - }
3360 -
3361 --static int get_color(struct vc_data *vc, struct fb_info *info,
3362 -+int get_color(struct vc_data *vc, struct fb_info *info,
3363 - u16 c, int is_fg)
3364 - {
3365 - int depth = fb_get_color_depth(&info->var, &info->fix);
3366 -@@ -551,6 +552,9 @@ static int do_fbcon_takeover(int show_logo)
3367 - info_idx = -1;
3368 - } else {
3369 - fbcon_has_console_bind = 1;
3370 -+#ifdef CONFIG_FB_CON_DECOR
3371 -+ fbcon_decor_init();
3372 -+#endif
3373 - }
3374 -
3375 - return err;
3376 -@@ -1007,6 +1011,12 @@ static const char *fbcon_startup(void)
3377 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3378 - cols /= vc->vc_font.width;
3379 - rows /= vc->vc_font.height;
3380 -+
3381 -+ if (fbcon_decor_active(info, vc)) {
3382 -+ cols = vc->vc_decor.twidth / vc->vc_font.width;
3383 -+ rows = vc->vc_decor.theight / vc->vc_font.height;
3384 -+ }
3385 -+
3386 - vc_resize(vc, cols, rows);
3387 -
3388 - DPRINTK("mode: %s\n", info->fix.id);
3389 -@@ -1036,7 +1046,7 @@ static void fbcon_init(struct vc_data *vc, int init)
3390 - cap = info->flags;
3391 -
3392 - if (vc != svc || logo_shown == FBCON_LOGO_DONTSHOW ||
3393 -- (info->fix.type == FB_TYPE_TEXT))
3394 -+ (info->fix.type == FB_TYPE_TEXT) || fbcon_decor_active(info, vc))
3395 - logo = 0;
3396 -
3397 - if (var_to_display(p, &info->var, info))
3398 -@@ -1260,6 +1270,11 @@ static void fbcon_clear(struct vc_data *vc, int sy, int sx, int height,
3399 - fbcon_clear_margins(vc, 0);
3400 - }
3401 -
3402 -+ if (fbcon_decor_active(info, vc)) {
3403 -+ fbcon_decor_clear(vc, info, sy, sx, height, width);
3404 -+ return;
3405 -+ }
3406 -+
3407 - /* Split blits that cross physical y_wrap boundary */
3408 -
3409 - y_break = p->vrows - p->yscroll;
3410 -@@ -1279,10 +1294,15 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s,
3411 - struct display *p = &fb_display[vc->vc_num];
3412 - struct fbcon_ops *ops = info->fbcon_par;
3413 -
3414 -- if (!fbcon_is_inactive(vc, info))
3415 -- ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
3416 -- get_color(vc, info, scr_readw(s), 1),
3417 -- get_color(vc, info, scr_readw(s), 0));
3418 -+ if (!fbcon_is_inactive(vc, info)) {
3419 -+
3420 -+ if (fbcon_decor_active(info, vc))
3421 -+ fbcon_decor_putcs(vc, info, s, count, ypos, xpos);
3422 -+ else
3423 -+ ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
3424 -+ get_color(vc, info, scr_readw(s), 1),
3425 -+ get_color(vc, info, scr_readw(s), 0));
3426 -+ }
3427 - }
3428 -
3429 - static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos)
3430 -@@ -1297,9 +1317,6 @@ static void fbcon_clear_margins(struct vc_data *vc, int bottom_only)
3431 - {
3432 - struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
3433 - struct fbcon_ops *ops = info->fbcon_par;
3434 --
3435 -- if (!fbcon_is_inactive(vc, info))
3436 -- ops->clear_margins(vc, info, bottom_only);
3437 - }
3438 -
3439 - static void fbcon_cursor(struct vc_data *vc, int mode)
3440 -@@ -1819,7 +1836,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
3441 - count = vc->vc_rows;
3442 - if (softback_top)
3443 - fbcon_softback_note(vc, t, count);
3444 -- if (logo_shown >= 0)
3445 -+ if (logo_shown >= 0 || fbcon_decor_active(info, vc))
3446 - goto redraw_up;
3447 - switch (p->scrollmode) {
3448 - case SCROLL_MOVE:
3449 -@@ -1912,6 +1929,8 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
3450 - count = vc->vc_rows;
3451 - if (logo_shown >= 0)
3452 - goto redraw_down;
3453 -+ if (fbcon_decor_active(info, vc))
3454 -+ goto redraw_down;
3455 - switch (p->scrollmode) {
3456 - case SCROLL_MOVE:
3457 - fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
3458 -@@ -2060,6 +2079,13 @@ static void fbcon_bmove_rec(struct vc_data *vc, struct display *p, int sy, int s
3459 - }
3460 - return;
3461 - }
3462 -+
3463 -+ if (fbcon_decor_active(info, vc) && sy == dy && height == 1) {
3464 -+ /* must use slower redraw bmove to keep background pic intact */
3465 -+ fbcon_decor_bmove_redraw(vc, info, sy, sx, dx, width);
3466 -+ return;
3467 -+ }
3468 -+
3469 - ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
3470 - height, width);
3471 - }
3472 -@@ -2130,8 +2156,8 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
3473 - var.yres = virt_h * virt_fh;
3474 - x_diff = info->var.xres - var.xres;
3475 - y_diff = info->var.yres - var.yres;
3476 -- if (x_diff < 0 || x_diff > virt_fw ||
3477 -- y_diff < 0 || y_diff > virt_fh) {
3478 -+ if ((x_diff < 0 || x_diff > virt_fw ||
3479 -+ y_diff < 0 || y_diff > virt_fh) && !vc->vc_decor.state) {
3480 - const struct fb_videomode *mode;
3481 -
3482 - DPRINTK("attempting resize %ix%i\n", var.xres, var.yres);
3483 -@@ -2168,6 +2194,22 @@ static int fbcon_switch(struct vc_data *vc)
3484 - info = registered_fb[con2fb_map[vc->vc_num]];
3485 - ops = info->fbcon_par;
3486 -
3487 -+ prev_console = ops->currcon;
3488 -+ if (prev_console != -1)
3489 -+ old_info = registered_fb[con2fb_map[prev_console]];
3490 -+
3491 -+#ifdef CONFIG_FB_CON_DECOR
3492 -+ if (!fbcon_decor_active_vc(vc) && info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
3493 -+ struct vc_data *vc_curr = vc_cons[prev_console].d;
3494 -+ if (vc_curr && fbcon_decor_active_vc(vc_curr)) {
3495 -+ /* Clear the screen to avoid displaying funky colors during
3496 -+ * palette updates. */
3497 -+ memset((u8*)info->screen_base + info->fix.line_length * info->var.yoffset,
3498 -+ 0, info->var.yres * info->fix.line_length);
3499 -+ }
3500 -+ }
3501 -+#endif
3502 -+
3503 - if (softback_top) {
3504 - if (softback_lines)
3505 - fbcon_set_origin(vc);
3506 -@@ -2185,9 +2227,6 @@ static int fbcon_switch(struct vc_data *vc)
3507 - logo_shown = FBCON_LOGO_CANSHOW;
3508 - }
3509 -
3510 -- prev_console = ops->currcon;
3511 -- if (prev_console != -1)
3512 -- old_info = registered_fb[con2fb_map[prev_console]];
3513 - /*
3514 - * FIXME: If we have multiple fbdev's loaded, we need to
3515 - * update all info->currcon. Perhaps, we can place this
3516 -@@ -2231,6 +2270,18 @@ static int fbcon_switch(struct vc_data *vc)
3517 - fbcon_del_cursor_timer(old_info);
3518 - }
3519 -
3520 -+ if (fbcon_decor_active_vc(vc)) {
3521 -+ struct vc_data *vc_curr = vc_cons[prev_console].d;
3522 -+
3523 -+ if (!vc_curr->vc_decor.theme ||
3524 -+ strcmp(vc->vc_decor.theme, vc_curr->vc_decor.theme) ||
3525 -+ (fbcon_decor_active_nores(info, vc_curr) &&
3526 -+ !fbcon_decor_active(info, vc_curr))) {
3527 -+ fbcon_decor_disable(vc, 0);
3528 -+ fbcon_decor_call_helper("modechange", vc->vc_num);
3529 -+ }
3530 -+ }
3531 -+
3532 - if (fbcon_is_inactive(vc, info) ||
3533 - ops->blank_state != FB_BLANK_UNBLANK)
3534 - fbcon_del_cursor_timer(info);
3535 -@@ -2344,10 +2395,14 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
3536 - ops->blank_state = blank;
3537 - fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW);
3538 - ops->cursor_flash = (!blank);
3539 --
3540 -- if (!(info->flags & FBINFO_MISC_USEREVENT))
3541 -- if (fb_blank(info, blank))
3542 -- fbcon_generic_blank(vc, info, blank);
3543 -+ if (!(info->flags & FBINFO_MISC_USEREVENT)) {
3544 -+ if (fb_blank(info, blank)) {
3545 -+ if (fbcon_decor_active(info, vc))
3546 -+ fbcon_decor_blank(vc, info, blank);
3547 -+ else
3548 -+ fbcon_generic_blank(vc, info, blank);
3549 -+ }
3550 -+ }
3551 - }
3552 -
3553 - if (!blank)
3554 -@@ -2522,10 +2577,18 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
3555 - }
3556 -
3557 - if (resize) {
3558 -+ /* reset wrap/pan */
3559 - int cols, rows;
3560 -
3561 - cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres);
3562 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3563 -+
3564 -+ if (fbcon_decor_active(info, vc)) {
3565 -+ info->var.xoffset = info->var.yoffset = p->yscroll = 0;
3566 -+ cols = vc->vc_decor.twidth;
3567 -+ rows = vc->vc_decor.theight;
3568 -+ }
3569 -+
3570 - cols /= w;
3571 - rows /= h;
3572 - vc_resize(vc, cols, rows);
3573 -@@ -2657,7 +2720,11 @@ static int fbcon_set_palette(struct vc_data *vc, unsigned char *table)
3574 - int i, j, k, depth;
3575 - u8 val;
3576 -
3577 -- if (fbcon_is_inactive(vc, info))
3578 -+ if (fbcon_is_inactive(vc, info)
3579 -+#ifdef CONFIG_FB_CON_DECOR
3580 -+ || vc->vc_num != fg_console
3581 -+#endif
3582 -+ )
3583 - return -EINVAL;
3584 -
3585 - if (!CON_IS_VISIBLE(vc))
3586 -@@ -2683,7 +2750,49 @@ static int fbcon_set_palette(struct vc_data *vc, unsigned char *table)
3587 - } else
3588 - fb_copy_cmap(fb_default_cmap(1 << depth), &palette_cmap);
3589 -
3590 -- return fb_set_cmap(&palette_cmap, info);
3591 -+ if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
3592 -+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
3593 -+
3594 -+ u16 *red, *green, *blue;
3595 -+ int minlen = min(min(info->var.red.length, info->var.green.length),
3596 -+ info->var.blue.length);
3597 -+ int h;
3598 -+
3599 -+ struct fb_cmap cmap = {
3600 -+ .start = 0,
3601 -+ .len = (1 << minlen),
3602 -+ .red = NULL,
3603 -+ .green = NULL,
3604 -+ .blue = NULL,
3605 -+ .transp = NULL
3606 -+ };
3607 -+
3608 -+ red = kmalloc(256 * sizeof(u16) * 3, GFP_KERNEL);
3609 -+
3610 -+ if (!red)
3611 -+ goto out;
3612 -+
3613 -+ green = red + 256;
3614 -+ blue = green + 256;
3615 -+ cmap.red = red;
3616 -+ cmap.green = green;
3617 -+ cmap.blue = blue;
3618 -+
3619 -+ for (i = 0; i < cmap.len; i++) {
3620 -+ red[i] = green[i] = blue[i] = (0xffff * i)/(cmap.len-1);
3621 -+ }
3622 -+
3623 -+ h = fb_set_cmap(&cmap, info);
3624 -+ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d);
3625 -+ kfree(red);
3626 -+
3627 -+ return h;
3628 -+
3629 -+ } else if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
3630 -+ info->var.bits_per_pixel == 8 && info->bgdecor.cmap.red != NULL)
3631 -+ fb_set_cmap(&info->bgdecor.cmap, info);
3632 -+
3633 -+out: return fb_set_cmap(&palette_cmap, info);
3634 - }
3635 -
3636 - static u16 *fbcon_screen_pos(struct vc_data *vc, int offset)
3637 -@@ -2909,7 +3018,13 @@ static void fbcon_modechanged(struct fb_info *info)
3638 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3639 - cols /= vc->vc_font.width;
3640 - rows /= vc->vc_font.height;
3641 -- vc_resize(vc, cols, rows);
3642 -+ if (!fbcon_decor_active_nores(info, vc)) {
3643 -+ vc_resize(vc, cols, rows);
3644 -+ } else {
3645 -+ fbcon_decor_disable(vc, 0);
3646 -+ fbcon_decor_call_helper("modechange", vc->vc_num);
3647 -+ }
3648 -+
3649 - updatescrollmode(p, info, vc);
3650 - scrollback_max = 0;
3651 - scrollback_current = 0;
3652 -@@ -2954,7 +3069,10 @@ static void fbcon_set_all_vcs(struct fb_info *info)
3653 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3654 - cols /= vc->vc_font.width;
3655 - rows /= vc->vc_font.height;
3656 -- vc_resize(vc, cols, rows);
3657 -+ if (!fbcon_decor_active_nores(info, vc)) {
3658 -+ vc_resize(vc, cols, rows);
3659 -+ }
3660 -+
3661 - }
3662 -
3663 - if (fg != -1)
3664 -@@ -3570,6 +3688,7 @@ static void fbcon_exit(void)
3665 - }
3666 - }
3667 -
3668 -+ fbcon_decor_exit();
3669 - fbcon_has_exited = 1;
3670 - }
3671 -
3672 ---
3673 -1.8.1.5
3674 -
3675 -diff --git a/drivers/video/console/fbcondecor.c b/drivers/video/console/fbcondecor.c
3676 -new file mode 100644
3677 -index 0000000..7189ce6
3678 ---- /dev/null
3679 -+++ b/drivers/video/console/fbcondecor.c
3680 -@@ -0,0 +1,555 @@
3681 -+/*
3682 -+ * linux/drivers/video/console/fbcondecor.c -- Framebuffer console decorations
3683 -+ *
3684 -+ * Copyright (C) 2004-2009 Michal Januszewski <spock@g.o>
3685 -+ *
3686 -+ * Code based upon "Bootsplash" (C) 2001-2003
3687 -+ * Volker Poplawski <volker@×××××××××.de>,
3688 -+ * Stefan Reinauer <stepan@××××.de>,
3689 -+ * Steffen Winterfeldt <snwint@××××.de>,
3690 -+ * Michael Schroeder <mls@××××.de>,
3691 -+ * Ken Wimer <wimer@××××.de>.
3692 -+ *
3693 -+ * Compat ioctl support by Thorsten Klein <TK@××××××××××××××.de>.
3694 -+ *
3695 -+ * This file is subject to the terms and conditions of the GNU General Public
3696 -+ * License. See the file COPYING in the main directory of this archive for
3697 -+ * more details.
3698 -+ *
3699 -+ */
3700 -+#include <linux/module.h>
3701 -+#include <linux/kernel.h>
3702 -+#include <linux/string.h>
3703 -+#include <linux/types.h>
3704 -+#include <linux/fb.h>
3705 -+#include <linux/vt_kern.h>
3706 -+#include <linux/vmalloc.h>
3707 -+#include <linux/unistd.h>
3708 -+#include <linux/syscalls.h>
3709 -+#include <linux/init.h>
3710 -+#include <linux/proc_fs.h>
3711 -+#include <linux/workqueue.h>
3712 -+#include <linux/kmod.h>
3713 -+#include <linux/miscdevice.h>
3714 -+#include <linux/device.h>
3715 -+#include <linux/fs.h>
3716 -+#include <linux/compat.h>
3717 -+#include <linux/console.h>
3718 -+
3719 -+#include <asm/uaccess.h>
3720 -+#include <asm/irq.h>
3721 -+
3722 -+#include "fbcon.h"
3723 -+#include "fbcondecor.h"
3724 -+
3725 -+extern signed char con2fb_map[];
3726 -+static int fbcon_decor_enable(struct vc_data *vc);
3727 -+char fbcon_decor_path[KMOD_PATH_LEN] = "/sbin/fbcondecor_helper";
3728 -+static int initialized = 0;
3729 -+
3730 -+int fbcon_decor_call_helper(char* cmd, unsigned short vc)
3731 -+{
3732 -+ char *envp[] = {
3733 -+ "HOME=/",
3734 -+ "PATH=/sbin:/bin",
3735 -+ NULL
3736 -+ };
3737 -+
3738 -+ char tfb[5];
3739 -+ char tcons[5];
3740 -+ unsigned char fb = (int) con2fb_map[vc];
3741 -+
3742 -+ char *argv[] = {
3743 -+ fbcon_decor_path,
3744 -+ "2",
3745 -+ cmd,
3746 -+ tcons,
3747 -+ tfb,
3748 -+ vc_cons[vc].d->vc_decor.theme,
3749 -+ NULL
3750 -+ };
3751 -+
3752 -+ snprintf(tfb,5,"%d",fb);
3753 -+ snprintf(tcons,5,"%d",vc);
3754 -+
3755 -+ return call_usermodehelper(fbcon_decor_path, argv, envp, UMH_WAIT_EXEC);
3756 -+}
3757 -+
3758 -+/* Disables fbcondecor on a virtual console; called with console sem held. */
3759 -+int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw)
3760 -+{
3761 -+ struct fb_info* info;
3762 -+
3763 -+ if (!vc->vc_decor.state)
3764 -+ return -EINVAL;
3765 -+
3766 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3767 -+
3768 -+ if (info == NULL)
3769 -+ return -EINVAL;
3770 -+
3771 -+ vc->vc_decor.state = 0;
3772 -+ vc_resize(vc, info->var.xres / vc->vc_font.width,
3773 -+ info->var.yres / vc->vc_font.height);
3774 -+
3775 -+ if (fg_console == vc->vc_num && redraw) {
3776 -+ redraw_screen(vc, 0);
3777 -+ update_region(vc, vc->vc_origin +
3778 -+ vc->vc_size_row * vc->vc_top,
3779 -+ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
3780 -+ }
3781 -+
3782 -+ printk(KERN_INFO "fbcondecor: switched decor state to 'off' on console %d\n",
3783 -+ vc->vc_num);
3784 -+
3785 -+ return 0;
3786 -+}
3787 -+
3788 -+/* Enables fbcondecor on a virtual console; called with console sem held. */
3789 -+static int fbcon_decor_enable(struct vc_data *vc)
3790 -+{
3791 -+ struct fb_info* info;
3792 -+
3793 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3794 -+
3795 -+ if (vc->vc_decor.twidth == 0 || vc->vc_decor.theight == 0 ||
3796 -+ info == NULL || vc->vc_decor.state || (!info->bgdecor.data &&
3797 -+ vc->vc_num == fg_console))
3798 -+ return -EINVAL;
3799 -+
3800 -+ vc->vc_decor.state = 1;
3801 -+ vc_resize(vc, vc->vc_decor.twidth / vc->vc_font.width,
3802 -+ vc->vc_decor.theight / vc->vc_font.height);
3803 -+
3804 -+ if (fg_console == vc->vc_num) {
3805 -+ redraw_screen(vc, 0);
3806 -+ update_region(vc, vc->vc_origin +
3807 -+ vc->vc_size_row * vc->vc_top,
3808 -+ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
3809 -+ fbcon_decor_clear_margins(vc, info, 0);
3810 -+ }
3811 -+
3812 -+ printk(KERN_INFO "fbcondecor: switched decor state to 'on' on console %d\n",
3813 -+ vc->vc_num);
3814 -+
3815 -+ return 0;
3816 -+}
3817 -+
3818 -+static inline int fbcon_decor_ioctl_dosetstate(struct vc_data *vc, unsigned int state, unsigned char origin)
3819 -+{
3820 -+ int ret;
3821 -+
3822 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3823 -+ console_lock();
3824 -+ if (!state)
3825 -+ ret = fbcon_decor_disable(vc, 1);
3826 -+ else
3827 -+ ret = fbcon_decor_enable(vc);
3828 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3829 -+ console_unlock();
3830 -+
3831 -+ return ret;
3832 -+}
3833 -+
3834 -+static inline void fbcon_decor_ioctl_dogetstate(struct vc_data *vc, unsigned int *state)
3835 -+{
3836 -+ *state = vc->vc_decor.state;
3837 -+}
3838 -+
3839 -+static int fbcon_decor_ioctl_dosetcfg(struct vc_data *vc, struct vc_decor *cfg, unsigned char origin)
3840 -+{
3841 -+ struct fb_info *info;
3842 -+ int len;
3843 -+ char *tmp;
3844 -+
3845 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3846 -+
3847 -+ if (info == NULL || !cfg->twidth || !cfg->theight ||
3848 -+ cfg->tx + cfg->twidth > info->var.xres ||
3849 -+ cfg->ty + cfg->theight > info->var.yres)
3850 -+ return -EINVAL;
3851 -+
3852 -+ len = strlen_user(cfg->theme);
3853 -+ if (!len || len > FBCON_DECOR_THEME_LEN)
3854 -+ return -EINVAL;
3855 -+ tmp = kmalloc(len, GFP_KERNEL);
3856 -+ if (!tmp)
3857 -+ return -ENOMEM;
3858 -+ if (copy_from_user(tmp, (void __user *)cfg->theme, len))
3859 -+ return -EFAULT;
3860 -+ cfg->theme = tmp;
3861 -+ cfg->state = 0;
3862 -+
3863 -+ /* If this ioctl is a response to a request from kernel, the console sem
3864 -+ * is already held; we also don't need to disable decor because either the
3865 -+ * new config and background picture will be successfully loaded, and the
3866 -+ * decor will stay on, or in case of a failure it'll be turned off in fbcon. */
3867 -+// if (origin == FBCON_DECOR_IO_ORIG_USER) {
3868 -+ console_lock();
3869 -+ if (vc->vc_decor.state)
3870 -+ fbcon_decor_disable(vc, 1);
3871 -+// }
3872 -+
3873 -+ if (vc->vc_decor.theme)
3874 -+ kfree(vc->vc_decor.theme);
3875 -+
3876 -+ vc->vc_decor = *cfg;
3877 -+
3878 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3879 -+ console_unlock();
3880 -+
3881 -+ printk(KERN_INFO "fbcondecor: console %d using theme '%s'\n",
3882 -+ vc->vc_num, vc->vc_decor.theme);
3883 -+ return 0;
3884 -+}
3885 -+
3886 -+static int fbcon_decor_ioctl_dogetcfg(struct vc_data *vc, struct vc_decor *decor)
3887 -+{
3888 -+ char __user *tmp;
3889 -+
3890 -+ tmp = decor->theme;
3891 -+ *decor = vc->vc_decor;
3892 -+ decor->theme = tmp;
3893 -+
3894 -+ if (vc->vc_decor.theme) {
3895 -+ if (copy_to_user(tmp, vc->vc_decor.theme, strlen(vc->vc_decor.theme) + 1))
3896 -+ return -EFAULT;
3897 -+ } else
3898 -+ if (put_user(0, tmp))
3899 -+ return -EFAULT;
3900 -+
3901 -+ return 0;
3902 -+}
3903 -+
3904 -+static int fbcon_decor_ioctl_dosetpic(struct vc_data *vc, struct fb_image *img, unsigned char origin)
3905 -+{
3906 -+ struct fb_info *info;
3907 -+ int len;
3908 -+ u8 *tmp;
3909 -+
3910 -+ if (vc->vc_num != fg_console)
3911 -+ return -EINVAL;
3912 -+
3913 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3914 -+
3915 -+ if (info == NULL)
3916 -+ return -EINVAL;
3917 -+
3918 -+ if (img->width != info->var.xres || img->height != info->var.yres) {
3919 -+ printk(KERN_ERR "fbcondecor: picture dimensions mismatch\n");
3920 -+ printk(KERN_ERR "%dx%d vs %dx%d\n", img->width, img->height, info->var.xres, info->var.yres);
3921 -+ return -EINVAL;
3922 -+ }
3923 -+
3924 -+ if (img->depth != info->var.bits_per_pixel) {
3925 -+ printk(KERN_ERR "fbcondecor: picture depth mismatch\n");
3926 -+ return -EINVAL;
3927 -+ }
3928 -+
3929 -+ if (img->depth == 8) {
3930 -+ if (!img->cmap.len || !img->cmap.red || !img->cmap.green ||
3931 -+ !img->cmap.blue)
3932 -+ return -EINVAL;
3933 -+
3934 -+ tmp = vmalloc(img->cmap.len * 3 * 2);
3935 -+ if (!tmp)
3936 -+ return -ENOMEM;
3937 -+
3938 -+ if (copy_from_user(tmp,
3939 -+ (void __user*)img->cmap.red, (img->cmap.len << 1)) ||
3940 -+ copy_from_user(tmp + (img->cmap.len << 1),
3941 -+ (void __user*)img->cmap.green, (img->cmap.len << 1)) ||
3942 -+ copy_from_user(tmp + (img->cmap.len << 2),
3943 -+ (void __user*)img->cmap.blue, (img->cmap.len << 1))) {
3944 -+ vfree(tmp);
3945 -+ return -EFAULT;
3946 -+ }
3947 -+
3948 -+ img->cmap.transp = NULL;
3949 -+ img->cmap.red = (u16*)tmp;
3950 -+ img->cmap.green = img->cmap.red + img->cmap.len;
3951 -+ img->cmap.blue = img->cmap.green + img->cmap.len;
3952 -+ } else {
3953 -+ img->cmap.red = NULL;
3954 -+ }
3955 -+
3956 -+ len = ((img->depth + 7) >> 3) * img->width * img->height;
3957 -+
3958 -+ /*
3959 -+ * Allocate an additional byte so that we never go outside of the
3960 -+ * buffer boundaries in the rendering functions in a 24 bpp mode.
3961 -+ */
3962 -+ tmp = vmalloc(len + 1);
3963 -+
3964 -+ if (!tmp)
3965 -+ goto out;
3966 -+
3967 -+ if (copy_from_user(tmp, (void __user*)img->data, len))
3968 -+ goto out;
3969 -+
3970 -+ img->data = tmp;
3971 -+
3972 -+ /* If this ioctl is a response to a request from kernel, the console sem
3973 -+ * is already held. */
3974 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3975 -+ console_lock();
3976 -+
3977 -+ if (info->bgdecor.data)
3978 -+ vfree((u8*)info->bgdecor.data);
3979 -+ if (info->bgdecor.cmap.red)
3980 -+ vfree(info->bgdecor.cmap.red);
3981 -+
3982 -+ info->bgdecor = *img;
3983 -+
3984 -+ if (fbcon_decor_active_vc(vc) && fg_console == vc->vc_num) {
3985 -+ redraw_screen(vc, 0);
3986 -+ update_region(vc, vc->vc_origin +
3987 -+ vc->vc_size_row * vc->vc_top,
3988 -+ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
3989 -+ fbcon_decor_clear_margins(vc, info, 0);
3990 -+ }
3991 -+
3992 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3993 -+ console_unlock();
3994 -+
3995 -+ return 0;
3996 -+
3997 -+out: if (img->cmap.red)
3998 -+ vfree(img->cmap.red);
3999 -+
4000 -+ if (tmp)
4001 -+ vfree(tmp);
4002 -+ return -ENOMEM;
4003 -+}
4004 -+
4005 -+static long fbcon_decor_ioctl(struct file *filp, u_int cmd, u_long arg)
4006 -+{
4007 -+ struct fbcon_decor_iowrapper __user *wrapper = (void __user*) arg;
4008 -+ struct vc_data *vc = NULL;
4009 -+ unsigned short vc_num = 0;
4010 -+ unsigned char origin = 0;
4011 -+ void __user *data = NULL;
4012 -+
4013 -+ if (!access_ok(VERIFY_READ, wrapper,
4014 -+ sizeof(struct fbcon_decor_iowrapper)))
4015 -+ return -EFAULT;
4016 -+
4017 -+ __get_user(vc_num, &wrapper->vc);
4018 -+ __get_user(origin, &wrapper->origin);
4019 -+ __get_user(data, &wrapper->data);
4020 -+
4021 -+ if (!vc_cons_allocated(vc_num))
4022 -+ return -EINVAL;
4023 -+
4024 -+ vc = vc_cons[vc_num].d;
4025 -+
4026 -+ switch (cmd) {
4027 -+ case FBIOCONDECOR_SETPIC:
4028 -+ {
4029 -+ struct fb_image img;
4030 -+ if (copy_from_user(&img, (struct fb_image __user *)data, sizeof(struct fb_image)))
4031 -+ return -EFAULT;
4032 -+
4033 -+ return fbcon_decor_ioctl_dosetpic(vc, &img, origin);
4034 -+ }
4035 -+ case FBIOCONDECOR_SETCFG:
4036 -+ {
4037 -+ struct vc_decor cfg;
4038 -+ if (copy_from_user(&cfg, (struct vc_decor __user *)data, sizeof(struct vc_decor)))
4039 -+ return -EFAULT;
4040 -+
4041 -+ return fbcon_decor_ioctl_dosetcfg(vc, &cfg, origin);
4042 -+ }
4043 -+ case FBIOCONDECOR_GETCFG:
4044 -+ {
4045 -+ int rval;
4046 -+ struct vc_decor cfg;
4047 -+
4048 -+ if (copy_from_user(&cfg, (struct vc_decor __user *)data, sizeof(struct vc_decor)))
4049 -+ return -EFAULT;
4050 -+
4051 -+ rval = fbcon_decor_ioctl_dogetcfg(vc, &cfg);
4052 -+
4053 -+ if (copy_to_user(data, &cfg, sizeof(struct vc_decor)))
4054 -+ return -EFAULT;
4055 -+ return rval;
4056 -+ }
4057 -+ case FBIOCONDECOR_SETSTATE:
4058 -+ {
4059 -+ unsigned int state = 0;
4060 -+ if (get_user(state, (unsigned int __user *)data))
4061 -+ return -EFAULT;
4062 -+ return fbcon_decor_ioctl_dosetstate(vc, state, origin);
4063 -+ }
4064 -+ case FBIOCONDECOR_GETSTATE:
4065 -+ {
4066 -+ unsigned int state = 0;
4067 -+ fbcon_decor_ioctl_dogetstate(vc, &state);
4068 -+ return put_user(state, (unsigned int __user *)data);
4069 -+ }
4070 -+
4071 -+ default:
4072 -+ return -ENOIOCTLCMD;
4073 -+ }
4074 -+}
4075 -+
4076 -+#ifdef CONFIG_COMPAT
4077 -+
4078 -+static long fbcon_decor_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {
4079 -+
4080 -+ struct fbcon_decor_iowrapper32 __user *wrapper = (void __user *)arg;
4081 -+ struct vc_data *vc = NULL;
4082 -+ unsigned short vc_num = 0;
4083 -+ unsigned char origin = 0;
4084 -+ compat_uptr_t data_compat = 0;
4085 -+ void __user *data = NULL;
4086 -+
4087 -+ if (!access_ok(VERIFY_READ, wrapper,
4088 -+ sizeof(struct fbcon_decor_iowrapper32)))
4089 -+ return -EFAULT;
4090 -+
4091 -+ __get_user(vc_num, &wrapper->vc);
4092 -+ __get_user(origin, &wrapper->origin);
4093 -+ __get_user(data_compat, &wrapper->data);
4094 -+ data = compat_ptr(data_compat);
4095 -+
4096 -+ if (!vc_cons_allocated(vc_num))
4097 -+ return -EINVAL;
4098 -+
4099 -+ vc = vc_cons[vc_num].d;
4100 -+
4101 -+ switch (cmd) {
4102 -+ case FBIOCONDECOR_SETPIC32:
4103 -+ {
4104 -+ struct fb_image32 img_compat;
4105 -+ struct fb_image img;
4106 -+
4107 -+ if (copy_from_user(&img_compat, (struct fb_image32 __user *)data, sizeof(struct fb_image32)))
4108 -+ return -EFAULT;
4109 -+
4110 -+ fb_image_from_compat(img, img_compat);
4111 -+
4112 -+ return fbcon_decor_ioctl_dosetpic(vc, &img, origin);
4113 -+ }
4114 -+
4115 -+ case FBIOCONDECOR_SETCFG32:
4116 -+ {
4117 -+ struct vc_decor32 cfg_compat;
4118 -+ struct vc_decor cfg;
4119 -+
4120 -+ if (copy_from_user(&cfg_compat, (struct vc_decor32 __user *)data, sizeof(struct vc_decor32)))
4121 -+ return -EFAULT;
4122 -+
4123 -+ vc_decor_from_compat(cfg, cfg_compat);
4124 -+
4125 -+ return fbcon_decor_ioctl_dosetcfg(vc, &cfg, origin);
4126 -+ }
4127 -+
4128 -+ case FBIOCONDECOR_GETCFG32:
4129 -+ {
4130 -+ int rval;
4131 -+ struct vc_decor32 cfg_compat;
4132 -+ struct vc_decor cfg;
4133 -+
4134 -+ if (copy_from_user(&cfg_compat, (struct vc_decor32 __user *)data, sizeof(struct vc_decor32)))
4135 -+ return -EFAULT;
4136 -+ cfg.theme = compat_ptr(cfg_compat.theme);
4137 -+
4138 -+ rval = fbcon_decor_ioctl_dogetcfg(vc, &cfg);
4139 -+
4140 -+ vc_decor_to_compat(cfg_compat, cfg);
4141 -+
4142 -+ if (copy_to_user((struct vc_decor32 __user *)data, &cfg_compat, sizeof(struct vc_decor32)))
4143 -+ return -EFAULT;
4144 -+ return rval;
4145 -+ }
4146 -+
4147 -+ case FBIOCONDECOR_SETSTATE32:
4148 -+ {
4149 -+ compat_uint_t state_compat = 0;
4150 -+ unsigned int state = 0;
4151 -+
4152 -+ if (get_user(state_compat, (compat_uint_t __user *)data))
4153 -+ return -EFAULT;
4154 -+
4155 -+ state = (unsigned int)state_compat;
4156 -+
4157 -+ return fbcon_decor_ioctl_dosetstate(vc, state, origin);
4158 -+ }
4159 -+
4160 -+ case FBIOCONDECOR_GETSTATE32:
4161 -+ {
4162 -+ compat_uint_t state_compat = 0;
4163 -+ unsigned int state = 0;
4164 -+
4165 -+ fbcon_decor_ioctl_dogetstate(vc, &state);
4166 -+ state_compat = (compat_uint_t)state;
4167 -+
4168 -+ return put_user(state_compat, (compat_uint_t __user *)data);
4169 -+ }
4170 -+
4171 -+ default:
4172 -+ return -ENOIOCTLCMD;
4173 -+ }
4174 -+}
4175 -+#else
4176 -+ #define fbcon_decor_compat_ioctl NULL
4177 -+#endif
4178 -+
4179 -+static struct file_operations fbcon_decor_ops = {
4180 -+ .owner = THIS_MODULE,
4181 -+ .unlocked_ioctl = fbcon_decor_ioctl,
4182 -+ .compat_ioctl = fbcon_decor_compat_ioctl
4183 -+};
4184 -+
4185 -+static struct miscdevice fbcon_decor_dev = {
4186 -+ .minor = MISC_DYNAMIC_MINOR,
4187 -+ .name = "fbcondecor",
4188 -+ .fops = &fbcon_decor_ops
4189 -+};
4190 -+
4191 -+void fbcon_decor_reset()
4192 -+{
4193 -+ int i;
4194 -+
4195 -+ for (i = 0; i < num_registered_fb; i++) {
4196 -+ registered_fb[i]->bgdecor.data = NULL;
4197 -+ registered_fb[i]->bgdecor.cmap.red = NULL;
4198 -+ }
4199 -+
4200 -+ for (i = 0; i < MAX_NR_CONSOLES && vc_cons[i].d; i++) {
4201 -+ vc_cons[i].d->vc_decor.state = vc_cons[i].d->vc_decor.twidth =
4202 -+ vc_cons[i].d->vc_decor.theight = 0;
4203 -+ vc_cons[i].d->vc_decor.theme = NULL;
4204 -+ }
4205 -+
4206 -+ return;
4207 -+}
4208 -+
4209 -+int fbcon_decor_init()
4210 -+{
4211 -+ int i;
4212 -+
4213 -+ fbcon_decor_reset();
4214 -+
4215 -+ if (initialized)
4216 -+ return 0;
4217 -+
4218 -+ i = misc_register(&fbcon_decor_dev);
4219 -+ if (i) {
4220 -+ printk(KERN_ERR "fbcondecor: failed to register device\n");
4221 -+ return i;
4222 -+ }
4223 -+
4224 -+ fbcon_decor_call_helper("init", 0);
4225 -+ initialized = 1;
4226 -+ return 0;
4227 -+}
4228 -+
4229 -+int fbcon_decor_exit(void)
4230 -+{
4231 -+ fbcon_decor_reset();
4232 -+ return 0;
4233 -+}
4234 -+
4235 -+EXPORT_SYMBOL(fbcon_decor_path);
4236 ---- a/drivers/video/console/fbcondecor.h 2014-01-21 09:50:42.229900176 -0500
4237 -+++ b/drivers/video/console/fbcondecor.h 2014-01-21 09:50:21.489900340 -0500
4238 -@@ -0,0 +1,79 @@
4239 -+/*
4240 -+ * linux/drivers/video/console/fbcondecor.h -- Framebuffer Console Decoration headers
4241 -+ *
4242 -+ * Copyright (C) 2004 Michal Januszewski <spock@g.o>
4243 -+ *
4244 -+ */
4245 -+
4246 -+#ifndef __FBCON_DECOR_H
4247 -+#define __FBCON_DECOR_H
4248 -+
4249 -+#ifndef _LINUX_FB_H
4250 -+#include <linux/fb.h>
4251 -+#endif
4252 -+
4253 -+/* This is needed for vc_cons in fbcmap.c */
4254 -+#include <linux/vt_kern.h>
4255 -+
4256 -+struct fb_cursor;
4257 -+struct fb_info;
4258 -+struct vc_data;
4259 -+
4260 -+#ifdef CONFIG_FB_CON_DECOR
4261 -+/* fbcondecor.c */
4262 -+int fbcon_decor_init(void);
4263 -+int fbcon_decor_exit(void);
4264 -+int fbcon_decor_call_helper(char* cmd, unsigned short cons);
4265 -+int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw);
4266 -+void fbcon_decor_reset(void);
4267 -+
4268 -+/* cfbcondecor.c */
4269 -+void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx);
4270 -+void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor);
4271 -+void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width);
4272 -+void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only);
4273 -+void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank);
4274 -+void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width);
4275 -+void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes, int srclinesbytes, int bpp);
4276 -+void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc);
4277 -+
4278 -+/* vt.c */
4279 -+void acquire_console_sem(void);
4280 -+void release_console_sem(void);
4281 -+void do_unblank_screen(int entering_gfx);
4282 -+
4283 -+/* struct vc_data *y */
4284 -+#define fbcon_decor_active_vc(y) (y->vc_decor.state && y->vc_decor.theme)
4285 -+
4286 -+/* struct fb_info *x, struct vc_data *y */
4287 -+#define fbcon_decor_active_nores(x,y) (x->bgdecor.data && fbcon_decor_active_vc(y))
4288 -+
4289 -+/* struct fb_info *x, struct vc_data *y */
4290 -+#define fbcon_decor_active(x,y) (fbcon_decor_active_nores(x,y) && \
4291 -+ x->bgdecor.width == x->var.xres && \
4292 -+ x->bgdecor.height == x->var.yres && \
4293 -+ x->bgdecor.depth == x->var.bits_per_pixel)
4294 -+
4295 -+
4296 -+#else /* CONFIG_FB_CON_DECOR */
4297 -+
4298 -+static inline void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx) {}
4299 -+static inline void fbcon_decor_putc(struct vc_data *vc, struct fb_info *info, int c, int ypos, int xpos) {}
4300 -+static inline void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor) {}
4301 -+static inline void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) {}
4302 -+static inline void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only) {}
4303 -+static inline void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank) {}
4304 -+static inline void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width) {}
4305 -+static inline void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc) {}
4306 -+static inline int fbcon_decor_call_helper(char* cmd, unsigned short cons) { return 0; }
4307 -+static inline int fbcon_decor_init(void) { return 0; }
4308 -+static inline int fbcon_decor_exit(void) { return 0; }
4309 -+static inline int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw) { return 0; }
4310 -+
4311 -+#define fbcon_decor_active_vc(y) (0)
4312 -+#define fbcon_decor_active_nores(x,y) (0)
4313 -+#define fbcon_decor_active(x,y) (0)
4314 -+
4315 -+#endif /* CONFIG_FB_CON_DECOR */
4316 -+
4317 -+#endif /* __FBCON_DECOR_H */
4318 -diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c
4319 -index 5c3960d..162b5f4 100644
4320 ---- a/drivers/video/fbcmap.c
4321 -+++ b/drivers/video/fbcmap.c
4322 -@@ -17,6 +17,8 @@
4323 - #include <linux/slab.h>
4324 - #include <linux/uaccess.h>
4325 -
4326 -+#include "console/fbcondecor.h"
4327 -+
4328 - static u16 red2[] __read_mostly = {
4329 - 0x0000, 0xaaaa
4330 - };
4331 -@@ -249,14 +251,17 @@ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info)
4332 - if (transp)
4333 - htransp = *transp++;
4334 - if (info->fbops->fb_setcolreg(start++,
4335 -- hred, hgreen, hblue,
4336 -+ hred, hgreen, hblue,
4337 - htransp, info))
4338 - break;
4339 - }
4340 - }
4341 -- if (rc == 0)
4342 -+ if (rc == 0) {
4343 - fb_copy_cmap(cmap, &info->cmap);
4344 --
4345 -+ if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
4346 -+ info->fix.visual == FB_VISUAL_DIRECTCOLOR)
4347 -+ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d);
4348 -+ }
4349 - return rc;
4350 - }
4351 -
4352 -diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
4353 -index c6ce416..7ce6640 100644
4354 ---- a/drivers/video/fbmem.c
4355 -+++ b/drivers/video/fbmem.c
4356 -@@ -1231,15 +1231,6 @@ struct fb_fix_screeninfo32 {
4357 - u16 reserved[3];
4358 - };
4359 -
4360 --struct fb_cmap32 {
4361 -- u32 start;
4362 -- u32 len;
4363 -- compat_caddr_t red;
4364 -- compat_caddr_t green;
4365 -- compat_caddr_t blue;
4366 -- compat_caddr_t transp;
4367 --};
4368 --
4369 - static int fb_getput_cmap(struct fb_info *info, unsigned int cmd,
4370 - unsigned long arg)
4371 - {
4372 -diff --git a/include/linux/console_decor.h b/include/linux/console_decor.h
4373 -new file mode 100644
4374 -index 0000000..04b8d80
4375 ---- /dev/null
4376 -+++ b/include/linux/console_decor.h
4377 -@@ -0,0 +1,46 @@
4378 -+#ifndef _LINUX_CONSOLE_DECOR_H_
4379 -+#define _LINUX_CONSOLE_DECOR_H_ 1
4380 -+
4381 -+/* A structure used by the framebuffer console decorations (drivers/video/console/fbcondecor.c) */
4382 -+struct vc_decor {
4383 -+ __u8 bg_color; /* The color that is to be treated as transparent */
4384 -+ __u8 state; /* Current decor state: 0 = off, 1 = on */
4385 -+ __u16 tx, ty; /* Top left corner coordinates of the text field */
4386 -+ __u16 twidth, theight; /* Width and height of the text field */
4387 -+ char* theme;
4388 -+};
4389 -+
4390 -+#ifdef __KERNEL__
4391 -+#ifdef CONFIG_COMPAT
4392 -+#include <linux/compat.h>
4393 -+
4394 -+struct vc_decor32 {
4395 -+ __u8 bg_color; /* The color that is to be treated as transparent */
4396 -+ __u8 state; /* Current decor state: 0 = off, 1 = on */
4397 -+ __u16 tx, ty; /* Top left corner coordinates of the text field */
4398 -+ __u16 twidth, theight; /* Width and height of the text field */
4399 -+ compat_uptr_t theme;
4400 -+};
4401 -+
4402 -+#define vc_decor_from_compat(to, from) \
4403 -+ (to).bg_color = (from).bg_color; \
4404 -+ (to).state = (from).state; \
4405 -+ (to).tx = (from).tx; \
4406 -+ (to).ty = (from).ty; \
4407 -+ (to).twidth = (from).twidth; \
4408 -+ (to).theight = (from).theight; \
4409 -+ (to).theme = compat_ptr((from).theme)
4410 -+
4411 -+#define vc_decor_to_compat(to, from) \
4412 -+ (to).bg_color = (from).bg_color; \
4413 -+ (to).state = (from).state; \
4414 -+ (to).tx = (from).tx; \
4415 -+ (to).ty = (from).ty; \
4416 -+ (to).twidth = (from).twidth; \
4417 -+ (to).theight = (from).theight; \
4418 -+ (to).theme = ptr_to_compat((from).theme)
4419 -+
4420 -+#endif /* CONFIG_COMPAT */
4421 -+#endif /* __KERNEL__ */
4422 -+
4423 -+#endif
4424 -diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
4425 -index 7f0c329..98f5d60 100644
4426 ---- a/include/linux/console_struct.h
4427 -+++ b/include/linux/console_struct.h
4428 -@@ -19,6 +19,7 @@
4429 - struct vt_struct;
4430 -
4431 - #define NPAR 16
4432 -+#include <linux/console_decor.h>
4433 -
4434 - struct vc_data {
4435 - struct tty_port port; /* Upper level data */
4436 -@@ -107,6 +108,8 @@ struct vc_data {
4437 - unsigned long vc_uni_pagedir;
4438 - unsigned long *vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
4439 - bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
4440 -+
4441 -+ struct vc_decor vc_decor;
4442 - /* additional information is in vt_kern.h */
4443 - };
4444 -
4445 -diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h
4446 -index d31cb68..ad161bb 100644
4447 ---- a/include/uapi/linux/fb.h
4448 -+++ b/include/uapi/linux/fb.h
4449 -@@ -8,6 +8,25 @@
4450 -
4451 - #define FB_MAX 32 /* sufficient for now */
4452 -
4453 -+struct fbcon_decor_iowrapper
4454 -+{
4455 -+ unsigned short vc; /* Virtual console */
4456 -+ unsigned char origin; /* Point of origin of the request */
4457 -+ void *data;
4458 -+};
4459 -+
4460 -+#ifdef __KERNEL__
4461 -+#ifdef CONFIG_COMPAT
4462 -+#include <linux/compat.h>
4463 -+struct fbcon_decor_iowrapper32
4464 -+{
4465 -+ unsigned short vc; /* Virtual console */
4466 -+ unsigned char origin; /* Point of origin of the request */
4467 -+ compat_uptr_t data;
4468 -+};
4469 -+#endif /* CONFIG_COMPAT */
4470 -+#endif /* __KERNEL__ */
4471 -+
4472 - /* ioctls
4473 - 0x46 is 'F' */
4474 - #define FBIOGET_VSCREENINFO 0x4600
4475 -@@ -34,6 +53,24 @@
4476 - #define FBIOPUT_MODEINFO 0x4617
4477 - #define FBIOGET_DISPINFO 0x4618
4478 - #define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32)
4479 -+#define FBIOCONDECOR_SETCFG _IOWR('F', 0x19, struct fbcon_decor_iowrapper)
4480 -+#define FBIOCONDECOR_GETCFG _IOR('F', 0x1A, struct fbcon_decor_iowrapper)
4481 -+#define FBIOCONDECOR_SETSTATE _IOWR('F', 0x1B, struct fbcon_decor_iowrapper)
4482 -+#define FBIOCONDECOR_GETSTATE _IOR('F', 0x1C, struct fbcon_decor_iowrapper)
4483 -+#define FBIOCONDECOR_SETPIC _IOWR('F', 0x1D, struct fbcon_decor_iowrapper)
4484 -+#ifdef __KERNEL__
4485 -+#ifdef CONFIG_COMPAT
4486 -+#define FBIOCONDECOR_SETCFG32 _IOWR('F', 0x19, struct fbcon_decor_iowrapper32)
4487 -+#define FBIOCONDECOR_GETCFG32 _IOR('F', 0x1A, struct fbcon_decor_iowrapper32)
4488 -+#define FBIOCONDECOR_SETSTATE32 _IOWR('F', 0x1B, struct fbcon_decor_iowrapper32)
4489 -+#define FBIOCONDECOR_GETSTATE32 _IOR('F', 0x1C, struct fbcon_decor_iowrapper32)
4490 -+#define FBIOCONDECOR_SETPIC32 _IOWR('F', 0x1D, struct fbcon_decor_iowrapper32)
4491 -+#endif /* CONFIG_COMPAT */
4492 -+#endif /* __KERNEL__ */
4493 -+
4494 -+#define FBCON_DECOR_THEME_LEN 128 /* Maximum length of a theme name */
4495 -+#define FBCON_DECOR_IO_ORIG_KERNEL 0 /* Kernel ioctl origin */
4496 -+#define FBCON_DECOR_IO_ORIG_USER 1 /* User ioctl origin */
4497 -
4498 - #define FB_TYPE_PACKED_PIXELS 0 /* Packed Pixels */
4499 - #define FB_TYPE_PLANES 1 /* Non interleaved planes */
4500 -@@ -286,6 +323,28 @@ struct fb_cmap {
4501 - __u16 *transp; /* transparency, can be NULL */
4502 - };
4503 -
4504 -+#ifdef __KERNEL__
4505 -+#ifdef CONFIG_COMPAT
4506 -+struct fb_cmap32 {
4507 -+ __u32 start;
4508 -+ __u32 len; /* Number of entries */
4509 -+ compat_uptr_t red; /* Red values */
4510 -+ compat_uptr_t green;
4511 -+ compat_uptr_t blue;
4512 -+ compat_uptr_t transp; /* transparency, can be NULL */
4513 -+};
4514 -+
4515 -+#define fb_cmap_from_compat(to, from) \
4516 -+ (to).start = (from).start; \
4517 -+ (to).len = (from).len; \
4518 -+ (to).red = compat_ptr((from).red); \
4519 -+ (to).green = compat_ptr((from).green); \
4520 -+ (to).blue = compat_ptr((from).blue); \
4521 -+ (to).transp = compat_ptr((from).transp)
4522 -+
4523 -+#endif /* CONFIG_COMPAT */
4524 -+#endif /* __KERNEL__ */
4525 -+
4526 - struct fb_con2fbmap {
4527 - __u32 console;
4528 - __u32 framebuffer;
4529 -@@ -367,6 +426,34 @@ struct fb_image {
4530 - struct fb_cmap cmap; /* color map info */
4531 - };
4532 -
4533 -+#ifdef __KERNEL__
4534 -+#ifdef CONFIG_COMPAT
4535 -+struct fb_image32 {
4536 -+ __u32 dx; /* Where to place image */
4537 -+ __u32 dy;
4538 -+ __u32 width; /* Size of image */
4539 -+ __u32 height;
4540 -+ __u32 fg_color; /* Only used when a mono bitmap */
4541 -+ __u32 bg_color;
4542 -+ __u8 depth; /* Depth of the image */
4543 -+ const compat_uptr_t data; /* Pointer to image data */
4544 -+ struct fb_cmap32 cmap; /* color map info */
4545 -+};
4546 -+
4547 -+#define fb_image_from_compat(to, from) \
4548 -+ (to).dx = (from).dx; \
4549 -+ (to).dy = (from).dy; \
4550 -+ (to).width = (from).width; \
4551 -+ (to).height = (from).height; \
4552 -+ (to).fg_color = (from).fg_color; \
4553 -+ (to).bg_color = (from).bg_color; \
4554 -+ (to).depth = (from).depth; \
4555 -+ (to).data = compat_ptr((from).data); \
4556 -+ fb_cmap_from_compat((to).cmap, (from).cmap)
4557 -+
4558 -+#endif /* CONFIG_COMPAT */
4559 -+#endif /* __KERNEL__ */
4560 -+
4561 - /*
4562 - * hardware cursor control
4563 - */
4564 -
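All of the FBIOCONDECOR_* ioctls above take a struct fbcon_decor_iowrapper: the caller names the virtual console, marks the origin of the request, and points data at the command-specific payload. A hypothetical user-space query of the decor state, assuming the misc device is exposed as /dev/fbcondecor and the patched <linux/fb.h> header is installed:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/fb.h>   /* FBIOCONDECOR_GETSTATE, struct fbcon_decor_iowrapper */

    int main(void)
    {
            unsigned int state = 0;
            struct fbcon_decor_iowrapper w = {
                    .vc = 0,                        /* virtual console to query */
                    .origin = FBCON_DECOR_IO_ORIG_USER,
                    .data = &state,
            };
            int fd = open("/dev/fbcondecor", O_RDWR);

            if (fd < 0 || ioctl(fd, FBIOCONDECOR_GETSTATE, &w) < 0) {
                    perror("fbcondecor");
                    return 1;
            }
            printf("decor on console %u: %s\n", (unsigned int)w.vc,
                   state ? "on" : "off");
            close(fd);
            return 0;
    }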
4565 -diff --git a/include/linux/fb.h b/include/linux/fb.h
4566 -index d31cb68..ad161bb 100644
4567 ---- a/include/linux/fb.h
4568 -+++ b/include/linux/fb.h
4569 -@@ -488,5 +488,8 @@ #define FBINFO_STATE_SUSPENDED 1
4570 - u32 state; /* Hardware state i.e suspend */
4571 - void *fbcon_par; /* fbcon use-only private area */
4572 -+
4573 -+ struct fb_image bgdecor;
4574 -+
4575 - /* From here on everything is device dependent */
4576 - void *par;
4577 - /* we need the PCI or similar aperture base/size not
4578 -
4579 -diff --git a/kernel/sysctl.c b/kernel/sysctl.c
4580 -index 4ab1187..6561627 100644
4581 ---- a/kernel/sysctl.c
4582 -+++ b/kernel/sysctl.c
4583 -@@ -145,6 +145,10 @@ static int min_percpu_pagelist_fract = 8;
4584 - static int ngroups_max = NGROUPS_MAX;
4585 - static const int cap_last_cap = CAP_LAST_CAP;
4586 -
4587 -+#ifdef CONFIG_FB_CON_DECOR
4588 -+extern char fbcon_decor_path[];
4589 -+#endif
4590 -+
4591 - #ifdef CONFIG_INOTIFY_USER
4592 - #include <linux/inotify.h>
4593 - #endif
4594 -@@ -248,6 +252,15 @@ static struct ctl_table sysctl_base_table[] = {
4595 - .mode = 0555,
4596 - .child = dev_table,
4597 - },
4598 -+#ifdef CONFIG_FB_CON_DECOR
4599 -+ {
4600 -+ .procname = "fbcondecor",
4601 -+ .data = &fbcon_decor_path,
4602 -+ .maxlen = KMOD_PATH_LEN,
4603 -+ .mode = 0644,
4604 -+ .proc_handler = &proc_dostring,
4605 -+ },
4606 -+#endif
4607 - { }
4608 - };
4609 -
4610 -@@ -1091,7 +1104,7 @@ static struct ctl_table vm_table[] = {
4611 - .proc_handler = proc_dointvec,
4612 - },
4613 - {
4614 -- .procname = "page-cluster",
4615 -+ .procname = "page-cluster",
4616 - .data = &page_cluster,
4617 - .maxlen = sizeof(int),
4618 - .mode = 0644,
4619 -@@ -1535,7 +1548,7 @@ static struct ctl_table fs_table[] = {
4620 - .mode = 0555,
4621 - .child = inotify_table,
4622 - },
4623 --#endif
4624 -+#endif
4625 - #ifdef CONFIG_EPOLL
4626 - {
4627 - .procname = "epoll",
4628 -@@ -1873,12 +1886,12 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
4629 - unsigned long page = 0;
4630 - size_t left;
4631 - char *kbuf;
4632 --
4633 -+
4634 - if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
4635 - *lenp = 0;
4636 - return 0;
4637 - }
4638 --
4639 -+
4640 - i = (int *) tbl_data;
4641 - vleft = table->maxlen / sizeof(*i);
4642 - left = *lenp;
4643 -@@ -1967,7 +1980,7 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
4644 - * @ppos: file position
4645 - *
4646 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4647 -- * values from/to the user buffer, treated as an ASCII string.
4648 -+ * values from/to the user buffer, treated as an ASCII string.
4649 - *
4650 - * Returns 0 on success.
4651 - */
4652 -@@ -2326,7 +2339,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
4653 - * @ppos: file position
4654 - *
4655 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4656 -- * values from/to the user buffer, treated as an ASCII string.
4657 -+ * values from/to the user buffer, treated as an ASCII string.
4658 - * The values read are assumed to be in seconds, and are converted into
4659 - * jiffies.
4660 - *
4661 -@@ -2348,8 +2361,8 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write,
4662 - * @ppos: pointer to the file position
4663 - *
4664 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4665 -- * values from/to the user buffer, treated as an ASCII string.
4666 -- * The values read are assumed to be in 1/USER_HZ seconds, and
4667 -+ * values from/to the user buffer, treated as an ASCII string.
4668 -+ * The values read are assumed to be in 1/USER_HZ seconds, and
4669 - * are converted into jiffies.
4670 - *
4671 - * Returns 0 on success.
4672 -@@ -2371,8 +2384,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
4673 - * @ppos: the current position in the file
4674 - *
4675 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4676 -- * values from/to the user buffer, treated as an ASCII string.
4677 -- * The values read are assumed to be in 1/1000 seconds, and
4678 -+ * values from/to the user buffer, treated as an ASCII string.
4679 -+ * The values read are assumed to be in 1/1000 seconds, and
4680 - * are converted into jiffies.
4681 - *
4682 - * Returns 0 on success.
4683 ---
4684 -1.7.10
4685 -
4686
4687 Deleted: genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch
4688 ===================================================================
4689 --- genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch 2014-05-05 14:22:27 UTC (rev 2771)
4690 +++ genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch 2014-05-05 14:28:48 UTC (rev 2772)
4691 @@ -1,325 +0,0 @@
4692 -This patch has been tested on and known to work with kernel versions from 3.2
4693 -up to the latest git version (pulled on 12/14/2013).
4694 -
4695 -This patch expands the number of selectable microarchitectures to cover newer
4696 -processors, including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
4697 -14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
4698 -Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core
4699 -i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th
4700 -Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag.
4701 -
4702 -Small but real speed increases are measurable using a make endpoint comparing
4703 -a generic kernel to one built with one of the respective microarchs.
4704 -
4705 -See the following experimental evidence supporting this statement:
4706 -https://github.com/graysky2/kernel_gcc_patch
4707 -
4708 ----
4709 -diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
4710 ---- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500
4711 -+++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500
4712 -@@ -15,6 +15,16 @@
4713 - #define MODULE_PROC_FAMILY "586MMX "
4714 - #elif defined CONFIG_MCORE2
4715 - #define MODULE_PROC_FAMILY "CORE2 "
4716 -+#elif defined CONFIG_MNATIVE
4717 -+#define MODULE_PROC_FAMILY "NATIVE "
4718 -+#elif defined CONFIG_MCOREI7
4719 -+#define MODULE_PROC_FAMILY "COREI7 "
4720 -+#elif defined CONFIG_MCOREI7AVX
4721 -+#define MODULE_PROC_FAMILY "COREI7AVX "
4722 -+#elif defined CONFIG_MCOREAVXI
4723 -+#define MODULE_PROC_FAMILY "COREAVXI "
4724 -+#elif defined CONFIG_MCOREAVX2
4725 -+#define MODULE_PROC_FAMILY "COREAVX2 "
4726 - #elif defined CONFIG_MATOM
4727 - #define MODULE_PROC_FAMILY "ATOM "
4728 - #elif defined CONFIG_M686
4729 -@@ -33,6 +43,18 @@
4730 - #define MODULE_PROC_FAMILY "K7 "
4731 - #elif defined CONFIG_MK8
4732 - #define MODULE_PROC_FAMILY "K8 "
4733 -+#elif defined CONFIG_MK10
4734 -+#define MODULE_PROC_FAMILY "K10 "
4735 -+#elif defined CONFIG_MBARCELONA
4736 -+#define MODULE_PROC_FAMILY "BARCELONA "
4737 -+#elif defined CONFIG_MBOBCAT
4738 -+#define MODULE_PROC_FAMILY "BOBCAT "
4739 -+#elif defined CONFIG_MBULLDOZER
4740 -+#define MODULE_PROC_FAMILY "BULLDOZER "
4741 -+#elif defined CONFIG_MPILEDRIVER
4742 -+#define MODULE_PROC_FAMILY "PILEDRIVER "
4743 -+#elif defined CONFIG_MJAGUAR
4744 -+#define MODULE_PROC_FAMILY "JAGUAR "
4745 - #elif defined CONFIG_MELAN
4746 - #define MODULE_PROC_FAMILY "ELAN "
4747 - #elif defined CONFIG_MCRUSOE
4748 -diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
4749 ---- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500
4750 -+++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500
4751 -@@ -139,7 +139,7 @@ config MPENTIUM4
4752 -
4753 -
4754 - config MK6
4755 -- bool "K6/K6-II/K6-III"
4756 -+ bool "AMD K6/K6-II/K6-III"
4757 - depends on X86_32
4758 - ---help---
4759 - Select this for an AMD K6-family processor. Enables use of
4760 -@@ -147,7 +147,7 @@ config MK6
4761 - flags to GCC.
4762 -
4763 - config MK7
4764 -- bool "Athlon/Duron/K7"
4765 -+ bool "AMD Athlon/Duron/K7"
4766 - depends on X86_32
4767 - ---help---
4768 - Select this for an AMD Athlon K7-family processor. Enables use of
4769 -@@ -155,12 +155,55 @@ config MK7
4770 - flags to GCC.
4771 -
4772 - config MK8
4773 -- bool "Opteron/Athlon64/Hammer/K8"
4774 -+ bool "AMD Opteron/Athlon64/Hammer/K8"
4775 - ---help---
4776 - Select this for an AMD Opteron or Athlon64 Hammer-family processor.
4777 - Enables use of some extended instructions, and passes appropriate
4778 - optimization flags to GCC.
4779 -
4780 -+config MK10
4781 -+ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
4782 -+ ---help---
4783 -+ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
4784 -+ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
4785 -+ Enables use of some extended instructions, and passes appropriate
4786 -+ optimization flags to GCC.
4787 -+
4788 -+config MBARCELONA
4789 -+ bool "AMD Barcelona"
4790 -+ ---help---
4791 -+ Select this for AMD Barcelona and newer processors.
4792 -+
4793 -+ Enables -march=barcelona
4794 -+
4795 -+config MBOBCAT
4796 -+ bool "AMD Bobcat"
4797 -+ ---help---
4798 -+ Select this for AMD Bobcat processors.
4799 -+
4800 -+ Enables -march=btver1
4801 -+
4802 -+config MBULLDOZER
4803 -+ bool "AMD Bulldozer"
4804 -+ ---help---
4805 -+ Select this for AMD Bulldozer processors.
4806 -+
4807 -+ Enables -march=bdver1
4808 -+
4809 -+config MPILEDRIVER
4810 -+ bool "AMD Piledriver"
4811 -+ ---help---
4812 -+ Select this for AMD Piledriver processors.
4813 -+
4814 -+ Enables -march=bdver2
4815 -+
4816 -+config MJAGUAR
4817 -+ bool "AMD Jaguar"
4818 -+ ---help---
4819 -+ Select this for AMD Jaguar processors.
4820 -+
4821 -+ Enables -march=btver2
4822 -+
4823 - config MCRUSOE
4824 - bool "Crusoe"
4825 - depends on X86_32
4826 -@@ -251,8 +294,17 @@ config MPSC
4827 - using the cpu family field
4828 - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
4829 -
4830 -+config MATOM
4831 -+ bool "Intel Atom"
4832 -+ ---help---
4833 -+
4834 -+ Select this for the Intel Atom platform. Intel Atom CPUs have an
4835 -+ in-order pipelining architecture and thus can benefit from
4836 -+ accordingly optimized code. Use a recent GCC with specific Atom
4837 -+ support in order to fully benefit from selecting this option.
4838 -+
4839 - config MCORE2
4840 -- bool "Core 2/newer Xeon"
4841 -+ bool "Intel Core 2"
4842 - ---help---
4843 -
4844 - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
4845 -@@ -260,14 +312,40 @@ config MCORE2
4846 - family in /proc/cpuinfo. Newer ones have 6 and older ones 15
4847 - (not a typo)
4848 -
4849 --config MATOM
4850 -- bool "Intel Atom"
4851 -+ Enables -march=core2
4852 -+
4853 -+config MCOREI7
4854 -+ bool "Intel Core i7"
4855 - ---help---
4856 -
4857 -- Select this for the Intel Atom platform. Intel Atom CPUs have an
4858 -- in-order pipelining architecture and thus can benefit from
4859 -- accordingly optimized code. Use a recent GCC with specific Atom
4860 -- support in order to fully benefit from selecting this option.
4861 -+ Select this for the Intel Nehalem platform. Intel Nehalem processors
4862 -+ include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
4863 -+
4864 -+ Enables -march=corei7
4865 -+
4866 -+config MCOREI7AVX
4867 -+ bool "Intel Core 2nd Gen AVX"
4868 -+ ---help---
4869 -+
4870 -+ Select this for 2nd Gen Core processors including Sandy Bridge.
4871 -+
4872 -+ Enables -march=corei7-avx
4873 -+
4874 -+config MCOREAVXI
4875 -+ bool "Intel Core 3rd Gen AVX"
4876 -+ ---help---
4877 -+
4878 -+ Select this for 3rd Gen Core processors including Ivy Bridge.
4879 -+
4880 -+ Enables -march=core-avx-i
4881 -+
4882 -+config MCOREAVX2
4883 -+ bool "Intel Core AVX2"
4884 -+ ---help---
4885 -+
4886 -+ Select this for AVX2 enabled processors including Haswell.
4887 -+
4888 -+ Enables -march=core-avx2
4889 -
4890 - config GENERIC_CPU
4891 - bool "Generic-x86-64"
4892 -@@ -276,6 +354,19 @@ config GENERIC_CPU
4893 - Generic x86-64 CPU.
4894 - Run equally well on all x86-64 CPUs.
4895 -
4896 -+config MNATIVE
4897 -+ bool "Native optimizations autodetected by GCC"
4898 -+ ---help---
4899 -+
4900 -+ GCC 4.2 and above support -march=native, which automatically detects
4901 -+ the optimum settings to use based on your processor. -march=native
4902 -+ also detects and applies additional settings beyond -march specific
4903 -+ to your CPU (e.g. -msse4). Unless you have a specific reason not to
4904 -+ (e.g. distcc cross-compiling), you should probably be using
4905 -+ -march=native rather than anything listed below.
4906 -+
4907 -+ Enables -march=native
4908 -+
4909 - endchoice
4910 -
4911 - config X86_GENERIC
4912 -@@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT
4913 - config X86_L1_CACHE_SHIFT
4914 - int
4915 - default "7" if MPENTIUM4 || MPSC
4916 -- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
4917 -+ default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU
4918 - default "4" if MELAN || M486 || MGEODEGX1
4919 - default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
4920 -
4921 -@@ -331,11 +422,11 @@ config X86_ALIGNMENT_16
4922 -
4923 - config X86_INTEL_USERCOPY
4924 - def_bool y
4925 -- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
4926 -+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2
4927 -
4928 - config X86_USE_PPRO_CHECKSUM
4929 - def_bool y
4930 -- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
4931 -+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE
4932 -
4933 - config X86_USE_3DNOW
4934 - def_bool y
4935 -@@ -363,17 +454,17 @@ config X86_P6_NOP
4936 -
4937 - config X86_TSC
4938 - def_bool y
4939 -- depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
4940 -+ depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MATOM) && !X86_NUMAQ) || X86_64 || MNATIVE
4941 -
4942 - config X86_CMPXCHG64
4943 - def_bool y
4944 -- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
4945 -+ depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
4946 -
4947 - # this should be set for all -march=.. options where the compiler
4948 - # generates cmov.
4949 - config X86_CMOV
4950 - def_bool y
4951 -- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
4952 -+ depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
4953 -
4954 - config X86_MINIMUM_CPU_FAMILY
4955 - int
4956 -diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile
4957 ---- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500
4958 -+++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500
4959 -@@ -61,11 +61,26 @@ else
4960 - KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
4961 -
4962 - # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
4963 -+ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
4964 - cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
4965 -+ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
4966 -+ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
4967 -+ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
4968 -+ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
4969 -+ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
4970 -+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
4971 - cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
4972 -
4973 - cflags-$(CONFIG_MCORE2) += \
4974 -- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
4975 -+ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
4976 -+ cflags-$(CONFIG_MCOREI7) += \
4977 -+ $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7))
4978 -+ cflags-$(CONFIG_MCOREI7AVX) += \
4979 -+ $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx))
4980 -+ cflags-$(CONFIG_MCOREAVXI) += \
4981 -+ $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i))
4982 -+ cflags-$(CONFIG_MCOREAVX2) += \
4983 -+ $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2))
4984 - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
4985 - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
4986 - cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
4987 -diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
4988 ---- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500
4989 -+++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500
4990 -@@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6
4991 -# Please note that patches that add -march=athlon-xp and friends are pointless.
4992 -# They make zero difference whatsoever to performance at this time.
4993 - cflags-$(CONFIG_MK7) += -march=athlon
4994 -+cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
4995 - cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
4996 -+cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
4997 -+cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
4998 -+cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
4999 -+cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
5000 -+cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
5001 -+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
5002 - cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
5003 - cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
5004 - cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
5005 -@@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
5006 - cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
5007 - cflags-$(CONFIG_MVIAC7) += -march=i686
5008 - cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
5009 -+cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7)
5010 -+cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx)
5011 -+cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i)
5012 -+cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2)
5013 - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
5014 - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
5015 -
5016 -
5017
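The gcc-optimization patch removed above works by pairing each new CONFIG_M* choice with a MODULE_PROC_FAMILY string and a -march flag passed through cc-option. As an illustrative sketch only (not part of the removed patch), the following standalone C program mirrors a slice of that #elif chain; CONFIG_MCOREI7 is defined by hand here purely for demonstration, whereas in a real build these symbols come from Kconfig:

    /*
     * Sketch of the MODULE_PROC_FAMILY selection added by the removed patch.
     * The hand-defined CONFIG_MCOREI7 below stands in for a Kconfig symbol.
     */
    #include <stdio.h>

    #define CONFIG_MCOREI7 1  /* pretend "Intel Core i7" was selected */

    #if defined(CONFIG_MNATIVE)
    # define MODULE_PROC_FAMILY "NATIVE "
    #elif defined(CONFIG_MCOREI7)
    # define MODULE_PROC_FAMILY "COREI7 "
    #elif defined(CONFIG_MK10)
    # define MODULE_PROC_FAMILY "K10 "
    #else
    # define MODULE_PROC_FAMILY "GENERIC "
    #endif

    int main(void)
    {
        printf("MODULE_PROC_FAMILY = \"%s\"\n", MODULE_PROC_FAMILY);
        return 0;
    }

On the compiler side, the removed Makefile hunk would have turned the same selection into -march=corei7 via cc-option, falling back to -mtune=corei7 when the compiler does not support the -march value.
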
5018 Deleted: genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch
5019 ===================================================================
5020 --- genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch 2014-05-05 14:22:27 UTC (rev 2771)
5021 +++ genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch 2014-05-05 14:28:48 UTC (rev 2772)
5022 @@ -1,104 +0,0 @@
5023 -From c3280db98437c9520f04ecacfdf1a868d7a4b7b3 Mon Sep 17 00:00:00 2001
5024 -From: Paolo Valente <paolo.valente@×××××××.it>
5025 -Date: Tue, 3 Sep 2013 16:50:42 +0200
5026 -Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v7r2-3.14
5027 -
5028 -Update Kconfig.iosched and do the related Makefile changes to include
5029 -kernel configuration options for BFQ. Also add the bfqio controller
5030 -to the cgroups subsystem.
5031 -
5032 -Signed-off-by: Paolo Valente <paolo.valente@×××××××.it>
5033 -Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
5034 ----
5035 - block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++
5036 - block/Makefile | 1 +
5037 - include/linux/cgroup_subsys.h | 4 ++++
5038 - 3 files changed, 37 insertions(+)
5039 -
5040 -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
5041 -index 421bef9..0ee5f0f 100644
5042 ---- a/block/Kconfig.iosched
5043 -+++ b/block/Kconfig.iosched
5044 -@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED
5045 - ---help---
5046 - Enable group IO scheduling in CFQ.
5047 -
5048 -+config IOSCHED_BFQ
5049 -+ tristate "BFQ I/O scheduler"
5050 -+ default n
5051 -+ ---help---
5052 -+ The BFQ I/O scheduler tries to distribute bandwidth among
5053 -+ all processes according to their weights.
5054 -+ It aims at distributing the bandwidth as desired, independently of
5055 -+ the disk parameters and with any workload. It also tries to
5056 -+ guarantee low latency to interactive and soft real-time
5057 -+ applications. If compiled built-in (saying Y here), BFQ can
5058 -+ be configured to support hierarchical scheduling.
5059 -+
5060 -+config CGROUP_BFQIO
5061 -+ bool "BFQ hierarchical scheduling support"
5062 -+ depends on CGROUPS && IOSCHED_BFQ=y
5063 -+ default n
5064 -+ ---help---
5065 -+ Enable hierarchical scheduling in BFQ, using the cgroups
5066 -+ filesystem interface. The name of the subsystem will be
5067 -+ bfqio.
5068 -+
5069 - choice
5070 - prompt "Default I/O scheduler"
5071 - default DEFAULT_CFQ
5072 -@@ -52,6 +73,16 @@ choice
5073 - config DEFAULT_CFQ
5074 - bool "CFQ" if IOSCHED_CFQ=y
5075 -
5076 -+ config DEFAULT_BFQ
5077 -+ bool "BFQ" if IOSCHED_BFQ=y
5078 -+ help
5079 -+ Selects BFQ as the default I/O scheduler which will be
5080 -+ used by default for all block devices.
5081 -+ The BFQ I/O scheduler aims at distributing the bandwidth
5082 -+ as desired, independently of the disk parameters and with
5083 -+ any workload. It also tries to guarantee low latency to
5084 -+ interactive and soft real-time applications.
5085 -+
5086 - config DEFAULT_NOOP
5087 - bool "No-op"
5088 -
5089 -@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED
5090 - string
5091 - default "deadline" if DEFAULT_DEADLINE
5092 - default "cfq" if DEFAULT_CFQ
5093 -+ default "bfq" if DEFAULT_BFQ
5094 - default "noop" if DEFAULT_NOOP
5095 -
5096 - endmenu
5097 -diff --git a/block/Makefile b/block/Makefile
5098 -index 20645e8..cbd83fb 100644
5099 ---- a/block/Makefile
5100 -+++ b/block/Makefile
5101 -@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
5102 - obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
5103 - obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
5104 - obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
5105 -+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o
5106 -
5107 - obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
5108 - obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
5109 -diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
5110 -index 7b99d71..4e8c0ff 100644
5111 ---- a/include/linux/cgroup_subsys.h
5112 -+++ b/include/linux/cgroup_subsys.h
5113 -@@ -39,6 +39,10 @@ SUBSYS(net_cls)
5114 - SUBSYS(blkio)
5115 - #endif
5116 -
5117 -+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_BFQIO)
5118 -+SUBSYS(bfqio)
5119 -+#endif
5120 -+
5121 - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
5122 - SUBSYS(perf)
5123 - #endif
5124 ---
5125 -1.9.0
5126 -
5127
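The patch removed above only wires BFQ into Kconfig.iosched, the block Makefile and the cgroup subsystem list; it does not by itself change the running scheduler. On a kernel actually built with the removed CONFIG_IOSCHED_BFQ option, the elevator can also be switched per device through sysfs. A minimal sketch, assuming the target disk is sda and using the standard /sys/block/<dev>/queue/scheduler interface:

    /*
     * Sketch: select the "bfq" elevator for one block device at runtime.
     * Assumes a kernel built with the (removed) CONFIG_IOSCHED_BFQ option;
     * the device name "sda" is an example and should be adjusted.
     */
    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/block/sda/queue/scheduler";
        FILE *f = fopen(path, "w");

        if (f == NULL) {
            perror(path);
            return 1;
        }
        if (fputs("bfq\n", f) == EOF) {
            perror(path);
            fclose(f);
            return 1;
        }
        fclose(f);
        printf("I/O scheduler for sda set to bfq\n");
        return 0;
    }

Making BFQ the boot-time default instead goes through the DEFAULT_BFQ / DEFAULT_IOSCHED choice added by the same removed hunk.
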
5128 Deleted: genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1
5129 ===================================================================
5130 --- genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1 2014-05-05 14:22:27 UTC (rev 2771)
5131 +++ genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1 2014-05-05 14:28:48 UTC (rev 2772)
5132 @@ -1,6065 +0,0 @@
5133 -From 5055277df59d9280da6b60cf90bed8e5e57dc44d Mon Sep 17 00:00:00 2001
5134 -From: Paolo Valente <paolo.valente@×××××××.it>
5135 -Date: Thu, 9 May 2013 19:10:02 +0200
5136 -Subject: [PATCH 2/3] block: introduce the BFQ-v7r2 I/O sched for 3.14
5137 -
5138 -Add the BFQ-v7r2 I/O scheduler to 3.14.
5139 -The general structure is borrowed from CFQ, as is much of the code for
5140 -handling I/O contexts. Over time, several useful features have been
5141 -ported from CFQ as well (details in the changelog in README.BFQ). A
5142 -(bfq_)queue is associated to each task doing I/O on a device, and each
5143 -time a scheduling decision has to be made a queue is selected and served
5144 -until it expires.
5145 -
5146 - - Slices are given in the service domain: tasks are assigned
5147 - budgets, measured in number of sectors. Once granted the disk, a task
5148 - must however consume its assigned budget within a configurable
5149 - maximum time (by default, the maximum possible value of the
5150 - budgets is automatically computed to comply with this timeout).
5151 - This allows the desired latency vs "throughput boosting" tradeoff
5152 - to be set.
5153 -
5154 - - Budgets are scheduled according to a variant of WF2Q+, implemented
5155 - using an augmented rb-tree to take eligibility into account while
5156 - preserving an O(log N) overall complexity.
5157 -
5158 - - A low-latency tunable is provided; if enabled, both interactive
5159 - and soft real-time applications are guaranteed a very low latency.
5160 -
5161 - - Latency guarantees are preserved also in the presence of NCQ.
5162 -
5163 - - Also with flash-based devices, a high throughput is achieved
5164 - while still preserving latency guarantees.
5165 -
5166 - - BFQ features Early Queue Merge (EQM), a sort of fusion of the
5167 - cooperating-queue-merging and the preemption mechanisms present
5168 - in CFQ. EQM is in fact a unified mechanism that tries to get a
5169 - sequential read pattern, and hence a high throughput, with any
5170 - set of processes performing interleaved I/O over a contiguous
5171 - sequence of sectors.
5172 -
5173 - - BFQ supports full hierarchical scheduling, exporting a cgroups
5174 - interface. Since each node has a full scheduler, each group can
5175 - be assigned its own weight.
5176 -
5177 - - If the cgroups interface is not used, only I/O priorities can be
5178 - assigned to processes, with ioprio values mapped to weights
5179 - with the relation weight = IOPRIO_BE_NR - ioprio.
5180 -
5181 - - ioprio classes are served in strict priority order, i.e., lower
5182 - priority queues are not served as long as there are higher
5183 - priority queues. Among queues in the same class the bandwidth is
5184 - distributed in proportion to the weight of each queue. A very
5185 - thin extra bandwidth is however guaranteed to the Idle class, to
5186 - prevent it from starving.
5187 -
5188 -Signed-off-by: Paolo Valente <paolo.valente@×××××××.it>
5189 -Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
5190 ----
5191 - block/bfq-cgroup.c | 926 +++++++++++++++
5192 - block/bfq-ioc.c | 36 +
5193 - block/bfq-iosched.c | 3300 +++++++++++++++++++++++++++++++++++++++++++++++++++
5194 - block/bfq-sched.c | 1078 +++++++++++++++++
5195 - block/bfq.h | 622 ++++++++++
5196 - 5 files changed, 5962 insertions(+)
5197 - create mode 100644 block/bfq-cgroup.c
5198 - create mode 100644 block/bfq-ioc.c
5199 - create mode 100644 block/bfq-iosched.c
5200 - create mode 100644 block/bfq-sched.c
5201 - create mode 100644 block/bfq.h
5202 -
5203 -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
5204 -new file mode 100644
5205 -index 0000000..bcecdb4
5206 ---- /dev/null
5207 -+++ b/block/bfq-cgroup.c
5208 -@@ -0,0 +1,926 @@
5209 -+/*
5210 -+ * BFQ: CGROUPS support.
5211 -+ *
5212 -+ * Based on ideas and code from CFQ:
5213 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
5214 -+ *
5215 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
5216 -+ * Paolo Valente <paolo.valente@×××××××.it>
5217 -+ *
5218 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
5219 -+ *
5220 -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file.
5221 -+ */
5222 -+
5223 -+#ifdef CONFIG_CGROUP_BFQIO
5224 -+
5225 -+static DEFINE_MUTEX(bfqio_mutex);
5226 -+
5227 -+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp)
5228 -+{
5229 -+ return bgrp ? !bgrp->online : false;
5230 -+}
5231 -+
5232 -+static struct bfqio_cgroup bfqio_root_cgroup = {
5233 -+ .weight = BFQ_DEFAULT_GRP_WEIGHT,
5234 -+ .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
5235 -+ .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
5236 -+};
5237 -+
5238 -+static inline void bfq_init_entity(struct bfq_entity *entity,
5239 -+ struct bfq_group *bfqg)
5240 -+{
5241 -+ entity->weight = entity->new_weight;
5242 -+ entity->orig_weight = entity->new_weight;
5243 -+ entity->ioprio = entity->new_ioprio;
5244 -+ entity->ioprio_class = entity->new_ioprio_class;
5245 -+ entity->parent = bfqg->my_entity;
5246 -+ entity->sched_data = &bfqg->sched_data;
5247 -+}
5248 -+
5249 -+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css)
5250 -+{
5251 -+ return css ? container_of(css, struct bfqio_cgroup, css) : NULL;
5252 -+}
5253 -+
5254 -+/*
5255 -+ * Search the bfq_group for bfqd into the hash table (by now only a list)
5256 -+ * of bgrp. Must be called under rcu_read_lock().
5257 -+ */
5258 -+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
5259 -+ struct bfq_data *bfqd)
5260 -+{
5261 -+ struct bfq_group *bfqg;
5262 -+ void *key;
5263 -+
5264 -+ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
5265 -+ key = rcu_dereference(bfqg->bfqd);
5266 -+ if (key == bfqd)
5267 -+ return bfqg;
5268 -+ }
5269 -+
5270 -+ return NULL;
5271 -+}
5272 -+
5273 -+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
5274 -+ struct bfq_group *bfqg)
5275 -+{
5276 -+ struct bfq_entity *entity = &bfqg->entity;
5277 -+
5278 -+ /*
5279 -+ * If the weight of the entity has never been set via the sysfs
5280 -+ * interface, then bgrp->weight == 0. In this case we initialize
5281 -+ * the weight from the current ioprio value. Otherwise, the group
5282 -+ * weight, if set, has priority over the ioprio value.
5283 -+ */
5284 -+ if (bgrp->weight == 0) {
5285 -+ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
5286 -+ entity->new_ioprio = bgrp->ioprio;
5287 -+ } else {
5288 -+ entity->new_weight = bgrp->weight;
5289 -+ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
5290 -+ }
5291 -+ entity->orig_weight = entity->weight = entity->new_weight;
5292 -+ entity->ioprio = entity->new_ioprio;
5293 -+ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
5294 -+ entity->my_sched_data = &bfqg->sched_data;
5295 -+}
5296 -+
5297 -+static inline void bfq_group_set_parent(struct bfq_group *bfqg,
5298 -+ struct bfq_group *parent)
5299 -+{
5300 -+ struct bfq_entity *entity;
5301 -+
5302 -+ BUG_ON(parent == NULL);
5303 -+ BUG_ON(bfqg == NULL);
5304 -+
5305 -+ entity = &bfqg->entity;
5306 -+ entity->parent = parent->my_entity;
5307 -+ entity->sched_data = &parent->sched_data;
5308 -+}
5309 -+
5310 -+/**
5311 -+ * bfq_group_chain_alloc - allocate a chain of groups.
5312 -+ * @bfqd: queue descriptor.
5313 -+ * @css: the leaf cgroup_subsys_state this chain starts from.
5314 -+ *
5315 -+ * Allocate a chain of groups starting from the one belonging to
5316 -+ * @cgroup up to the root cgroup. Stop if a cgroup on the chain
5317 -+ * to the root has already an allocated group on @bfqd.
5318 -+ */
5319 -+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
5320 -+ struct cgroup_subsys_state *css)
5321 -+{
5322 -+ struct bfqio_cgroup *bgrp;
5323 -+ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
5324 -+
5325 -+ for (; css != NULL; css = css->parent) {
5326 -+ bgrp = css_to_bfqio(css);
5327 -+
5328 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5329 -+ if (bfqg != NULL) {
5330 -+ /*
5331 -+ * All the cgroups in the path from there to the
5332 -+ * root must have a bfq_group for bfqd, so we don't
5333 -+ * need any more allocations.
5334 -+ */
5335 -+ break;
5336 -+ }
5337 -+
5338 -+ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
5339 -+ if (bfqg == NULL)
5340 -+ goto cleanup;
5341 -+
5342 -+ bfq_group_init_entity(bgrp, bfqg);
5343 -+ bfqg->my_entity = &bfqg->entity;
5344 -+
5345 -+ if (leaf == NULL) {
5346 -+ leaf = bfqg;
5347 -+ prev = leaf;
5348 -+ } else {
5349 -+ bfq_group_set_parent(prev, bfqg);
5350 -+ /*
5351 -+ * Build a list of allocated nodes using the bfqd
5352 -+ * field, which is still unused and will be initialized
5353 -+ * only after the node is connected.
5354 -+ */
5355 -+ prev->bfqd = bfqg;
5356 -+ prev = bfqg;
5357 -+ }
5358 -+ }
5359 -+
5360 -+ return leaf;
5361 -+
5362 -+cleanup:
5363 -+ while (leaf != NULL) {
5364 -+ prev = leaf;
5365 -+ leaf = leaf->bfqd;
5366 -+ kfree(prev);
5367 -+ }
5368 -+
5369 -+ return NULL;
5370 -+}
5371 -+
5372 -+/**
5373 -+ * bfq_group_chain_link - link an allocated group chain to a cgroup hierarchy.
5374 -+ * @bfqd: the queue descriptor.
5375 -+ * @css: the leaf cgroup_subsys_state to start from.
5376 -+ * @leaf: the leaf group (to be associated to @cgroup).
5377 -+ *
5378 -+ * Try to link a chain of groups to a cgroup hierarchy, connecting the
5379 -+ * nodes bottom-up, so we can be sure that when we find a cgroup in the
5380 -+ * hierarchy that already has a group associated to @bfqd all the nodes
5381 -+ * in the path to the root cgroup have one too.
5382 -+ *
5383 -+ * On locking: the queue lock protects the hierarchy (there is a hierarchy
5384 -+ * per device) while the bfqio_cgroup lock protects the list of groups
5385 -+ * belonging to the same cgroup.
5386 -+ */
5387 -+static void bfq_group_chain_link(struct bfq_data *bfqd,
5388 -+ struct cgroup_subsys_state *css,
5389 -+ struct bfq_group *leaf)
5390 -+{
5391 -+ struct bfqio_cgroup *bgrp;
5392 -+ struct bfq_group *bfqg, *next, *prev = NULL;
5393 -+ unsigned long flags;
5394 -+
5395 -+ assert_spin_locked(bfqd->queue->queue_lock);
5396 -+
5397 -+ for (; css != NULL && leaf != NULL; css = css->parent) {
5398 -+ bgrp = css_to_bfqio(css);
5399 -+ next = leaf->bfqd;
5400 -+
5401 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5402 -+ BUG_ON(bfqg != NULL);
5403 -+
5404 -+ spin_lock_irqsave(&bgrp->lock, flags);
5405 -+
5406 -+ rcu_assign_pointer(leaf->bfqd, bfqd);
5407 -+ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
5408 -+ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
5409 -+
5410 -+ spin_unlock_irqrestore(&bgrp->lock, flags);
5411 -+
5412 -+ prev = leaf;
5413 -+ leaf = next;
5414 -+ }
5415 -+
5416 -+ BUG_ON(css == NULL && leaf != NULL);
5417 -+ if (css != NULL && prev != NULL) {
5418 -+ bgrp = css_to_bfqio(css);
5419 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5420 -+ bfq_group_set_parent(prev, bfqg);
5421 -+ }
5422 -+}
5423 -+
5424 -+/**
5425 -+ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
5426 -+ * @bfqd: queue descriptor.
5427 -+ * @cgroup: cgroup being searched for.
5428 -+ *
5429 -+ * Return a group associated to @bfqd in @cgroup, allocating one if
5430 -+ * necessary. When a group is returned all the cgroups in the path
5431 -+ * to the root have a group associated to @bfqd.
5432 -+ *
5433 -+ * If the allocation fails, return the root group: this breaks guarantees
5434 -+ * but is a safe fallback. If this loss becomes a problem it can be
5435 -+ * mitigated using the equivalent weight (given by the product of the
5436 -+ * weights of the groups in the path from @group to the root) in the
5437 -+ * root scheduler.
5438 -+ *
5439 -+ * We allocate all the missing nodes in the path from the leaf cgroup
5440 -+ * to the root and we connect the nodes only after all the allocations
5441 -+ * have been successful.
5442 -+ */
5443 -+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
5444 -+ struct cgroup_subsys_state *css)
5445 -+{
5446 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
5447 -+ struct bfq_group *bfqg;
5448 -+
5449 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5450 -+ if (bfqg != NULL)
5451 -+ return bfqg;
5452 -+
5453 -+ bfqg = bfq_group_chain_alloc(bfqd, css);
5454 -+ if (bfqg != NULL)
5455 -+ bfq_group_chain_link(bfqd, css, bfqg);
5456 -+ else
5457 -+ bfqg = bfqd->root_group;
5458 -+
5459 -+ return bfqg;
5460 -+}
5461 -+
5462 -+/**
5463 -+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
5464 -+ * @bfqd: queue descriptor.
5465 -+ * @bfqq: the queue to move.
5466 -+ * @entity: @bfqq's entity.
5467 -+ * @bfqg: the group to move to.
5468 -+ *
5469 -+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
5470 -+ * it on the new one. Avoid putting the entity on the old group idle tree.
5471 -+ *
5472 -+ * Must be called under the queue lock; the cgroup owning @bfqg must
5473 -+ * not disappear (by now this just means that we are called under
5474 -+ * rcu_read_lock()).
5475 -+ */
5476 -+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
5477 -+ struct bfq_entity *entity, struct bfq_group *bfqg)
5478 -+{
5479 -+ int busy, resume;
5480 -+
5481 -+ busy = bfq_bfqq_busy(bfqq);
5482 -+ resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
5483 -+
5484 -+ BUG_ON(resume && !entity->on_st);
5485 -+ BUG_ON(busy && !resume && entity->on_st &&
5486 -+ bfqq != bfqd->in_service_queue);
5487 -+
5488 -+ if (busy) {
5489 -+ BUG_ON(atomic_read(&bfqq->ref) < 2);
5490 -+
5491 -+ if (!resume)
5492 -+ bfq_del_bfqq_busy(bfqd, bfqq, 0);
5493 -+ else
5494 -+ bfq_deactivate_bfqq(bfqd, bfqq, 0);
5495 -+ } else if (entity->on_st)
5496 -+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
5497 -+
5498 -+ /*
5499 -+ * Here we use a reference to bfqg. We don't need a refcounter
5500 -+ * as the cgroup reference will not be dropped, so that its
5501 -+ * destroy() callback will not be invoked.
5502 -+ */
5503 -+ entity->parent = bfqg->my_entity;
5504 -+ entity->sched_data = &bfqg->sched_data;
5505 -+
5506 -+ if (busy && resume)
5507 -+ bfq_activate_bfqq(bfqd, bfqq);
5508 -+
5509 -+ if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
5510 -+ bfq_schedule_dispatch(bfqd);
5511 -+}
5512 -+
5513 -+/**
5514 -+ * __bfq_bic_change_cgroup - move @bic to @cgroup.
5515 -+ * @bfqd: the queue descriptor.
5516 -+ * @bic: the bic to move.
5517 -+ * @cgroup: the cgroup to move to.
5518 -+ *
5519 -+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
5520 -+ * has to make sure that the reference to cgroup is valid across the call.
5521 -+ *
5522 -+ * NOTE: an alternative approach might have been to store the current
5523 -+ * cgroup in bfqq and getting a reference to it, reducing the lookup
5524 -+ * time here, at the price of slightly more complex code.
5525 -+ */
5526 -+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
5527 -+ struct bfq_io_cq *bic,
5528 -+ struct cgroup_subsys_state *css)
5529 -+{
5530 -+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
5531 -+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
5532 -+ struct bfq_entity *entity;
5533 -+ struct bfq_group *bfqg;
5534 -+ struct bfqio_cgroup *bgrp;
5535 -+
5536 -+ bgrp = css_to_bfqio(css);
5537 -+
5538 -+ bfqg = bfq_find_alloc_group(bfqd, css);
5539 -+ if (async_bfqq != NULL) {
5540 -+ entity = &async_bfqq->entity;
5541 -+
5542 -+ if (entity->sched_data != &bfqg->sched_data) {
5543 -+ bic_set_bfqq(bic, NULL, 0);
5544 -+ bfq_log_bfqq(bfqd, async_bfqq,
5545 -+ "bic_change_group: %p %d",
5546 -+ async_bfqq, atomic_read(&async_bfqq->ref));
5547 -+ bfq_put_queue(async_bfqq);
5548 -+ }
5549 -+ }
5550 -+
5551 -+ if (sync_bfqq != NULL) {
5552 -+ entity = &sync_bfqq->entity;
5553 -+ if (entity->sched_data != &bfqg->sched_data)
5554 -+ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
5555 -+ }
5556 -+
5557 -+ return bfqg;
5558 -+}
5559 -+
5560 -+/**
5561 -+ * bfq_bic_change_cgroup - move @bic to @cgroup.
5562 -+ * @bic: the bic being migrated.
5563 -+ * @cgroup: the destination cgroup.
5564 -+ *
5565 -+ * When the task owning @bic is moved to @cgroup, @bic is immediately
5566 -+ * moved into its new parent group.
5567 -+ */
5568 -+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
5569 -+ struct cgroup_subsys_state *css)
5570 -+{
5571 -+ struct bfq_data *bfqd;
5572 -+ unsigned long uninitialized_var(flags);
5573 -+
5574 -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
5575 -+ &flags);
5576 -+ if (bfqd != NULL) {
5577 -+ __bfq_bic_change_cgroup(bfqd, bic, css);
5578 -+ bfq_put_bfqd_unlock(bfqd, &flags);
5579 -+ }
5580 -+}
5581 -+
5582 -+/**
5583 -+ * bfq_bic_update_cgroup - update the cgroup of @bic.
5584 -+ * @bic: the @bic to update.
5585 -+ *
5586 -+ * Make sure that @bic is enqueued in the cgroup of the current task.
5587 -+ * We need this in addition to moving bics during the cgroup attach
5588 -+ * phase because the task owning @bic could be at its first disk
5589 -+ * access or we may end up in the root cgroup as the result of a
5590 -+ * memory allocation failure and here we try to move to the right
5591 -+ * group.
5592 -+ *
5593 -+ * Must be called under the queue lock. It is safe to use the returned
5594 -+ * value even after the rcu_read_unlock() as the migration/destruction
5595 -+ * paths act under the queue lock too. IOW it is impossible to race with
5596 -+ * group migration/destruction and end up with an invalid group as:
5597 -+ * a) here cgroup has not yet been destroyed, nor its destroy callback
5598 -+ * has started execution, as current holds a reference to it,
5599 -+ * b) if it is destroyed after rcu_read_unlock() [after current is
5600 -+ * migrated to a different cgroup] its attach() callback will have
5601 -+ * taken care of remove all the references to the old cgroup data.
5602 -+ */
5603 -+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
5604 -+{
5605 -+ struct bfq_data *bfqd = bic_to_bfqd(bic);
5606 -+ struct bfq_group *bfqg;
5607 -+ struct cgroup_subsys_state *css;
5608 -+
5609 -+ BUG_ON(bfqd == NULL);
5610 -+
5611 -+ rcu_read_lock();
5612 -+ css = task_css(current, bfqio_subsys_id);
5613 -+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, css);
5614 -+ rcu_read_unlock();
5615 -+
5616 -+ return bfqg;
5617 -+}
5618 -+
5619 -+/**
5620 -+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
5621 -+ * @st: the service tree being flushed.
5622 -+ */
5623 -+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
5624 -+{
5625 -+ struct bfq_entity *entity = st->first_idle;
5626 -+
5627 -+ for (; entity != NULL; entity = st->first_idle)
5628 -+ __bfq_deactivate_entity(entity, 0);
5629 -+}
5630 -+
5631 -+/**
5632 -+ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
5633 -+ * @bfqd: the device data structure with the root group.
5634 -+ * @entity: the entity to move.
5635 -+ */
5636 -+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
5637 -+ struct bfq_entity *entity)
5638 -+{
5639 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
5640 -+
5641 -+ BUG_ON(bfqq == NULL);
5642 -+ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
5643 -+ return;
5644 -+}
5645 -+
5646 -+/**
5647 -+ * bfq_reparent_active_entities - move to the root group all active entities.
5648 -+ * @bfqd: the device data structure with the root group.
5649 -+ * @bfqg: the group to move from.
5650 -+ * @st: the service tree with the entities.
5651 -+ *
5652 -+ * Needs queue_lock to be taken and reference to be valid over the call.
5653 -+ */
5654 -+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
5655 -+ struct bfq_group *bfqg,
5656 -+ struct bfq_service_tree *st)
5657 -+{
5658 -+ struct rb_root *active = &st->active;
5659 -+ struct bfq_entity *entity = NULL;
5660 -+
5661 -+ if (!RB_EMPTY_ROOT(&st->active))
5662 -+ entity = bfq_entity_of(rb_first(active));
5663 -+
5664 -+ for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
5665 -+ bfq_reparent_leaf_entity(bfqd, entity);
5666 -+
5667 -+ if (bfqg->sched_data.in_service_entity != NULL)
5668 -+ bfq_reparent_leaf_entity(bfqd,
5669 -+ bfqg->sched_data.in_service_entity);
5670 -+
5671 -+ return;
5672 -+}
5673 -+
5674 -+/**
5675 -+ * bfq_destroy_group - destroy @bfqg.
5676 -+ * @bgrp: the bfqio_cgroup containing @bfqg.
5677 -+ * @bfqg: the group being destroyed.
5678 -+ *
5679 -+ * Destroy @bfqg, making sure that it is not referenced from its parent.
5680 -+ */
5681 -+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
5682 -+{
5683 -+ struct bfq_data *bfqd;
5684 -+ struct bfq_service_tree *st;
5685 -+ struct bfq_entity *entity = bfqg->my_entity;
5686 -+ unsigned long uninitialized_var(flags);
5687 -+ int i;
5688 -+
5689 -+ hlist_del(&bfqg->group_node);
5690 -+
5691 -+ /*
5692 -+ * Empty all service_trees belonging to this group before deactivating
5693 -+ * the group itself.
5694 -+ */
5695 -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
5696 -+ st = bfqg->sched_data.service_tree + i;
5697 -+
5698 -+ /*
5699 -+ * The idle tree may still contain bfq_queues belonging
5700 -+ * to exited tasks because they never migrated to a different
5701 -+ * cgroup from the one being destroyed now. No one else
5702 -+ * can access them so it's safe to act without any lock.
5703 -+ */
5704 -+ bfq_flush_idle_tree(st);
5705 -+
5706 -+ /*
5707 -+ * It may happen that some queues are still active
5708 -+ * (busy) upon group destruction (if the corresponding
5709 -+ * processes have been forced to terminate). We move
5710 -+ * all the leaf entities corresponding to these queues
5711 -+ * to the root_group.
5712 -+ * Also, it may happen that the group has an entity
5713 -+ * under service, which is disconnected from the active
5714 -+ * tree: it must be moved, too.
5715 -+ * There is no need to put the sync queues, as the
5716 -+ * scheduler has taken no reference.
5717 -+ */
5718 -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
5719 -+ if (bfqd != NULL) {
5720 -+ bfq_reparent_active_entities(bfqd, bfqg, st);
5721 -+ bfq_put_bfqd_unlock(bfqd, &flags);
5722 -+ }
5723 -+ BUG_ON(!RB_EMPTY_ROOT(&st->active));
5724 -+ BUG_ON(!RB_EMPTY_ROOT(&st->idle));
5725 -+ }
5726 -+ BUG_ON(bfqg->sched_data.next_in_service != NULL);
5727 -+ BUG_ON(bfqg->sched_data.in_service_entity != NULL);
5728 -+
5729 -+ /*
5730 -+ * We may race with device destruction, take extra care when
5731 -+ * dereferencing bfqg->bfqd.
5732 -+ */
5733 -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
5734 -+ if (bfqd != NULL) {
5735 -+ hlist_del(&bfqg->bfqd_node);
5736 -+ __bfq_deactivate_entity(entity, 0);
5737 -+ bfq_put_async_queues(bfqd, bfqg);
5738 -+ bfq_put_bfqd_unlock(bfqd, &flags);
5739 -+ }
5740 -+ BUG_ON(entity->tree != NULL);
5741 -+
5742 -+ /*
5743 -+ * No need to defer the kfree() to the end of the RCU grace
5744 -+ * period: we are called from the destroy() callback of our
5745 -+ * cgroup, so we can be sure that no one is a) still using
5746 -+ * this cgroup or b) doing lookups in it.
5747 -+ */
5748 -+ kfree(bfqg);
5749 -+}
5750 -+
5751 -+static void bfq_end_raising_async(struct bfq_data *bfqd)
5752 -+{
5753 -+ struct hlist_node *tmp;
5754 -+ struct bfq_group *bfqg;
5755 -+
5756 -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node)
5757 -+ bfq_end_raising_async_queues(bfqd, bfqg);
5758 -+ bfq_end_raising_async_queues(bfqd, bfqd->root_group);
5759 -+}
5760 -+
5761 -+/**
5762 -+ * bfq_disconnect_groups - disconnect @bfqd from all its groups.
5763 -+ * @bfqd: the device descriptor being exited.
5764 -+ *
5765 -+ * When the device exits we just make sure that no lookup can return
5766 -+ * the now unused group structures. They will be deallocated on cgroup
5767 -+ * destruction.
5768 -+ */
5769 -+static void bfq_disconnect_groups(struct bfq_data *bfqd)
5770 -+{
5771 -+ struct hlist_node *tmp;
5772 -+ struct bfq_group *bfqg;
5773 -+
5774 -+ bfq_log(bfqd, "disconnect_groups beginning");
5775 -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) {
5776 -+ hlist_del(&bfqg->bfqd_node);
5777 -+
5778 -+ __bfq_deactivate_entity(bfqg->my_entity, 0);
5779 -+
5780 -+ /*
5781 -+ * Don't remove from the group hash, just set an
5782 -+ * invalid key. No lookups can race with the
5783 -+ * assignment as bfqd is being destroyed; this
5784 -+ * implies also that new elements cannot be added
5785 -+ * to the list.
5786 -+ */
5787 -+ rcu_assign_pointer(bfqg->bfqd, NULL);
5788 -+
5789 -+ bfq_log(bfqd, "disconnect_groups: put async for group %p",
5790 -+ bfqg);
5791 -+ bfq_put_async_queues(bfqd, bfqg);
5792 -+ }
5793 -+}
5794 -+
5795 -+static inline void bfq_free_root_group(struct bfq_data *bfqd)
5796 -+{
5797 -+ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
5798 -+ struct bfq_group *bfqg = bfqd->root_group;
5799 -+
5800 -+ bfq_put_async_queues(bfqd, bfqg);
5801 -+
5802 -+ spin_lock_irq(&bgrp->lock);
5803 -+ hlist_del_rcu(&bfqg->group_node);
5804 -+ spin_unlock_irq(&bgrp->lock);
5805 -+
5806 -+ /*
5807 -+ * No need to synchronize_rcu() here: since the device is gone
5808 -+ * there cannot be any read-side access to its root_group.
5809 -+ */
5810 -+ kfree(bfqg);
5811 -+}
5812 -+
5813 -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
5814 -+{
5815 -+ struct bfq_group *bfqg;
5816 -+ struct bfqio_cgroup *bgrp;
5817 -+ int i;
5818 -+
5819 -+ bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
5820 -+ if (bfqg == NULL)
5821 -+ return NULL;
5822 -+
5823 -+ bfqg->entity.parent = NULL;
5824 -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
5825 -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
5826 -+
5827 -+ bgrp = &bfqio_root_cgroup;
5828 -+ spin_lock_irq(&bgrp->lock);
5829 -+ rcu_assign_pointer(bfqg->bfqd, bfqd);
5830 -+ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
5831 -+ spin_unlock_irq(&bgrp->lock);
5832 -+
5833 -+ return bfqg;
5834 -+}
5835 -+
5836 -+#define SHOW_FUNCTION(__VAR) \
5837 -+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \
5838 -+ struct cftype *cftype) \
5839 -+{ \
5840 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
5841 -+ u64 ret = -ENODEV; \
5842 -+ \
5843 -+ mutex_lock(&bfqio_mutex); \
5844 -+ if (bfqio_is_removed(bgrp)) \
5845 -+ goto out_unlock; \
5846 -+ \
5847 -+ spin_lock_irq(&bgrp->lock); \
5848 -+ ret = bgrp->__VAR; \
5849 -+ spin_unlock_irq(&bgrp->lock); \
5850 -+ \
5851 -+out_unlock: \
5852 -+ mutex_unlock(&bfqio_mutex); \
5853 -+ return ret; \
5854 -+}
5855 -+
5856 -+SHOW_FUNCTION(weight);
5857 -+SHOW_FUNCTION(ioprio);
5858 -+SHOW_FUNCTION(ioprio_class);
5859 -+#undef SHOW_FUNCTION
5860 -+
5861 -+#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
5862 -+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\
5863 -+ struct cftype *cftype, \
5864 -+ u64 val) \
5865 -+{ \
5866 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
5867 -+ struct bfq_group *bfqg; \
5868 -+ int ret = -EINVAL; \
5869 -+ \
5870 -+ if (val < (__MIN) || val > (__MAX)) \
5871 -+ return ret; \
5872 -+ \
5873 -+ ret = -ENODEV; \
5874 -+ mutex_lock(&bfqio_mutex); \
5875 -+ if (bfqio_is_removed(bgrp)) \
5876 -+ goto out_unlock; \
5877 -+ ret = 0; \
5878 -+ \
5879 -+ spin_lock_irq(&bgrp->lock); \
5880 -+ bgrp->__VAR = (unsigned short)val; \
5881 -+ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \
5882 -+ /* \
5883 -+ * Setting the ioprio_changed flag of the entity \
5884 -+ * to 1 with new_##__VAR == ##__VAR would re-set \
5885 -+ * the value of the weight to its ioprio mapping. \
5886 -+ * Set the flag only if necessary. \
5887 -+ */ \
5888 -+ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
5889 -+ bfqg->entity.new_##__VAR = (unsigned short)val; \
5890 -+ /* \
5891 -+ * Make sure that the above new value has been \
5892 -+ * stored in bfqg->entity.new_##__VAR before \
5893 -+ * setting the ioprio_changed flag. In fact, \
5894 -+ * this flag may be read asynchronously (in \
5895 -+ * critical sections protected by a different \
5896 -+ * lock than that held here), and finding this \
5897 -+ * flag set may cause the execution of the code \
5898 -+ * for updating parameters whose value may \
5899 -+ * depend also on bfqg->entity.new_##__VAR (in \
5900 -+ * __bfq_entity_update_weight_prio). \
5901 -+ * This barrier makes sure that the new value \
5902 -+ * of bfqg->entity.new_##__VAR is correctly \
5903 -+ * seen in that code. \
5904 -+ */ \
5905 -+ smp_wmb(); \
5906 -+ bfqg->entity.ioprio_changed = 1; \
5907 -+ } \
5908 -+ } \
5909 -+ spin_unlock_irq(&bgrp->lock); \
5910 -+ \
5911 -+out_unlock: \
5912 -+ mutex_unlock(&bfqio_mutex); \
5913 -+ return ret; \
5914 -+}
5915 -+
5916 -+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
5917 -+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
5918 -+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
5919 -+#undef STORE_FUNCTION
5920 -+
5921 -+static struct cftype bfqio_files[] = {
5922 -+ {
5923 -+ .name = "weight",
5924 -+ .read_u64 = bfqio_cgroup_weight_read,
5925 -+ .write_u64 = bfqio_cgroup_weight_write,
5926 -+ },
5927 -+ {
5928 -+ .name = "ioprio",
5929 -+ .read_u64 = bfqio_cgroup_ioprio_read,
5930 -+ .write_u64 = bfqio_cgroup_ioprio_write,
5931 -+ },
5932 -+ {
5933 -+ .name = "ioprio_class",
5934 -+ .read_u64 = bfqio_cgroup_ioprio_class_read,
5935 -+ .write_u64 = bfqio_cgroup_ioprio_class_write,
5936 -+ },
5937 -+ { }, /* terminate */
5938 -+};
5939 -+
5940 -+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state
5941 -+ *parent_css)
5942 -+{
5943 -+ struct bfqio_cgroup *bgrp;
5944 -+
5945 -+ if (parent_css != NULL) {
5946 -+ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
5947 -+ if (bgrp == NULL)
5948 -+ return ERR_PTR(-ENOMEM);
5949 -+ } else
5950 -+ bgrp = &bfqio_root_cgroup;
5951 -+
5952 -+ spin_lock_init(&bgrp->lock);
5953 -+ INIT_HLIST_HEAD(&bgrp->group_data);
5954 -+ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
5955 -+ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
5956 -+
5957 -+ return &bgrp->css;
5958 -+}
5959 -+
5960 -+/*
5961 -+ * We cannot support shared io contexts, as we have no means to support
5962 -+ * two tasks with the same ioc in two different groups without major rework
5963 -+ * of the main bic/bfqq data structures. By now we allow a task to change
5964 -+ * its cgroup only if it's the only owner of its ioc; the drawback of this
5965 -+ * behavior is that a group containing a task that forked using CLONE_IO
5966 -+ * will not be destroyed until the tasks sharing the ioc die.
5967 -+ */
5968 -+static int bfqio_can_attach(struct cgroup_subsys_state *css,
5969 -+ struct cgroup_taskset *tset)
5970 -+{
5971 -+ struct task_struct *task;
5972 -+ struct io_context *ioc;
5973 -+ int ret = 0;
5974 -+
5975 -+ cgroup_taskset_for_each(task, css, tset) {
5976 -+ /*
5977 -+ * task_lock() is needed to avoid races with
5978 -+ * exit_io_context()
5979 -+ */
5980 -+ task_lock(task);
5981 -+ ioc = task->io_context;
5982 -+ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
5983 -+ /*
5984 -+ * ioc == NULL means that the task is either too young
5985 -+ * or exiting: if it still has no ioc, the ioc can't be
5986 -+ * shared; if the task is exiting, the attach will fail
5987 -+ * anyway, no matter what we return here.
5988 -+ */
5989 -+ ret = -EINVAL;
5990 -+ task_unlock(task);
5991 -+ if (ret)
5992 -+ break;
5993 -+ }
5994 -+
5995 -+ return ret;
5996 -+}
5997 -+
5998 -+static void bfqio_attach(struct cgroup_subsys_state *css,
5999 -+ struct cgroup_taskset *tset)
6000 -+{
6001 -+ struct task_struct *task;
6002 -+ struct io_context *ioc;
6003 -+ struct io_cq *icq;
6004 -+
6005 -+ /*
6006 -+ * IMPORTANT NOTE: The move of more than one process at a time to a
6007 -+ * new group has not yet been tested.
6008 -+ */
6009 -+ cgroup_taskset_for_each(task, css, tset) {
6010 -+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
6011 -+ if (ioc) {
6012 -+ /*
6013 -+ * Handle cgroup change here.
6014 -+ */
6015 -+ rcu_read_lock();
6016 -+ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
6017 -+ if (!strncmp(
6018 -+ icq->q->elevator->type->elevator_name,
6019 -+ "bfq", ELV_NAME_MAX))
6020 -+ bfq_bic_change_cgroup(icq_to_bic(icq),
6021 -+ css);
6022 -+ rcu_read_unlock();
6023 -+ put_io_context(ioc);
6024 -+ }
6025 -+ }
6026 -+}
6027 -+
6028 -+static void bfqio_destroy(struct cgroup_subsys_state *css)
6029 -+{
6030 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
6031 -+ struct hlist_node *tmp;
6032 -+ struct bfq_group *bfqg;
6033 -+
6034 -+ /*
6035 -+ * Since we are destroying the cgroup, there are no more tasks
6036 -+ * referencing it, and all the RCU grace periods that may have
6037 -+ * referenced it are ended (as the destruction of the parent
6038 -+ * cgroup is RCU-safe); bgrp->group_data will not be accessed by
6039 -+ * anything else and we don't need any synchronization.
6040 -+ */
6041 -+ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
6042 -+ bfq_destroy_group(bgrp, bfqg);
6043 -+
6044 -+ BUG_ON(!hlist_empty(&bgrp->group_data));
6045 -+
6046 -+ kfree(bgrp);
6047 -+}
6048 -+
6049 -+static int bfqio_css_online(struct cgroup_subsys_state *css)
6050 -+{
6051 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
6052 -+
6053 -+ mutex_lock(&bfqio_mutex);
6054 -+ bgrp->online = true;
6055 -+ mutex_unlock(&bfqio_mutex);
6056 -+
6057 -+ return 0;
6058 -+}
6059 -+
6060 -+static void bfqio_css_offline(struct cgroup_subsys_state *css)
6061 -+{
6062 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
6063 -+
6064 -+ mutex_lock(&bfqio_mutex);
6065 -+ bgrp->online = false;
6066 -+ mutex_unlock(&bfqio_mutex);
6067 -+}
6068 -+
6069 -+struct cgroup_subsys bfqio_subsys = {
6070 -+ .name = "bfqio",
6071 -+ .css_alloc = bfqio_create,
6072 -+ .css_online = bfqio_css_online,
6073 -+ .css_offline = bfqio_css_offline,
6074 -+ .can_attach = bfqio_can_attach,
6075 -+ .attach = bfqio_attach,
6076 -+ .css_free = bfqio_destroy,
6077 -+ .subsys_id = bfqio_subsys_id,
6078 -+ .base_cftypes = bfqio_files,
6079 -+};
6080 -+#else
6081 -+static inline void bfq_init_entity(struct bfq_entity *entity,
6082 -+ struct bfq_group *bfqg)
6083 -+{
6084 -+ entity->weight = entity->new_weight;
6085 -+ entity->orig_weight = entity->new_weight;
6086 -+ entity->ioprio = entity->new_ioprio;
6087 -+ entity->ioprio_class = entity->new_ioprio_class;
6088 -+ entity->sched_data = &bfqg->sched_data;
6089 -+}
6090 -+
6091 -+static inline struct bfq_group *
6092 -+bfq_bic_update_cgroup(struct bfq_io_cq *bic)
6093 -+{
6094 -+ struct bfq_data *bfqd = bic_to_bfqd(bic);
6095 -+ return bfqd->root_group;
6096 -+}
6097 -+
6098 -+static inline void bfq_bfqq_move(struct bfq_data *bfqd,
6099 -+ struct bfq_queue *bfqq,
6100 -+ struct bfq_entity *entity,
6101 -+ struct bfq_group *bfqg)
6102 -+{
6103 -+}
6104 -+
6105 -+static void bfq_end_raising_async(struct bfq_data *bfqd)
6106 -+{
6107 -+ bfq_end_raising_async_queues(bfqd, bfqd->root_group);
6108 -+}
6109 -+
6110 -+static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
6111 -+{
6112 -+ bfq_put_async_queues(bfqd, bfqd->root_group);
6113 -+}
6114 -+
6115 -+static inline void bfq_free_root_group(struct bfq_data *bfqd)
6116 -+{
6117 -+ kfree(bfqd->root_group);
6118 -+}
6119 -+
6120 -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
6121 -+{
6122 -+ struct bfq_group *bfqg;
6123 -+ int i;
6124 -+
6125 -+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
6126 -+ if (bfqg == NULL)
6127 -+ return NULL;
6128 -+
6129 -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
6130 -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
6131 -+
6132 -+ return bfqg;
6133 -+}
6134 -+#endif
6135 -diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
6136 -new file mode 100644
6137 -index 0000000..7f6b000
6138 ---- /dev/null
6139 -+++ b/block/bfq-ioc.c
6140 -@@ -0,0 +1,36 @@
6141 -+/*
6142 -+ * BFQ: I/O context handling.
6143 -+ *
6144 -+ * Based on ideas and code from CFQ:
6145 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
6146 -+ *
6147 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
6148 -+ * Paolo Valente <paolo.valente@×××××××.it>
6149 -+ *
6150 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
6151 -+ */
6152 -+
6153 -+/**
6154 -+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
6155 -+ * @icq: the iocontext queue.
6156 -+ */
6157 -+static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
6158 -+{
6159 -+ /* bic->icq is the first member, %NULL will convert to %NULL */
6160 -+ return container_of(icq, struct bfq_io_cq, icq);
6161 -+}
6162 -+
6163 -+/**
6164 -+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
6165 -+ * @bfqd: the lookup key.
6166 -+ * @ioc: the io_context of the process doing I/O.
6167 -+ *
6168 -+ * Queue lock must be held.
6169 -+ */
6170 -+static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
6171 -+ struct io_context *ioc)
6172 -+{
6173 -+ if (ioc)
6174 -+ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
6175 -+ return NULL;
6176 -+}
6177 -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
6178 -new file mode 100644
6179 -index 0000000..f5f71e4
6180 ---- /dev/null
6181 -+++ b/block/bfq-iosched.c
6182 -@@ -0,0 +1,3300 @@
6183 -+/*
6184 -+ * Budget Fair Queueing (BFQ) disk scheduler.
6185 -+ *
6186 -+ * Based on ideas and code from CFQ:
6187 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
6188 -+ *
6189 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
6190 -+ * Paolo Valente <paolo.valente@×××××××.it>
6191 -+ *
6192 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
6193 -+ *
6194 -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file.
6195 -+ *
6196 -+ * BFQ is a proportional share disk scheduling algorithm based on the
6197 -+ * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, measured in
6198 -+ * number of sectors, to tasks instead of time slices. The disk is not granted
6199 -+ * to the in-service task for a given time slice, but until it has exhausted
6200 -+ * its assigned budget. This change from the time to the service domain allows
6201 -+ * BFQ to distribute the disk bandwidth among tasks as desired, without any
6202 -+ * distortion due to ZBR, workload fluctuations or other factors. BFQ uses an
6203 -+ * ad hoc internal scheduler, called B-WF2Q+, to schedule tasks according to
6204 -+ * their budgets (more precisely BFQ schedules queues associated to tasks).
6205 -+ * Thanks to this accurate scheduler, BFQ can afford to assign high budgets to
6206 -+ * disk-bound non-seeky tasks (to boost the throughput), and yet guarantee low
6207 -+ * latencies to interactive and soft real-time applications.
6208 -+ *
6209 -+ * BFQ is described in [1], which also contains a reference to the initial,
6210 -+ * more theoretical paper on BFQ. The interested reader can find in the latter
6211 -+ * paper full details on the main algorithm, formulas of the guarantees, and
6212 -+ * formal proofs of all the properties. With respect to
6213 -+ * the version of BFQ presented in these papers, this implementation adds a
6214 -+ * few more heuristics, such as the one that guarantees a low latency to soft
6215 -+ * real-time applications, and a hierarchical extension based on H-WF2Q+.
6216 -+ *
6217 -+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
6218 -+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
6219 -+ * complexity derives from the one introduced with EEVDF in [3].
6220 -+ *
6221 -+ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
6222 -+ * with the BFQ Disk I/O Scheduler'',
6223 -+ * Proceedings of the 5th Annual International Systems and Storage
6224 -+ * Conference (SYSTOR '12), June 2012.
6225 -+ *
6226 -+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
6227 -+ *
6228 -+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
6229 -+ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
6230 -+ * Oct 1997.
6231 -+ *
6232 -+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
6233 -+ *
6234 -+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
6235 -+ * First: A Flexible and Accurate Mechanism for Proportional Share
6236 -+ * Resource Allocation,'' technical report.
6237 -+ *
6238 -+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
6239 -+ */
6240 -+#include <linux/module.h>
6241 -+#include <linux/slab.h>
6242 -+#include <linux/blkdev.h>
6243 -+#include <linux/cgroup.h>
6244 -+#include <linux/elevator.h>
6245 -+#include <linux/jiffies.h>
6246 -+#include <linux/rbtree.h>
6247 -+#include <linux/ioprio.h>
6248 -+#include "bfq.h"
6249 -+#include "blk.h"
6250 -+
6251 -+/* Max number of dispatches in one round of service. */
6252 -+static const int bfq_quantum = 4;
6253 -+
6254 -+/* Expiration time of sync (0) and async (1) requests, in jiffies. */
6255 -+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
6256 -+
6257 -+/* Maximum backwards seek, in KiB. */
6258 -+static const int bfq_back_max = 16 * 1024;
6259 -+
6260 -+/* Penalty of a backwards seek, in number of sectors. */
6261 -+static const int bfq_back_penalty = 2;
6262 -+
6263 -+/* Idling period duration, in jiffies. */
6264 -+static int bfq_slice_idle = HZ / 125;
6265 -+
6266 -+/* Default maximum budget values, in sectors and number of requests. */
6267 -+static const int bfq_default_max_budget = 16 * 1024;
6268 -+static const int bfq_max_budget_async_rq = 4;
6269 -+
6270 -+/*
6271 -+ * Async to sync throughput distribution is controlled as follows:
6272 -+ * when an async request is served, the entity is charged the number
6273 -+ * of sectors of the request, multiplied by the factor below
6274 -+ */
6275 -+static const int bfq_async_charge_factor = 10;
6276 -+
6277 -+/* Default timeout values, in jiffies, approximating CFQ defaults. */
6278 -+static const int bfq_timeout_sync = HZ / 8;
6279 -+static int bfq_timeout_async = HZ / 25;
6280 -+
6281 -+struct kmem_cache *bfq_pool;
6282 -+
6283 -+/* Below this threshold (in ms), we consider thinktime immediate. */
6284 -+#define BFQ_MIN_TT 2
6285 -+
6286 -+/* hw_tag detection: parallel requests threshold and min samples needed. */
6287 -+#define BFQ_HW_QUEUE_THRESHOLD 4
6288 -+#define BFQ_HW_QUEUE_SAMPLES 32
6289 -+
6290 -+#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
6291 -+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
6292 -+
6293 -+/* Min samples used for peak rate estimation (for autotuning). */
6294 -+#define BFQ_PEAK_RATE_SAMPLES 32
6295 -+
6296 -+/* Shift used for peak rate fixed precision calculations. */
6297 -+#define BFQ_RATE_SHIFT 16
6298 -+
6299 -+/*
6300 -+ * The duration of the weight raising for interactive applications is
6301 -+ * computed automatically (as default behaviour), using the following
6302 -+ * formula: duration = (R / r) * T, where r is the peak rate of the
6303 -+ * disk, and R and T are two reference parameters. In particular, R is
6304 -+ * the peak rate of a reference disk, and T is about the maximum time
6305 -+ * for starting popular large applications on that disk, under BFQ and
6306 -+ * while reading two files in parallel. Finally, BFQ uses two
6307 -+ * different pairs (R, T) depending on whether the disk is rotational
6308 -+ * or non-rotational.
6309 -+ */
6310 -+#define T_rot (msecs_to_jiffies(5500))
6311 -+#define T_nonrot (msecs_to_jiffies(2000))
6312 -+/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */
6313 -+#define R_rot 17415
6314 -+#define R_nonrot 34791
6315 -+
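As a quick aside, the duration = (R / r) * T rule described above can be reproduced with a minimal stand-alone calculation. The sketch below uses the rotational reference pair (R_rot, T_rot); the measured peak rate r is an assumed value, chosen here as twice the reference.

#include <stdio.h>

int main(void)
{
	/* Reference pair for rotational disks, as defined above. */
	const unsigned long long R_rot = 17415;    /* sectors/usec << BFQ_RATE_SHIFT */
	const unsigned long long T_rot_ms = 5500;  /* reference start-up time, in ms */

	/* Assumed measured peak rate of the actual disk: twice the reference. */
	const unsigned long long r = 2 * R_rot;

	/* duration = (R / r) * T: a disk twice as fast is raised for half as long. */
	unsigned long long duration_ms = R_rot * T_rot_ms / r;

	printf("weight-raising duration: %llu ms\n", duration_ms);  /* 2750 */
	return 0;
}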
6316 -+#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
6317 -+ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
6318 -+
6319 -+#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0])
6320 -+#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
6321 -+
6322 -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd);
6323 -+
6324 -+#include "bfq-ioc.c"
6325 -+#include "bfq-sched.c"
6326 -+#include "bfq-cgroup.c"
6327 -+
6328 -+#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\
6329 -+ IOPRIO_CLASS_IDLE)
6330 -+#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\
6331 -+ IOPRIO_CLASS_RT)
6332 -+
6333 -+#define bfq_sample_valid(samples) ((samples) > 80)
6334 -+
6335 -+/*
6336 -+ * We regard a request as SYNC, if either it's a read or has the SYNC bit
6337 -+ * set (in which case it could also be a direct WRITE).
6338 -+ */
6339 -+static inline int bfq_bio_sync(struct bio *bio)
6340 -+{
6341 -+ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
6342 -+ return 1;
6343 -+
6344 -+ return 0;
6345 -+}
6346 -+
6347 -+/*
6348 -+ * Scheduler run of queue, if there are requests pending and no one in the
6349 -+ * driver that will restart queueing.
6350 -+ */
6351 -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd)
6352 -+{
6353 -+ if (bfqd->queued != 0) {
6354 -+ bfq_log(bfqd, "schedule dispatch");
6355 -+ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work);
6356 -+ }
6357 -+}
6358 -+
6359 -+/*
6360 -+ * Lifted from AS - choose which of rq1 and rq2 is best served now.
6361 -+ * We choose the request that is closest to the head right now. Distance
6362 -+ * behind the head is penalized and only allowed to a certain extent.
6363 -+ */
6364 -+static struct request *bfq_choose_req(struct bfq_data *bfqd,
6365 -+ struct request *rq1,
6366 -+ struct request *rq2,
6367 -+ sector_t last)
6368 -+{
6369 -+ sector_t s1, s2, d1 = 0, d2 = 0;
6370 -+ unsigned long back_max;
6371 -+#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */
6372 -+#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
6373 -+ unsigned wrap = 0; /* bit mask: requests behind the disk head? */
6374 -+
6375 -+ if (rq1 == NULL || rq1 == rq2)
6376 -+ return rq2;
6377 -+ if (rq2 == NULL)
6378 -+ return rq1;
6379 -+
6380 -+ if (rq_is_sync(rq1) && !rq_is_sync(rq2))
6381 -+ return rq1;
6382 -+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
6383 -+ return rq2;
6384 -+ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
6385 -+ return rq1;
6386 -+ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
6387 -+ return rq2;
6388 -+
6389 -+ s1 = blk_rq_pos(rq1);
6390 -+ s2 = blk_rq_pos(rq2);
6391 -+
6392 -+ /*
6393 -+ * By definition, 1KiB is 2 sectors.
6394 -+ */
6395 -+ back_max = bfqd->bfq_back_max * 2;
6396 -+
6397 -+ /*
6398 -+ * Strict one way elevator _except_ in the case where we allow
6399 -+ * short backward seeks which are biased as twice the cost of a
6400 -+ * similar forward seek.
6401 -+ */
6402 -+ if (s1 >= last)
6403 -+ d1 = s1 - last;
6404 -+ else if (s1 + back_max >= last)
6405 -+ d1 = (last - s1) * bfqd->bfq_back_penalty;
6406 -+ else
6407 -+ wrap |= BFQ_RQ1_WRAP;
6408 -+
6409 -+ if (s2 >= last)
6410 -+ d2 = s2 - last;
6411 -+ else if (s2 + back_max >= last)
6412 -+ d2 = (last - s2) * bfqd->bfq_back_penalty;
6413 -+ else
6414 -+ wrap |= BFQ_RQ2_WRAP;
6415 -+
6416 -+ /* Found required data */
6417 -+
6418 -+ /*
6419 -+ * By doing switch() on the bit mask "wrap" we avoid having to
6420 -+ * check two variables for all permutations: --> faster!
6421 -+ */
6422 -+ switch (wrap) {
6423 -+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
6424 -+ if (d1 < d2)
6425 -+ return rq1;
6426 -+ else if (d2 < d1)
6427 -+ return rq2;
6428 -+ else {
6429 -+ if (s1 >= s2)
6430 -+ return rq1;
6431 -+ else
6432 -+ return rq2;
6433 -+ }
6434 -+
6435 -+ case BFQ_RQ2_WRAP:
6436 -+ return rq1;
6437 -+ case BFQ_RQ1_WRAP:
6438 -+ return rq2;
6439 -+ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
6440 -+ default:
6441 -+ /*
6442 -+ * Since both rqs are wrapped,
6443 -+ * start with the one that's further behind head
6444 -+ * (--> only *one* back seek required),
6445 -+ * since back seek takes more time than forward.
6446 -+ */
6447 -+ if (s1 <= s2)
6448 -+ return rq1;
6449 -+ else
6450 -+ return rq2;
6451 -+ }
6452 -+}
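For illustration, the distance rule used above can be isolated in a small stand-alone sketch; all positions below are made-up values. A forward seek costs its distance, while a short backward seek (within bfq_back_max) costs the distance multiplied by bfq_back_penalty.

#include <stdio.h>

int main(void)
{
	const unsigned long long last = 10000;   /* assumed current head position */
	const unsigned long penalty = 2;         /* bfq_back_penalty */

	const unsigned long long s1 = 10200;     /* request ahead of the head */
	const unsigned long long s2 = 9800;      /* request behind the head */

	unsigned long long d1 = s1 - last;              /* forward seek: 200 */
	unsigned long long d2 = (last - s2) * penalty;  /* backward seek: 200 * 2 = 400 */

	/* Neither request wraps, so the smaller effective distance wins: rq1. */
	printf("d1=%llu d2=%llu -> serve rq1\n", d1, d2);
	return 0;
}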
6453 -+
6454 -+static struct bfq_queue *
6455 -+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
6456 -+ sector_t sector, struct rb_node **ret_parent,
6457 -+ struct rb_node ***rb_link)
6458 -+{
6459 -+ struct rb_node **p, *parent;
6460 -+ struct bfq_queue *bfqq = NULL;
6461 -+
6462 -+ parent = NULL;
6463 -+ p = &root->rb_node;
6464 -+ while (*p) {
6465 -+ struct rb_node **n;
6466 -+
6467 -+ parent = *p;
6468 -+ bfqq = rb_entry(parent, struct bfq_queue, pos_node);
6469 -+
6470 -+ /*
6471 -+ * Sort strictly based on sector. Smallest to the left,
6472 -+ * largest to the right.
6473 -+ */
6474 -+ if (sector > blk_rq_pos(bfqq->next_rq))
6475 -+ n = &(*p)->rb_right;
6476 -+ else if (sector < blk_rq_pos(bfqq->next_rq))
6477 -+ n = &(*p)->rb_left;
6478 -+ else
6479 -+ break;
6480 -+ p = n;
6481 -+ bfqq = NULL;
6482 -+ }
6483 -+
6484 -+ *ret_parent = parent;
6485 -+ if (rb_link)
6486 -+ *rb_link = p;
6487 -+
6488 -+ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
6489 -+ (long long unsigned)sector,
6490 -+ bfqq != NULL ? bfqq->pid : 0);
6491 -+
6492 -+ return bfqq;
6493 -+}
6494 -+
6495 -+static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
6496 -+{
6497 -+ struct rb_node **p, *parent;
6498 -+ struct bfq_queue *__bfqq;
6499 -+
6500 -+ if (bfqq->pos_root != NULL) {
6501 -+ rb_erase(&bfqq->pos_node, bfqq->pos_root);
6502 -+ bfqq->pos_root = NULL;
6503 -+ }
6504 -+
6505 -+ if (bfq_class_idle(bfqq))
6506 -+ return;
6507 -+ if (!bfqq->next_rq)
6508 -+ return;
6509 -+
6510 -+ bfqq->pos_root = &bfqd->rq_pos_tree;
6511 -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
6512 -+ blk_rq_pos(bfqq->next_rq), &parent, &p);
6513 -+ if (__bfqq == NULL) {
6514 -+ rb_link_node(&bfqq->pos_node, parent, p);
6515 -+ rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
6516 -+ } else
6517 -+ bfqq->pos_root = NULL;
6518 -+}
6519 -+
6520 -+static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
6521 -+ struct bfq_queue *bfqq,
6522 -+ struct request *last)
6523 -+{
6524 -+ struct rb_node *rbnext = rb_next(&last->rb_node);
6525 -+ struct rb_node *rbprev = rb_prev(&last->rb_node);
6526 -+ struct request *next = NULL, *prev = NULL;
6527 -+
6528 -+ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
6529 -+
6530 -+ if (rbprev != NULL)
6531 -+ prev = rb_entry_rq(rbprev);
6532 -+
6533 -+ if (rbnext != NULL)
6534 -+ next = rb_entry_rq(rbnext);
6535 -+ else {
6536 -+ rbnext = rb_first(&bfqq->sort_list);
6537 -+ if (rbnext && rbnext != &last->rb_node)
6538 -+ next = rb_entry_rq(rbnext);
6539 -+ }
6540 -+
6541 -+ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
6542 -+}
6543 -+
6544 -+static void bfq_del_rq_rb(struct request *rq)
6545 -+{
6546 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6547 -+ struct bfq_data *bfqd = bfqq->bfqd;
6548 -+ const int sync = rq_is_sync(rq);
6549 -+
6550 -+ BUG_ON(bfqq->queued[sync] == 0);
6551 -+ bfqq->queued[sync]--;
6552 -+ bfqd->queued--;
6553 -+
6554 -+ elv_rb_del(&bfqq->sort_list, rq);
6555 -+
6556 -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
6557 -+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
6558 -+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
6559 -+ /*
6560 -+ * Remove queue from request-position tree as it is empty.
6561 -+ */
6562 -+ if (bfqq->pos_root != NULL) {
6563 -+ rb_erase(&bfqq->pos_node, bfqq->pos_root);
6564 -+ bfqq->pos_root = NULL;
6565 -+ }
6566 -+ }
6567 -+}
6568 -+
6569 -+/* see the definition of bfq_async_charge_factor for details */
6570 -+static inline unsigned long bfq_serv_to_charge(struct request *rq,
6571 -+ struct bfq_queue *bfqq)
6572 -+{
6573 -+ return blk_rq_sectors(rq) *
6574 -+ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) *
6575 -+ bfq_async_charge_factor));
6576 -+}
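A quick numeric illustration of the charging rule above, with an assumed request size: sync requests (and async requests of weight-raised queues) are charged their size in sectors, while async requests of non-raised queues are charged size * (1 + bfq_async_charge_factor).

#include <stdio.h>

int main(void)
{
	const unsigned long sectors = 8;   /* assumed request size, in sectors */
	const unsigned long factor = 10;   /* bfq_async_charge_factor */

	unsigned long sync_charge = sectors;                   /* sync queue: 8 */
	unsigned long async_charge = sectors * (1 + factor);   /* non-raised async: 88 */

	printf("sync charge: %lu, async charge: %lu\n", sync_charge, async_charge);
	return 0;
}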
6577 -+
6578 -+/**
6579 -+ * bfq_updated_next_req - update the queue after a new next_rq selection.
6580 -+ * @bfqd: the device data the queue belongs to.
6581 -+ * @bfqq: the queue to update.
6582 -+ *
6583 -+ * If the first request of a queue changes we make sure that the queue
6584 -+ * has enough budget to serve at least its first request (if the
6585 -+ * request has grown). We do this because if the queue does not have enough
6586 -+ * budget for its first request, it has to go through two dispatch
6587 -+ * rounds to actually get it dispatched.
6588 -+ */
6589 -+static void bfq_updated_next_req(struct bfq_data *bfqd,
6590 -+ struct bfq_queue *bfqq)
6591 -+{
6592 -+ struct bfq_entity *entity = &bfqq->entity;
6593 -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
6594 -+ struct request *next_rq = bfqq->next_rq;
6595 -+ unsigned long new_budget;
6596 -+
6597 -+ if (next_rq == NULL)
6598 -+ return;
6599 -+
6600 -+ if (bfqq == bfqd->in_service_queue)
6601 -+ /*
6602 -+ * In order not to break guarantees, budgets cannot be
6603 -+ * changed after an entity has been selected.
6604 -+ */
6605 -+ return;
6606 -+
6607 -+ BUG_ON(entity->tree != &st->active);
6608 -+ BUG_ON(entity == entity->sched_data->in_service_entity);
6609 -+
6610 -+ new_budget = max_t(unsigned long, bfqq->max_budget,
6611 -+ bfq_serv_to_charge(next_rq, bfqq));
6612 -+ entity->budget = new_budget;
6613 -+ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget);
6614 -+ bfq_activate_bfqq(bfqd, bfqq);
6615 -+}
6616 -+
6617 -+static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
6618 -+{
6619 -+ u64 dur;
6620 -+
6621 -+ if (bfqd->bfq_raising_max_time > 0)
6622 -+ return bfqd->bfq_raising_max_time;
6623 -+
6624 -+ dur = bfqd->RT_prod;
6625 -+ do_div(dur, bfqd->peak_rate);
6626 -+
6627 -+ return dur;
6628 -+}
6629 -+
6630 -+static void bfq_add_rq_rb(struct request *rq)
6631 -+{
6632 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6633 -+ struct bfq_entity *entity = &bfqq->entity;
6634 -+ struct bfq_data *bfqd = bfqq->bfqd;
6635 -+ struct request *next_rq, *prev;
6636 -+ unsigned long old_raising_coeff = bfqq->raising_coeff;
6637 -+ int idle_for_long_time = 0;
6638 -+
6639 -+ bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq));
6640 -+ bfqq->queued[rq_is_sync(rq)]++;
6641 -+ bfqd->queued++;
6642 -+
6643 -+ elv_rb_add(&bfqq->sort_list, rq);
6644 -+
6645 -+ /*
6646 -+ * Check if this request is a better next-serve candidate.
6647 -+ */
6648 -+ prev = bfqq->next_rq;
6649 -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
6650 -+ BUG_ON(next_rq == NULL);
6651 -+ bfqq->next_rq = next_rq;
6652 -+
6653 -+ /*
6654 -+ * Adjust priority tree position, if next_rq changes.
6655 -+ */
6656 -+ if (prev != bfqq->next_rq)
6657 -+ bfq_rq_pos_tree_add(bfqd, bfqq);
6658 -+
6659 -+ if (!bfq_bfqq_busy(bfqq)) {
6660 -+ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 &&
6661 -+ time_is_before_jiffies(bfqq->soft_rt_next_start);
6662 -+ idle_for_long_time = time_is_before_jiffies(
6663 -+ bfqq->budget_timeout +
6664 -+ bfqd->bfq_raising_min_idle_time);
6665 -+ entity->budget = max_t(unsigned long, bfqq->max_budget,
6666 -+ bfq_serv_to_charge(next_rq, bfqq));
6667 -+
6668 -+ if (!bfqd->low_latency)
6669 -+ goto add_bfqq_busy;
6670 -+
6671 -+ /*
6672 -+ * If the queue is not being boosted and has been idle
6673 -+ * for enough time, start a weight-raising period
6674 -+ */
6675 -+ if (old_raising_coeff == 1 &&
6676 -+ (idle_for_long_time || soft_rt)) {
6677 -+ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
6678 -+ if (idle_for_long_time)
6679 -+ bfqq->raising_cur_max_time =
6680 -+ bfq_wrais_duration(bfqd);
6681 -+ else
6682 -+ bfqq->raising_cur_max_time =
6683 -+ bfqd->bfq_raising_rt_max_time;
6684 -+ bfq_log_bfqq(bfqd, bfqq,
6685 -+ "wrais starting at %lu, "
6686 -+ "rais_max_time %u",
6687 -+ jiffies,
6688 -+ jiffies_to_msecs(bfqq->
6689 -+ raising_cur_max_time));
6690 -+ } else if (old_raising_coeff > 1) {
6691 -+ if (idle_for_long_time)
6692 -+ bfqq->raising_cur_max_time =
6693 -+ bfq_wrais_duration(bfqd);
6694 -+ else if (bfqq->raising_cur_max_time ==
6695 -+ bfqd->bfq_raising_rt_max_time &&
6696 -+ !soft_rt) {
6697 -+ bfqq->raising_coeff = 1;
6698 -+ bfq_log_bfqq(bfqd, bfqq,
6699 -+ "wrais ending at %lu, "
6700 -+ "rais_max_time %u",
6701 -+ jiffies,
6702 -+ jiffies_to_msecs(bfqq->
6703 -+ raising_cur_max_time));
6704 -+ } else if (time_before(
6705 -+ bfqq->last_rais_start_finish +
6706 -+ bfqq->raising_cur_max_time,
6707 -+ jiffies +
6708 -+ bfqd->bfq_raising_rt_max_time) &&
6709 -+ soft_rt) {
6710 -+ /*
6711 -+ *
6712 -+ * The remaining weight-raising time is lower
6713 -+ * than bfqd->bfq_raising_rt_max_time, which
6714 -+ * means that the application is enjoying
6715 -+ * weight raising either because deemed soft-
6716 -+ * rt in the near past, or because deemed
6717 -+ * interactive long ago. In both cases,
6718 -+ * resetting now the current remaining weight-
6719 -+ * raising time for the application to the
6720 -+ * weight-raising duration for soft rt
6721 -+ * applications would not cause any latency
6722 -+ * increase for the application (as the new
6723 -+ * duration would be higher than the remaining
6724 -+ * time).
6725 -+ *
6726 -+ * In addition, the application is now meeting
6727 -+ * the requirements for being deemed soft rt.
6728 -+ * In the end we can correctly and safely
6729 -+ * (re)charge the weight-raising duration for
6730 -+ * the application with the weight-raising
6731 -+ * duration for soft rt applications.
6732 -+ *
6733 -+ * In particular, doing this recharge now, i.e.,
6734 -+ * before the weight-raising period for the
6735 -+ * application finishes, reduces the probability
6736 -+ * of the following negative scenario:
6737 -+ * 1) the weight of a soft rt application is
6738 -+ * raised at startup (as for any newly
6739 -+ * created application),
6740 -+ * 2) since the application is not interactive,
6741 -+ * at a certain time weight-raising is
6742 -+ * stopped for the application,
6743 -+ * 3) at that time the application happens to
6744 -+ * still have pending requests, and hence
6745 -+ * is destined to not have a chance to be
6746 -+ * deemed soft rt before these requests are
6747 -+ * completed (see the comments to the
6748 -+ * function bfq_bfqq_softrt_next_start()
6749 -+ * for details on soft rt detection),
6750 -+ * 4) these pending requests experience a high
6751 -+ * latency because the application is not
6752 -+ * weight-raised while they are pending.
6753 -+ */
6754 -+ bfqq->last_rais_start_finish = jiffies;
6755 -+ bfqq->raising_cur_max_time =
6756 -+ bfqd->bfq_raising_rt_max_time;
6757 -+ }
6758 -+ }
6759 -+ if (old_raising_coeff != bfqq->raising_coeff)
6760 -+ entity->ioprio_changed = 1;
6761 -+add_bfqq_busy:
6762 -+ bfqq->last_idle_bklogged = jiffies;
6763 -+ bfqq->service_from_backlogged = 0;
6764 -+ bfq_clear_bfqq_softrt_update(bfqq);
6765 -+ bfq_add_bfqq_busy(bfqd, bfqq);
6766 -+ } else {
6767 -+ if (bfqd->low_latency && old_raising_coeff == 1 &&
6768 -+ !rq_is_sync(rq) &&
6769 -+ time_is_before_jiffies(
6770 -+ bfqq->last_rais_start_finish +
6771 -+ bfqd->bfq_raising_min_inter_arr_async)) {
6772 -+ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
6773 -+ bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd);
6774 -+
6775 -+ bfqd->raised_busy_queues++;
6776 -+ entity->ioprio_changed = 1;
6777 -+ bfq_log_bfqq(bfqd, bfqq,
6778 -+ "non-idle wrais starting at %lu, "
6779 -+ "rais_max_time %u",
6780 -+ jiffies,
6781 -+ jiffies_to_msecs(bfqq->
6782 -+ raising_cur_max_time));
6783 -+ }
6784 -+ bfq_updated_next_req(bfqd, bfqq);
6785 -+ }
6786 -+
6787 -+ if (bfqd->low_latency &&
6788 -+ (old_raising_coeff == 1 || bfqq->raising_coeff == 1 ||
6789 -+ idle_for_long_time))
6790 -+ bfqq->last_rais_start_finish = jiffies;
6791 -+}
6792 -+
6793 -+static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq)
6794 -+{
6795 -+ elv_rb_del(&bfqq->sort_list, rq);
6796 -+ bfqq->queued[rq_is_sync(rq)]--;
6797 -+ bfqq->bfqd->queued--;
6798 -+ bfq_add_rq_rb(rq);
6799 -+}
6800 -+
6801 -+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
6802 -+ struct bio *bio)
6803 -+{
6804 -+ struct task_struct *tsk = current;
6805 -+ struct bfq_io_cq *bic;
6806 -+ struct bfq_queue *bfqq;
6807 -+
6808 -+ bic = bfq_bic_lookup(bfqd, tsk->io_context);
6809 -+ if (bic == NULL)
6810 -+ return NULL;
6811 -+
6812 -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
6813 -+ if (bfqq != NULL)
6814 -+ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
6815 -+
6816 -+ return NULL;
6817 -+}
6818 -+
6819 -+static void bfq_activate_request(struct request_queue *q, struct request *rq)
6820 -+{
6821 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6822 -+
6823 -+ bfqd->rq_in_driver++;
6824 -+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
6825 -+ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
6826 -+ (long long unsigned)bfqd->last_position);
6827 -+}
6828 -+
6829 -+static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
6830 -+{
6831 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6832 -+
6833 -+ WARN_ON(bfqd->rq_in_driver == 0);
6834 -+ bfqd->rq_in_driver--;
6835 -+}
6836 -+
6837 -+static void bfq_remove_request(struct request *rq)
6838 -+{
6839 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6840 -+ struct bfq_data *bfqd = bfqq->bfqd;
6841 -+
6842 -+ if (bfqq->next_rq == rq) {
6843 -+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
6844 -+ bfq_updated_next_req(bfqd, bfqq);
6845 -+ }
6846 -+
6847 -+ list_del_init(&rq->queuelist);
6848 -+ bfq_del_rq_rb(rq);
6849 -+
6850 -+ if (rq->cmd_flags & REQ_META) {
6851 -+ WARN_ON(bfqq->meta_pending == 0);
6852 -+ bfqq->meta_pending--;
6853 -+ }
6854 -+}
6855 -+
6856 -+static int bfq_merge(struct request_queue *q, struct request **req,
6857 -+ struct bio *bio)
6858 -+{
6859 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6860 -+ struct request *__rq;
6861 -+
6862 -+ __rq = bfq_find_rq_fmerge(bfqd, bio);
6863 -+ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) {
6864 -+ *req = __rq;
6865 -+ return ELEVATOR_FRONT_MERGE;
6866 -+ }
6867 -+
6868 -+ return ELEVATOR_NO_MERGE;
6869 -+}
6870 -+
6871 -+static void bfq_merged_request(struct request_queue *q, struct request *req,
6872 -+ int type)
6873 -+{
6874 -+ if (type == ELEVATOR_FRONT_MERGE) {
6875 -+ struct bfq_queue *bfqq = RQ_BFQQ(req);
6876 -+
6877 -+ bfq_reposition_rq_rb(bfqq, req);
6878 -+ }
6879 -+}
6880 -+
6881 -+static void bfq_merged_requests(struct request_queue *q, struct request *rq,
6882 -+ struct request *next)
6883 -+{
6884 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6885 -+
6886 -+ /*
6887 -+ * Reposition in fifo if next is older than rq.
6888 -+ */
6889 -+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
6890 -+ time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
6891 -+ list_move(&rq->queuelist, &next->queuelist);
6892 -+ rq_set_fifo_time(rq, rq_fifo_time(next));
6893 -+ }
6894 -+
6895 -+ if (bfqq->next_rq == next)
6896 -+ bfqq->next_rq = rq;
6897 -+
6898 -+ bfq_remove_request(next);
6899 -+}
6900 -+
6901 -+/* Must be called with bfqq != NULL */
6902 -+static inline void bfq_bfqq_end_raising(struct bfq_queue *bfqq)
6903 -+{
6904 -+ BUG_ON(bfqq == NULL);
6905 -+ if (bfq_bfqq_busy(bfqq))
6906 -+ bfqq->bfqd->raised_busy_queues--;
6907 -+ bfqq->raising_coeff = 1;
6908 -+ bfqq->raising_cur_max_time = 0;
6909 -+ /* Trigger a weight change on the next activation of the queue */
6910 -+ bfqq->entity.ioprio_changed = 1;
6911 -+}
6912 -+
6913 -+static void bfq_end_raising_async_queues(struct bfq_data *bfqd,
6914 -+ struct bfq_group *bfqg)
6915 -+{
6916 -+ int i, j;
6917 -+
6918 -+ for (i = 0; i < 2; i++)
6919 -+ for (j = 0; j < IOPRIO_BE_NR; j++)
6920 -+ if (bfqg->async_bfqq[i][j] != NULL)
6921 -+ bfq_bfqq_end_raising(bfqg->async_bfqq[i][j]);
6922 -+ if (bfqg->async_idle_bfqq != NULL)
6923 -+ bfq_bfqq_end_raising(bfqg->async_idle_bfqq);
6924 -+}
6925 -+
6926 -+static void bfq_end_raising(struct bfq_data *bfqd)
6927 -+{
6928 -+ struct bfq_queue *bfqq;
6929 -+
6930 -+ spin_lock_irq(bfqd->queue->queue_lock);
6931 -+
6932 -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
6933 -+ bfq_bfqq_end_raising(bfqq);
6934 -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
6935 -+ bfq_bfqq_end_raising(bfqq);
6936 -+ bfq_end_raising_async(bfqd);
6937 -+
6938 -+ spin_unlock_irq(bfqd->queue->queue_lock);
6939 -+}
6940 -+
6941 -+static int bfq_allow_merge(struct request_queue *q, struct request *rq,
6942 -+ struct bio *bio)
6943 -+{
6944 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6945 -+ struct bfq_io_cq *bic;
6946 -+ struct bfq_queue *bfqq;
6947 -+
6948 -+ /*
6949 -+ * Disallow merge of a sync bio into an async request.
6950 -+ */
6951 -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
6952 -+ return 0;
6953 -+
6954 -+ /*
6955 -+ * Lookup the bfqq that this bio will be queued with. Allow
6956 -+ * merge only if rq is queued there.
6957 -+ * Queue lock is held here.
6958 -+ */
6959 -+ bic = bfq_bic_lookup(bfqd, current->io_context);
6960 -+ if (bic == NULL)
6961 -+ return 0;
6962 -+
6963 -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
6964 -+ return bfqq == RQ_BFQQ(rq);
6965 -+}
6966 -+
6967 -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
6968 -+ struct bfq_queue *bfqq)
6969 -+{
6970 -+ if (bfqq != NULL) {
6971 -+ bfq_mark_bfqq_must_alloc(bfqq);
6972 -+ bfq_mark_bfqq_budget_new(bfqq);
6973 -+ bfq_clear_bfqq_fifo_expire(bfqq);
6974 -+
6975 -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
6976 -+
6977 -+ bfq_log_bfqq(bfqd, bfqq,
6978 -+ "set_in_service_queue, cur-budget = %lu",
6979 -+ bfqq->entity.budget);
6980 -+ }
6981 -+
6982 -+ bfqd->in_service_queue = bfqq;
6983 -+}
6984 -+
6985 -+/*
6986 -+ * Get and set a new queue for service.
6987 -+ */
6988 -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
6989 -+ struct bfq_queue *bfqq)
6990 -+{
6991 -+ if (!bfqq)
6992 -+ bfqq = bfq_get_next_queue(bfqd);
6993 -+ else
6994 -+ bfq_get_next_queue_forced(bfqd, bfqq);
6995 -+
6996 -+ __bfq_set_in_service_queue(bfqd, bfqq);
6997 -+ return bfqq;
6998 -+}
6999 -+
7000 -+static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
7001 -+ struct request *rq)
7002 -+{
7003 -+ if (blk_rq_pos(rq) >= bfqd->last_position)
7004 -+ return blk_rq_pos(rq) - bfqd->last_position;
7005 -+ else
7006 -+ return bfqd->last_position - blk_rq_pos(rq);
7007 -+}
7008 -+
7009 -+/*
7010 -+ * Return true if bfqq has no request pending and rq is close enough to
7011 -+ * bfqd->last_position, or if rq is closer to bfqd->last_position than
7012 -+ * bfqq->next_rq
7013 -+ */
7014 -+static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
7015 -+{
7016 -+ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
7017 -+}
7018 -+
7019 -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
7020 -+{
7021 -+ struct rb_root *root = &bfqd->rq_pos_tree;
7022 -+ struct rb_node *parent, *node;
7023 -+ struct bfq_queue *__bfqq;
7024 -+ sector_t sector = bfqd->last_position;
7025 -+
7026 -+ if (RB_EMPTY_ROOT(root))
7027 -+ return NULL;
7028 -+
7029 -+ /*
7030 -+ * First, if we find a request starting at the end of the last
7031 -+ * request, choose it.
7032 -+ */
7033 -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
7034 -+ if (__bfqq != NULL)
7035 -+ return __bfqq;
7036 -+
7037 -+ /*
7038 -+ * If the exact sector wasn't found, the parent of the NULL leaf
7039 -+ * will contain the closest sector (rq_pos_tree sorted by next_request
7040 -+ * position).
7041 -+ */
7042 -+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
7043 -+ if (bfq_rq_close(bfqd, __bfqq->next_rq))
7044 -+ return __bfqq;
7045 -+
7046 -+ if (blk_rq_pos(__bfqq->next_rq) < sector)
7047 -+ node = rb_next(&__bfqq->pos_node);
7048 -+ else
7049 -+ node = rb_prev(&__bfqq->pos_node);
7050 -+ if (node == NULL)
7051 -+ return NULL;
7052 -+
7053 -+ __bfqq = rb_entry(node, struct bfq_queue, pos_node);
7054 -+ if (bfq_rq_close(bfqd, __bfqq->next_rq))
7055 -+ return __bfqq;
7056 -+
7057 -+ return NULL;
7058 -+}
7059 -+
7060 -+/*
7061 -+ * bfqd - obvious
7062 -+ * cur_bfqq - passed in so that we don't decide that the current queue
7063 -+ * is closely cooperating with itself.
7064 -+ *
7065 -+ * We are assuming that cur_bfqq has dispatched at least one request,
7066 -+ * and that bfqd->last_position reflects a position on the disk associated
7067 -+ * with the I/O issued by cur_bfqq.
7068 -+ */
7069 -+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
7070 -+ struct bfq_queue *cur_bfqq)
7071 -+{
7072 -+ struct bfq_queue *bfqq;
7073 -+
7074 -+ if (bfq_class_idle(cur_bfqq))
7075 -+ return NULL;
7076 -+ if (!bfq_bfqq_sync(cur_bfqq))
7077 -+ return NULL;
7078 -+ if (BFQQ_SEEKY(cur_bfqq))
7079 -+ return NULL;
7080 -+
7081 -+ /* If device has only one backlogged bfq_queue, don't search. */
7082 -+ if (bfqd->busy_queues == 1)
7083 -+ return NULL;
7084 -+
7085 -+ /*
7086 -+ * We should notice if some of the queues are cooperating, e.g.
7087 -+ * working closely on the same area of the disk. In that case,
7088 -+ * we can group them together and avoid wasting time idling.
7089 -+ */
7090 -+ bfqq = bfqq_close(bfqd);
7091 -+ if (bfqq == NULL || bfqq == cur_bfqq)
7092 -+ return NULL;
7093 -+
7094 -+ /*
7095 -+ * Do not merge queues from different bfq_groups.
7096 -+ */
7097 -+ if (bfqq->entity.parent != cur_bfqq->entity.parent)
7098 -+ return NULL;
7099 -+
7100 -+ /*
7101 -+ * It only makes sense to merge sync queues.
7102 -+ */
7103 -+ if (!bfq_bfqq_sync(bfqq))
7104 -+ return NULL;
7105 -+ if (BFQQ_SEEKY(bfqq))
7106 -+ return NULL;
7107 -+
7108 -+ /*
7109 -+ * Do not merge queues of different priority classes.
7110 -+ */
7111 -+ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
7112 -+ return NULL;
7113 -+
7114 -+ return bfqq;
7115 -+}
7116 -+
7117 -+/*
7118 -+ * If enough samples have been computed, return the current max budget
7119 -+ * stored in bfqd, which is dynamically updated according to the
7120 -+ * estimated disk peak rate; otherwise return the default max budget
7121 -+ */
7122 -+static inline unsigned long bfq_max_budget(struct bfq_data *bfqd)
7123 -+{
7124 -+ if (bfqd->budgets_assigned < 194)
7125 -+ return bfq_default_max_budget;
7126 -+ else
7127 -+ return bfqd->bfq_max_budget;
7128 -+}
7129 -+
7130 -+/*
7131 -+ * Return min budget, which is a fraction of the current or default
7132 -+ * max budget (trying with 1/32)
7133 -+ */
7134 -+static inline unsigned long bfq_min_budget(struct bfq_data *bfqd)
7135 -+{
7136 -+ if (bfqd->budgets_assigned < 194)
7137 -+ return bfq_default_max_budget / 32;
7138 -+ else
7139 -+ return bfqd->bfq_max_budget / 32;
7140 -+}
7141 -+
7142 -+/*
7143 -+ * Decides whether idling should be done for given device and
7144 -+ * given in-service queue.
7145 -+ */
7146 -+static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd,
7147 -+ struct bfq_queue *in_service_bfqq)
7148 -+{
7149 -+ if (in_service_bfqq == NULL)
7150 -+ return false;
7151 -+ /*
7152 -+ * If the device is non-rotational, and hence has no seek penalty,
7153 -+ * disable idling; but do so only if:
7154 -+ * - the device supports queuing (hw_tag), otherwise we still have
7155 -+ * a problem with sync vs async workloads;
7156 -+ * - the queue is not weight-raised, to preserve guarantees.
7157 -+ */
7158 -+ return blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag &&
7159 -+ (in_service_bfqq->raising_coeff == 1);
7160 -+}
7161 -+
7162 -+static void bfq_arm_slice_timer(struct bfq_data *bfqd)
7163 -+{
7164 -+ struct bfq_queue *bfqq = bfqd->in_service_queue;
7165 -+ struct bfq_io_cq *bic;
7166 -+ unsigned long sl;
7167 -+
7168 -+ WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
7169 -+
7170 -+ /* Tasks have exited, don't wait. */
7171 -+ bic = bfqd->in_service_bic;
7172 -+ if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0)
7173 -+ return;
7174 -+
7175 -+ bfq_mark_bfqq_wait_request(bfqq);
7176 -+
7177 -+ /*
7178 -+ * We don't want to idle for seeks, but we do want to allow
7179 -+ * fair distribution of slice time for a process doing back-to-back
7180 -+ * seeks. So allow a little bit of time for it to submit a new rq.
7181 -+ *
7182 -+ * To prevent processes with (partly) seeky workloads from
7183 -+ * being too ill-treated, grant them a small fraction of the
7184 -+ * assigned budget before reducing the waiting time to
7185 -+ * BFQ_MIN_TT. This happened to help reduce latency.
7186 -+ */
7187 -+ sl = bfqd->bfq_slice_idle;
7188 -+ if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) &&
7189 -+ bfqq->entity.service > bfq_max_budget(bfqd) / 8 &&
7190 -+ bfqq->raising_coeff == 1)
7191 -+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
7192 -+ else if (bfqq->raising_coeff > 1)
7193 -+ sl = sl * 3;
7194 -+ bfqd->last_idling_start = ktime_get();
7195 -+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
7196 -+ bfq_log(bfqd, "arm idle: %u/%u ms",
7197 -+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
7198 -+}
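To make the idling-time scaling above concrete, here is a minimal sketch with assumed values (HZ = 1000, so one jiffy is 1 ms): seeky, non-weight-raised queues that have already consumed a fraction of their budget idle for at most BFQ_MIN_TT, while weight-raised queues idle for three times the base slice.

#include <stdio.h>

int main(void)
{
	const unsigned long hz = 1000;             /* assumed HZ */
	const unsigned long slice_idle = hz / 125; /* bfq_slice_idle: 8 jiffies = 8 ms */
	const unsigned long min_tt = 2;            /* BFQ_MIN_TT, in ms */

	unsigned long sl_seeky = slice_idle < min_tt ? slice_idle : min_tt; /* 2 ms */
	unsigned long sl_raised = slice_idle * 3;                           /* 24 ms */

	printf("default: %lu ms, seeky: %lu ms, weight-raised: %lu ms\n",
	       slice_idle, sl_seeky, sl_raised);
	return 0;
}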
7199 -+
7200 -+/*
7201 -+ * Set the maximum time for the in-service queue to consume its
7202 -+ * budget. This prevents seeky processes from lowering the disk
7203 -+ * throughput (always guaranteed with a time slice scheme as in CFQ).
7204 -+ */
7205 -+static void bfq_set_budget_timeout(struct bfq_data *bfqd)
7206 -+{
7207 -+ struct bfq_queue *bfqq = bfqd->in_service_queue;
7208 -+ unsigned int timeout_coeff;
7209 -+ if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time)
7210 -+ timeout_coeff = 1;
7211 -+ else
7212 -+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
7213 -+
7214 -+ bfqd->last_budget_start = ktime_get();
7215 -+
7216 -+ bfq_clear_bfqq_budget_new(bfqq);
7217 -+ bfqq->budget_timeout = jiffies +
7218 -+ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
7219 -+
7220 -+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
7221 -+ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
7222 -+ timeout_coeff));
7223 -+}
7224 -+
7225 -+/*
7226 -+ * Move request from internal lists to the request queue dispatch list.
7227 -+ */
7228 -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
7229 -+{
7230 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
7231 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
7232 -+
7233 -+ bfq_remove_request(rq);
7234 -+ bfqq->dispatched++;
7235 -+ elv_dispatch_sort(q, rq);
7236 -+
7237 -+ if (bfq_bfqq_sync(bfqq))
7238 -+ bfqd->sync_flight++;
7239 -+}
7240 -+
7241 -+/*
7242 -+ * Return expired entry, or NULL to just start from scratch in rbtree.
7243 -+ */
7244 -+static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
7245 -+{
7246 -+ struct request *rq = NULL;
7247 -+
7248 -+ if (bfq_bfqq_fifo_expire(bfqq))
7249 -+ return NULL;
7250 -+
7251 -+ bfq_mark_bfqq_fifo_expire(bfqq);
7252 -+
7253 -+ if (list_empty(&bfqq->fifo))
7254 -+ return NULL;
7255 -+
7256 -+ rq = rq_entry_fifo(bfqq->fifo.next);
7257 -+
7258 -+ if (time_before(jiffies, rq_fifo_time(rq)))
7259 -+ return NULL;
7260 -+
7261 -+ return rq;
7262 -+}
7263 -+
7264 -+/*
7265 -+ * Must be called with the queue_lock held.
7266 -+ */
7267 -+static int bfqq_process_refs(struct bfq_queue *bfqq)
7268 -+{
7269 -+ int process_refs, io_refs;
7270 -+
7271 -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
7272 -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
7273 -+ BUG_ON(process_refs < 0);
7274 -+ return process_refs;
7275 -+}
7276 -+
7277 -+static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
7278 -+{
7279 -+ int process_refs, new_process_refs;
7280 -+ struct bfq_queue *__bfqq;
7281 -+
7282 -+ /*
7283 -+ * If there are no process references on the new_bfqq, then it is
7284 -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
7285 -+ * may have dropped their last reference (not just their last process
7286 -+ * reference).
7287 -+ */
7288 -+ if (!bfqq_process_refs(new_bfqq))
7289 -+ return;
7290 -+
7291 -+ /* Avoid a circular list and skip interim queue merges. */
7292 -+ while ((__bfqq = new_bfqq->new_bfqq)) {
7293 -+ if (__bfqq == bfqq)
7294 -+ return;
7295 -+ new_bfqq = __bfqq;
7296 -+ }
7297 -+
7298 -+ process_refs = bfqq_process_refs(bfqq);
7299 -+ new_process_refs = bfqq_process_refs(new_bfqq);
7300 -+ /*
7301 -+ * If the process for the bfqq has gone away, there is no
7302 -+ * sense in merging the queues.
7303 -+ */
7304 -+ if (process_refs == 0 || new_process_refs == 0)
7305 -+ return;
7306 -+
7307 -+ /*
7308 -+ * Merge in the direction of the lesser amount of work.
7309 -+ */
7310 -+ if (new_process_refs >= process_refs) {
7311 -+ bfqq->new_bfqq = new_bfqq;
7312 -+ atomic_add(process_refs, &new_bfqq->ref);
7313 -+ } else {
7314 -+ new_bfqq->new_bfqq = bfqq;
7315 -+ atomic_add(new_process_refs, &bfqq->ref);
7316 -+ }
7317 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
7318 -+ new_bfqq->pid);
7319 -+}
7320 -+
7321 -+static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
7322 -+{
7323 -+ struct bfq_entity *entity = &bfqq->entity;
7324 -+ return entity->budget - entity->service;
7325 -+}
7326 -+
7327 -+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
7328 -+{
7329 -+ BUG_ON(bfqq != bfqd->in_service_queue);
7330 -+
7331 -+ __bfq_bfqd_reset_in_service(bfqd);
7332 -+
7333 -+ /*
7334 -+ * If this bfqq is shared between multiple processes, check
7335 -+ * to make sure that those processes are still issuing I/Os
7336 -+ * within the mean seek distance. If not, it may be time to
7337 -+ * break the queues apart again.
7338 -+ */
7339 -+ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
7340 -+ bfq_mark_bfqq_split_coop(bfqq);
7341 -+
7342 -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
7343 -+ /*
7344 -+ * overloading budget_timeout field to store when
7345 -+ * the queue remains with no backlog, used by
7346 -+ * the weight-raising mechanism
7347 -+ */
7348 -+ bfqq->budget_timeout = jiffies;
7349 -+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
7350 -+ } else {
7351 -+ bfq_activate_bfqq(bfqd, bfqq);
7352 -+ /*
7353 -+ * Resort priority tree of potential close cooperators.
7354 -+ */
7355 -+ bfq_rq_pos_tree_add(bfqd, bfqq);
7356 -+ }
7357 -+}
7358 -+
7359 -+/**
7360 -+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
7361 -+ * @bfqd: device data.
7362 -+ * @bfqq: queue to update.
7363 -+ * @reason: reason for expiration.
7364 -+ *
7365 -+ * Handle the feedback on @bfqq budget. See the body for detailed
7366 -+ * comments.
7367 -+ */
7368 -+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
7369 -+ struct bfq_queue *bfqq,
7370 -+ enum bfqq_expiration reason)
7371 -+{
7372 -+ struct request *next_rq;
7373 -+ unsigned long budget, min_budget;
7374 -+
7375 -+ budget = bfqq->max_budget;
7376 -+ min_budget = bfq_min_budget(bfqd);
7377 -+
7378 -+ BUG_ON(bfqq != bfqd->in_service_queue);
7379 -+
7380 -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu",
7381 -+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
7382 -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu",
7383 -+ budget, bfq_min_budget(bfqd));
7384 -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
7385 -+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
7386 -+
7387 -+ if (bfq_bfqq_sync(bfqq)) {
7388 -+ switch (reason) {
7389 -+ /*
7390 -+ * Caveat: in all the following cases we trade latency
7391 -+ * for throughput.
7392 -+ */
7393 -+ case BFQ_BFQQ_TOO_IDLE:
7394 -+ /*
7395 -+ * This is the only case where we may reduce
7396 -+ * the budget: if there is no request of the
7397 -+ * process still waiting for completion, then
7398 -+ * we assume (tentatively) that the timer has
7399 -+ * expired because the batch of requests of
7400 -+ * the process could have been served with a
7401 -+ * smaller budget. Hence, betting that
7402 -+ * process will behave in the same way when it
7403 -+ * becomes backlogged again, we reduce its
7404 -+ * next budget. As long as we guess right,
7405 -+ * this budget cut reduces the latency
7406 -+ * experienced by the process.
7407 -+ *
7408 -+ * However, if there are still outstanding
7409 -+ * requests, then the process may have not yet
7410 -+ * issued its next request just because it is
7411 -+ * still waiting for the completion of some of
7412 -+ * the still outstanding ones. So in this
7413 -+ * subcase we do not reduce its budget, on the
7414 -+ * contrary we increase it to possibly boost
7415 -+ * the throughput, as discussed in the
7416 -+ * comments to the BUDGET_TIMEOUT case.
7417 -+ */
7418 -+ if (bfqq->dispatched > 0) /* still outstanding reqs */
7419 -+ budget = min(budget * 2, bfqd->bfq_max_budget);
7420 -+ else {
7421 -+ if (budget > 5 * min_budget)
7422 -+ budget -= 4 * min_budget;
7423 -+ else
7424 -+ budget = min_budget;
7425 -+ }
7426 -+ break;
7427 -+ case BFQ_BFQQ_BUDGET_TIMEOUT:
7428 -+ /*
7429 -+ * We double the budget here because: 1) it
7430 -+ * gives the chance to boost the throughput if
7431 -+ * this is not a seeky process (which may have
7432 -+ * bumped into this timeout because of, e.g.,
7433 -+ * ZBR), 2) together with charge_full_budget
7434 -+ * it helps give seeky processes higher
7435 -+ * timestamps, and hence be served less
7436 -+ * frequently.
7437 -+ */
7438 -+ budget = min(budget * 2, bfqd->bfq_max_budget);
7439 -+ break;
7440 -+ case BFQ_BFQQ_BUDGET_EXHAUSTED:
7441 -+ /*
7442 -+ * The process still has backlog, and did not
7443 -+ * let either the budget timeout or the disk
7444 -+ * idling timeout expire. Hence it is not
7445 -+ * seeky, has a short thinktime and may be
7446 -+ * happy with a higher budget too. So
7447 -+ * definitely increase the budget of this good
7448 -+ * candidate to boost the disk throughput.
7449 -+ */
7450 -+ budget = min(budget * 4, bfqd->bfq_max_budget);
7451 -+ break;
7452 -+ case BFQ_BFQQ_NO_MORE_REQUESTS:
7453 -+ /*
7454 -+ * Leave the budget unchanged.
7455 -+ */
7456 -+ default:
7457 -+ return;
7458 -+ }
7459 -+ } else /* async queue */
7460 -+ /* async queues always get the maximum possible budget
7461 -+ * (their ability to dispatch is limited by
7462 -+ * @bfqd->bfq_max_budget_async_rq).
7463 -+ */
7464 -+ budget = bfqd->bfq_max_budget;
7465 -+
7466 -+ bfqq->max_budget = budget;
7467 -+
7468 -+ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
7469 -+ bfqq->max_budget > bfqd->bfq_max_budget)
7470 -+ bfqq->max_budget = bfqd->bfq_max_budget;
7471 -+
7472 -+ /*
7473 -+ * Make sure that we have enough budget for the next request.
7474 -+ * Since the finish time of the bfqq must be kept in sync with
7475 -+ * the budget, be sure to call __bfq_bfqq_expire() after the
7476 -+ * update.
7477 -+ */
7478 -+ next_rq = bfqq->next_rq;
7479 -+ if (next_rq != NULL)
7480 -+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
7481 -+ bfq_serv_to_charge(next_rq, bfqq));
7482 -+ else
7483 -+ bfqq->entity.budget = bfqq->max_budget;
7484 -+
7485 -+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
7486 -+ next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
7487 -+ bfqq->entity.budget);
7488 -+}
7489 -+
7490 -+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
7491 -+{
7492 -+ unsigned long max_budget;
7493 -+
7494 -+ /*
7495 -+ * The max_budget calculated when autotuning is equal to the
7496 -+ * number of sectors transferred in timeout_sync at the
7497 -+ * estimated peak rate.
7498 -+ */
7499 -+ max_budget = (unsigned long)(peak_rate * 1000 *
7500 -+ timeout >> BFQ_RATE_SHIFT);
7501 -+
7502 -+ return max_budget;
7503 -+}
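A worked example of the autotuned budget above, with an assumed peak rate and HZ = 1000 (so the sync timeout of HZ/8 jiffies is 125 ms). The peak rate is in sectors/usec left-shifted by BFQ_RATE_SHIFT, so multiplying by the timeout in ms times 1000 and shifting back yields sectors.

#include <stdio.h>

#define BFQ_RATE_SHIFT 16

int main(void)
{
	const unsigned long long peak_rate = 17415; /* assumed: ~0.27 sectors/usec in fixed point */
	const unsigned long long timeout_ms = 125;  /* bfq_timeout_sync at HZ = 1000 */

	unsigned long max_budget =
		(unsigned long)(peak_rate * 1000 * timeout_ms >> BFQ_RATE_SHIFT);

	printf("autotuned max_budget: %lu sectors\n", max_budget); /* about 33216 */
	return 0;
}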
7504 -+
7505 -+/*
7506 -+ * In addition to updating the peak rate, checks whether the process
7507 -+ * is "slow", and returns 1 if so. This slow flag is used, in addition
7508 -+ * to the budget timeout, to reduce the amount of service provided to
7509 -+ * seeky processes, and hence reduce their chances to lower the
7510 -+ * throughput. See the code for more details.
7511 -+ */
7512 -+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
7513 -+ int compensate, enum bfqq_expiration reason)
7514 -+{
7515 -+ u64 bw, usecs, expected, timeout;
7516 -+ ktime_t delta;
7517 -+ int update = 0;
7518 -+
7519 -+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
7520 -+ return 0;
7521 -+
7522 -+ if (compensate)
7523 -+ delta = bfqd->last_idling_start;
7524 -+ else
7525 -+ delta = ktime_get();
7526 -+ delta = ktime_sub(delta, bfqd->last_budget_start);
7527 -+ usecs = ktime_to_us(delta);
7528 -+
7529 -+ /* Don't trust short/unrealistic values. */
7530 -+ if (usecs < 100 || usecs >= LONG_MAX)
7531 -+ return 0;
7532 -+
7533 -+ /*
7534 -+ * Calculate the bandwidth for the last slice. We use a 64 bit
7535 -+ * value to store the peak rate, in sectors per usec in fixed
7536 -+ * point math. We do so to have enough precision in the estimate
7537 -+ * and to avoid overflows.
7538 -+ */
7539 -+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
7540 -+ do_div(bw, (unsigned long)usecs);
7541 -+
7542 -+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
7543 -+
7544 -+ /*
7545 -+ * Use only long (> 20ms) intervals to filter out spikes for
7546 -+ * the peak rate estimation.
7547 -+ */
7548 -+ if (usecs > 20000) {
7549 -+ if (bw > bfqd->peak_rate ||
7550 -+ (!BFQQ_SEEKY(bfqq) &&
7551 -+ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
7552 -+ bfq_log(bfqd, "measured bw =%llu", bw);
7553 -+ /*
7554 -+ * To smooth oscillations use a low-pass filter with
7555 -+ * alpha=7/8, i.e.,
7556 -+ * new_rate = (7/8) * old_rate + (1/8) * bw
7557 -+ */
7558 -+ do_div(bw, 8);
7559 -+ if (bw == 0)
7560 -+ return 0;
7561 -+ bfqd->peak_rate *= 7;
7562 -+ do_div(bfqd->peak_rate, 8);
7563 -+ bfqd->peak_rate += bw;
7564 -+ update = 1;
7565 -+ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
7566 -+ }
7567 -+
7568 -+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
7569 -+
7570 -+ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
7571 -+ bfqd->peak_rate_samples++;
7572 -+
7573 -+ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
7574 -+ update && bfqd->bfq_user_max_budget == 0) {
7575 -+ bfqd->bfq_max_budget =
7576 -+ bfq_calc_max_budget(bfqd->peak_rate, timeout);
7577 -+ bfq_log(bfqd, "new max_budget=%lu",
7578 -+ bfqd->bfq_max_budget);
7579 -+ }
7580 -+ }
7581 -+
7582 -+ /*
7583 -+ * If the process has been served for too short a time
7584 -+ * interval to let its possible sequential accesses prevail over
7585 -+ * the initial seek time needed to move the disk head onto the
7586 -+ * first sector it requested, then give the process a chance
7587 -+ * and for the moment return false.
7588 -+ */
7589 -+ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
7590 -+ return 0;
7591 -+
7592 -+ /*
7593 -+ * A process is considered ``slow'' (i.e., seeky, so that we
7594 -+ * cannot treat it fairly in the service domain, as it would
7595 -+ * slow down too much the other processes) if, when a slice
7596 -+ * ends for whatever reason, it has received service at a
7597 -+ * rate that would not be high enough to complete the budget
7598 -+ * before the budget timeout expiration.
7599 -+ */
7600 -+ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
7601 -+
7602 -+ /*
7603 -+ * Caveat: processes doing IO in the slower disk zones will
7604 -+ * tend to be slow(er) even if not seeky. And the estimated
7605 -+ * peak rate will actually be an average over the disk
7606 -+ * surface. Hence, to not be too harsh with unlucky processes,
7607 -+ * we keep a budget/3 margin of safety before declaring a
7608 -+ * process slow.
7609 -+ */
7610 -+ return expected > (4 * bfqq->entity.budget) / 3;
7611 -+}
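The 7/8 low-pass filter used above is easy to isolate; the stand-alone sketch below reproduces the same integer arithmetic (do_div replaced by plain division) on made-up sample values.

#include <stdio.h>

/* new_rate = (7/8) * old_rate + (1/8) * bw, with integer truncation. */
static unsigned long long smooth_rate(unsigned long long old_rate,
				      unsigned long long bw)
{
	bw /= 8;
	if (bw == 0)
		return old_rate;   /* sample too small: keep the old estimate */
	return old_rate * 7 / 8 + bw;
}

int main(void)
{
	unsigned long long rate = 16000;  /* assumed current estimate */

	rate = smooth_rate(rate, 24000);  /* one new measured sample */
	printf("smoothed peak rate: %llu\n", rate); /* 17000 */
	return 0;
}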
7612 -+
7613 -+/*
7614 -+ * To be deemed as soft real-time, an application must meet two requirements.
7615 -+ * First, the application must not require an average bandwidth higher than
7616 -+ * the approximate bandwidth required to play back or record a compressed high-
7617 -+ * definition video.
7618 -+ * The next function is invoked on the completion of the last request of a
7619 -+ * batch, to compute the next-start time instant, soft_rt_next_start, such
7620 -+ * that, if the next request of the application does not arrive before
7621 -+ * soft_rt_next_start, then the above requirement on the bandwidth is met.
7622 -+ *
7623 -+ * The second requirement is that the request pattern of the application is
7624 -+ * isochronous, i.e., that, after issuing a request or a batch of requests,
7625 -+ * the application stops issuing new requests until all its pending requests
7626 -+ * have been completed. After that, the application may issue a new batch,
7627 -+ * and so on.
7628 -+ * For this reason the next function is invoked to compute soft_rt_next_start
7629 -+ * only for applications that meet this requirement, whereas soft_rt_next_start
7630 -+ * is set to infinity for applications that do not.
7631 -+ *
7632 -+ * Unfortunately, even a greedy application may happen to behave in an
7633 -+ * isochronous way if the CPU load is high. In fact, the application may stop
7634 -+ * issuing requests while the CPUs are busy serving other processes, then
7635 -+ * restart, then stop again for a while, and so on. In addition, if the disk
7636 -+ * achieves a low enough throughput with the request pattern issued by the
7637 -+ * application (e.g., because the request pattern is random and/or the device
7638 -+ * is slow), then the application may meet the above bandwidth requirement too.
7639 -+ * To prevent such a greedy application from being deemed soft real-time, a
7640 -+ * further rule is used in the computation of soft_rt_next_start:
7641 -+ * soft_rt_next_start must be higher than the current time plus the maximum
7642 -+ * time for which the arrival of a request is waited for when a sync queue
7643 -+ * becomes idle, namely bfqd->bfq_slice_idle.
7644 -+ * This filters out greedy applications, as the latter instead issue their next
7645 -+ * request as soon as possible after the last one has been completed (in
7646 -+ * contrast, when a batch of requests is completed, a soft real-time application
7647 -+ * spends some time processing data).
7648 -+ *
7649 -+ * Unfortunately, the last filter may easily generate false positives if only
7650 -+ * bfqd->bfq_slice_idle is used as a reference time interval and one or both
7651 -+ * the following cases occur:
7652 -+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
7653 -+ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
7654 -+ * HZ=100.
7655 -+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
7656 -+ * for a while, then suddenly 'jump' by several units to recover the lost
7657 -+ * increments. This seems to happen, e.g., inside virtual machines.
7658 -+ * To address this issue, we do not use as a reference time interval just
7659 -+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
7660 -+ * particular we add the minimum number of jiffies for which the filter seems
7661 -+ * to be quite precise also in embedded systems and KVM/QEMU virtual machines.
7662 -+ */
7663 -+static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
7664 -+ struct bfq_queue *bfqq)
7665 -+{
7666 -+ return max(bfqq->last_idle_bklogged +
7667 -+ HZ * bfqq->service_from_backlogged /
7668 -+ bfqd->bfq_raising_max_softrt_rate,
7669 -+ jiffies + bfqq->bfqd->bfq_slice_idle + 4);
7670 -+}
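To see both bounds above at work, here is a worked example with assumed numbers (HZ, rate and jiffy values are all hypothetical): the bandwidth bound delays the next-start instant until the average rate falls below bfq_raising_max_softrt_rate, while the idle bound keeps greedy applications from qualifying.

#include <stdio.h>

int main(void)
{
	const unsigned long hz = 1000;                       /* assumed HZ */
	const unsigned long last_idle_bklogged = 100000;     /* jiffies, assumed */
	const unsigned long service_from_backlogged = 2800;  /* sectors served, assumed */
	const unsigned long max_softrt_rate = 7000;          /* sectors/sec, assumed */
	const unsigned long now = 100300, slice_idle = 8;    /* jiffies, assumed */

	unsigned long bw_bound = last_idle_bklogged +
		hz * service_from_backlogged / max_softrt_rate;  /* 100400 */
	unsigned long idle_bound = now + slice_idle + 4;         /* 100312 */
	unsigned long next = bw_bound > idle_bound ? bw_bound : idle_bound;

	printf("soft_rt_next_start = %lu jiffies\n", next);  /* 100400 */
	return 0;
}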
7671 -+
7672 -+/*
7673 -+ * Return the largest-possible time instant such that, for as long as possible,
7674 -+ * the current time will be lower than this time instant according to the macro
7675 -+ * time_is_before_jiffies().
7676 -+ */
7677 -+static inline unsigned long bfq_infinity_from_now(unsigned long now)
7678 -+{
7679 -+ return now + ULONG_MAX / 2;
7680 -+}
7681 -+
7682 -+/**
7683 -+ * bfq_bfqq_expire - expire a queue.
7684 -+ * @bfqd: device owning the queue.
7685 -+ * @bfqq: the queue to expire.
7686 -+ * @compensate: if true, compensate for the time spent idling.
7687 -+ * @reason: the reason causing the expiration.
7688 -+ *
7689 -+ *
7690 -+ * If the process associated with the queue is slow (i.e., seeky), or in
7691 -+ * case of budget timeout, or, finally, if it is async, we
7692 -+ * artificially charge it an entire budget (independently of the
7693 -+ * actual service it received). As a consequence, the queue will get
7694 -+ * higher timestamps than the correct ones upon reactivation, and
7695 -+ * hence it will be rescheduled as if it had received more service
7696 -+ * than what it actually received. In the end, this class of processes
7697 -+ * will receive less service in proportion to how slowly they consume
7698 -+ * their budgets (and hence how seriously they tend to lower the
7699 -+ * throughput).
7700 -+ *
7701 -+ * In contrast, when a queue expires because it has been idling for
7702 -+ * too much or because it exhausted its budget, we do not touch the
7703 -+ * amount of service it has received. Hence when the queue will be
7704 -+ * reactivated and its timestamps updated, the latter will be in sync
7705 -+ * with the actual service received by the queue until expiration.
7706 -+ *
7707 -+ * Charging a full budget to the first type of queues and the exact
7708 -+ * service to the others has the effect of using the WF2Q+ policy to
7709 -+ * schedule the former on a timeslice basis, without violating the
7710 -+ * service domain guarantees of the latter.
7711 -+ */
7712 -+static void bfq_bfqq_expire(struct bfq_data *bfqd,
7713 -+ struct bfq_queue *bfqq,
7714 -+ int compensate,
7715 -+ enum bfqq_expiration reason)
7716 -+{
7717 -+ int slow;
7718 -+ BUG_ON(bfqq != bfqd->in_service_queue);
7719 -+
7720 -+ /* Update disk peak rate for autotuning and check whether the
7721 -+ * process is slow (see bfq_update_peak_rate).
7722 -+ */
7723 -+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
7724 -+
7725 -+ /*
7726 -+ * As above explained, 'punish' slow (i.e., seeky), timed-out
7727 -+ * and async queues, to favor sequential sync workloads.
7728 -+ *
7729 -+ * Processes doing IO in the slower disk zones will tend to be
7730 -+ * slow(er) even if not seeky. Hence, since the estimated peak
7731 -+ * rate is actually an average over the disk surface, these
7732 -+ * processes may timeout just for bad luck. To avoid punishing
7733 -+ * them we do not charge a full budget to a process that
7734 -+ * succeeded in consuming at least 2/3 of its budget.
7735 -+ */
7736 -+ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
7737 -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
7738 -+ bfq_bfqq_charge_full_budget(bfqq);
7739 -+
7740 -+ bfqq->service_from_backlogged += bfqq->entity.service;
7741 -+
7742 -+ if (bfqd->low_latency && bfqq->raising_coeff == 1)
7743 -+ bfqq->last_rais_start_finish = jiffies;
7744 -+
7745 -+ if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0 &&
7746 -+ RB_EMPTY_ROOT(&bfqq->sort_list)) {
7747 -+ /*
7748 -+ * If we get here, and there are no outstanding requests,
7749 -+ * then the request pattern is isochronous (see the comments
7750 -+ * to the function bfq_bfqq_softrt_next_start()). Hence we can
7751 -+ * compute soft_rt_next_start. If, instead, the queue still
7752 -+ * has outstanding requests, then we have to wait for the
7753 -+ * completion of all the outstanding requests to discover
7754 -+ * whether the request pattern is actually isochronous.
7755 -+ */
7756 -+ if (bfqq->dispatched == 0)
7757 -+ bfqq->soft_rt_next_start =
7758 -+ bfq_bfqq_softrt_next_start(bfqd, bfqq);
7759 -+ else {
7760 -+ /*
7761 -+ * The application is still waiting for the
7762 -+ * completion of one or more requests:
7763 -+ * prevent it from possibly being incorrectly
7764 -+ * deemed as soft real-time by setting its
7765 -+ * soft_rt_next_start to infinity. In fact,
7766 -+ * without this assignment, the application
7767 -+ * would be incorrectly deemed as soft
7768 -+ * real-time if:
7769 -+ * 1) it issued a new request before the
7770 -+ * completion of all its in-flight
7771 -+ * requests, and
7772 -+ * 2) at that time, its soft_rt_next_start
7773 -+ * happened to be in the past.
7774 -+ */
7775 -+ bfqq->soft_rt_next_start =
7776 -+ bfq_infinity_from_now(jiffies);
7777 -+ /*
7778 -+ * Schedule an update of soft_rt_next_start to when
7779 -+ * the task may be discovered to be isochronous.
7780 -+ */
7781 -+ bfq_mark_bfqq_softrt_update(bfqq);
7782 -+ }
7783 -+ }
7784 -+
7785 -+ bfq_log_bfqq(bfqd, bfqq,
7786 -+ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow,
7787 -+ bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
7788 -+
7789 -+ /* Increase, decrease or leave budget unchanged according to reason */
7790 -+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
7791 -+ __bfq_bfqq_expire(bfqd, bfqq);
7792 -+}
7793 -+
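/*
 * A minimal standalone sketch, with made-up names, of the charging rule
 * applied above in bfq_bfqq_expire(): seeky/slow queues, and queues that
 * hit their budget timeout without consuming at least 2/3 of their
 * budget, are charged the whole budget; other queues are charged only
 * the service they actually received.  Illustrative only, not patch code.
 */
#include <stdio.h>

static unsigned long charge_on_expiration(unsigned long budget,
					   unsigned long received,
					   int budget_timeout, int slow)
{
	unsigned long left = budget - received;

	if (slow || (budget_timeout && left >= budget / 3))
		return budget;	/* punish: timestamps jump a full budget ahead */

	return received;	/* keep timestamps in sync with real service */
}

int main(void)
{
	/* A seeky queue that used only 10 of 100 sectors is charged 100. */
	printf("%lu\n", charge_on_expiration(100, 10, 0, 1));
	/* A well-behaved queue that used 90 of 100 is charged just 90. */
	printf("%lu\n", charge_on_expiration(100, 90, 1, 0));
	return 0;
}
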
7794 -+/*
7795 -+ * Budget timeout is not implemented through a dedicated timer, but
7796 -+ * just checked on request arrivals and completions, as well as on
7797 -+ * idle timer expirations.
7798 -+ */
7799 -+static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
7800 -+{
7801 -+ if (bfq_bfqq_budget_new(bfqq))
7802 -+ return 0;
7803 -+
7804 -+ if (time_before(jiffies, bfqq->budget_timeout))
7805 -+ return 0;
7806 -+
7807 -+ return 1;
7808 -+}
7809 -+
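/*
 * Illustration (assumed helper, not from the patch) of the wrap-safe
 * jiffies comparison that the check above relies on: time_before()
 * compares the two counters with a signed subtraction, so the deadline
 * test keeps working across a jiffies wrap-around.
 */
static inline int deadline_passed(unsigned long now, unsigned long deadline)
{
	/* Equivalent to !time_before(now, deadline). */
	return (long)(now - deadline) >= 0;
}
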
7810 -+/*
7811 -+ * If we expire a queue that is waiting for the arrival of a new
7812 -+ * request, we may prevent the fictitious timestamp back-shifting that
7813 -+ * allows the guarantees of the queue to be preserved (see [1] for
7814 -+ * this tricky aspect). Hence we return true only if this condition
7815 -+ * does not hold, or if the queue is so slow that it deserves to be
7816 -+ * expired anyway in order to preserve a high throughput.
7817 -+ */
7818 -+static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
7819 -+{
7820 -+ bfq_log_bfqq(bfqq->bfqd, bfqq,
7821 -+ "may_budget_timeout: wr %d left %d timeout %d",
7822 -+ bfq_bfqq_wait_request(bfqq),
7823 -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3,
7824 -+ bfq_bfqq_budget_timeout(bfqq));
7825 -+
7826 -+ return (!bfq_bfqq_wait_request(bfqq) ||
7827 -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)
7828 -+ &&
7829 -+ bfq_bfqq_budget_timeout(bfqq);
7830 -+}
7831 -+
7832 -+/*
7833 -+ * For weight-raised queues issuing sync requests, idling is always performed,
7834 -+ * as this is instrumental in guaranteeing a high fraction of the throughput
7835 -+ * to these queues, and hence in guaranteeing a lower latency for their
7836 -+ * requests. See [1] for details.
7837 -+ *
7838 -+ * For non-weight-raised queues, idling is instead disabled if the device is
7839 -+ * NCQ-enabled and non-rotational, as this boosts the throughput on such
7840 -+ * devices.
7841 -+ */
7842 -+static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
7843 -+{
7844 -+ struct bfq_data *bfqd = bfqq->bfqd;
7845 -+
7846 -+ return bfq_bfqq_sync(bfqq) && (
7847 -+ bfqq->raising_coeff > 1 ||
7848 -+ (bfq_bfqq_idle_window(bfqq) &&
7849 -+ !(bfqd->hw_tag &&
7850 -+ (blk_queue_nonrot(bfqd->queue) ||
7851 -+ /*
7852 -+ * If there are weight-raised busy queues, then do not idle
7853 -+ * the disk for a sync non-weight-raised queue, and hence
7854 -+ * expire the queue immediately if empty. Combined with the
7855 -+ * timestamping rules of BFQ (see [1] for details), this
7856 -+ * causes sync non-weight-raised queues to get a lower
7857 -+ * fraction of the disk throughput, and hence reduces the rate
7858 -+ * at which the processes associated to these queues ask for
7859 -+ * requests from the request pool.
7860 -+ *
7861 -+ * This is beneficial for weight-raised processes, when the
7862 -+ * system operates in request-pool saturation conditions
7863 -+ * (e.g., in the presence of write hogs). In fact, if
7864 -+ * non-weight-raised processes ask for requests at a lower
7865 -+ * rate, then weight-raised processes have a higher
7866 -+ * probability to get a request from the pool immediately
7867 -+ * (or at least soon) when they need one. Hence they have a
7868 -+ * higher probability to actually get a fraction of the disk
7869 -+ * throughput proportional to their high weight. This is
7870 -+ * especially true with NCQ-enabled drives, which enqueue
7871 -+ * several requests in advance and further reorder
7872 -+ * internally-queued requests.
7873 -+ *
7874 -+ * Mistreating non-weight-raised queues in the above-described
7875 -+ * way, when there are busy weight-raised queues, seems to
7876 -+ * mitigate starvation problems in the presence of heavy write
7877 -+ * workloads and NCQ, and hence to guarantee a higher
7878 -+ * application and system responsiveness in these hostile
7879 -+ * scenarios.
7880 -+ */
7881 -+ bfqd->raised_busy_queues > 0)
7882 -+ )
7883 -+ )
7884 -+ );
7885 -+}
7886 -+
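/*
 * Reduced standalone sketch of the decision above, with assumed field
 * names (the real code reads bfqq and bfqd state directly): idling is
 * kept unconditionally for weight-raised sync queues, and otherwise only
 * when the queue has an idle window and the device is not an NCQ device
 * that is either non-rotational or shared with weight-raised queues.
 */
#include <stdbool.h>

struct idling_inputs {
	bool sync;
	bool weight_raised;	/* raising_coeff > 1 */
	bool idle_window;
	bool ncq;		/* hw_tag */
	bool nonrot;
	bool wr_busy_queues;	/* raised_busy_queues > 0 */
};

static bool must_not_expire(const struct idling_inputs *in)
{
	if (!in->sync)
		return false;
	if (in->weight_raised)
		return true;
	return in->idle_window &&
	       !(in->ncq && (in->nonrot || in->wr_busy_queues));
}
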
7887 -+/*
7888 -+ * If the in-service queue is empty, but it is sync and either of the following
7889 -+ * conditions holds, then: 1) the queue must remain in service and cannot be
7890 -+ * expired, and 2) the disk must be idled to wait for the possible arrival
7891 -+ * of a new request for the queue. The conditions are:
7892 -+ * - the device is rotational and not performing NCQ, and the queue has its
7893 -+ * idle window set (in this case, waiting for a new request for the queue
7894 -+ * is likely to boost the disk throughput);
7895 -+ * - the queue is weight-raised (waiting for the request is necessary to
7896 -+ * provide the queue with fairness and latency guarantees, see [1] for
7897 -+ * details).
7898 -+ */
7899 -+static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
7900 -+{
7901 -+ struct bfq_data *bfqd = bfqq->bfqd;
7902 -+
7903 -+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
7904 -+ bfq_bfqq_must_not_expire(bfqq) &&
7905 -+ !bfq_queue_nonrot_noidle(bfqd, bfqq);
7906 -+}
7907 -+
7908 -+/*
7909 -+ * Select a queue for service. If we have a current queue in service,
7910 -+ * check whether to continue servicing it, or retrieve and set a new one.
7911 -+ */
7912 -+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
7913 -+{
7914 -+ struct bfq_queue *bfqq, *new_bfqq = NULL;
7915 -+ struct request *next_rq;
7916 -+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
7917 -+
7918 -+ bfqq = bfqd->in_service_queue;
7919 -+ if (bfqq == NULL)
7920 -+ goto new_queue;
7921 -+
7922 -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
7923 -+
7924 -+ /*
7925 -+ * If another queue has a request waiting within our mean seek
7926 -+ * distance, let it run. The expire code will check for close
7927 -+ * cooperators and put the close queue at the front of the
7928 -+ * service tree. If possible, merge the expiring queue with the
7929 -+ * new bfqq.
7930 -+ */
7931 -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq);
7932 -+ if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
7933 -+ bfq_setup_merge(bfqq, new_bfqq);
7934 -+
7935 -+ if (bfq_may_expire_for_budg_timeout(bfqq) &&
7936 -+ !timer_pending(&bfqd->idle_slice_timer) &&
7937 -+ !bfq_bfqq_must_idle(bfqq))
7938 -+ goto expire;
7939 -+
7940 -+ next_rq = bfqq->next_rq;
7941 -+ /*
7942 -+ * If bfqq has requests queued and it has enough budget left to
7943 -+ * serve them, keep the queue, otherwise expire it.
7944 -+ */
7945 -+ if (next_rq != NULL) {
7946 -+ if (bfq_serv_to_charge(next_rq, bfqq) >
7947 -+ bfq_bfqq_budget_left(bfqq)) {
7948 -+ reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
7949 -+ goto expire;
7950 -+ } else {
7951 -+ /*
7952 -+ * The idle timer may be pending because we may not
7953 -+ * disable disk idling even when a new request arrives
7954 -+ */
7955 -+ if (timer_pending(&bfqd->idle_slice_timer)) {
7956 -+ /*
7957 -+ * If we get here: 1) at least one new request
7958 -+ * has arrived but we have not disabled the
7959 -+ * timer because the request was too small,
7960 -+ * and 2) the block layer has unplugged the
7961 -+ * device, causing the dispatch to be invoked.
7962 -+ *
7963 -+ * Since the device is unplugged, now the
7964 -+ * requests are probably large enough to
7965 -+ * provide a reasonable throughput.
7966 -+ * So we disable idling.
7967 -+ */
7968 -+ bfq_clear_bfqq_wait_request(bfqq);
7969 -+ del_timer(&bfqd->idle_slice_timer);
7970 -+ }
7971 -+ if (new_bfqq == NULL)
7972 -+ goto keep_queue;
7973 -+ else
7974 -+ goto expire;
7975 -+ }
7976 -+ }
7977 -+
7978 -+ /*
7979 -+ * No requests pending. If the in-service queue has no cooperator and
7980 -+ * still has requests in flight (possibly waiting for a completion)
7981 -+ * or is idling for a new request, then keep it.
7982 -+ */
7983 -+ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
7984 -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
7985 -+ bfqq = NULL;
7986 -+ goto keep_queue;
7987 -+ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
7988 -+ /*
7989 -+ * Expiring the queue because there is a close cooperator,
7990 -+ * cancel timer.
7991 -+ */
7992 -+ bfq_clear_bfqq_wait_request(bfqq);
7993 -+ del_timer(&bfqd->idle_slice_timer);
7994 -+ }
7995 -+
7996 -+ reason = BFQ_BFQQ_NO_MORE_REQUESTS;
7997 -+expire:
7998 -+ bfq_bfqq_expire(bfqd, bfqq, 0, reason);
7999 -+new_queue:
8000 -+ bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
8001 -+ bfq_log(bfqd, "select_queue: new queue %d returned",
8002 -+ bfqq != NULL ? bfqq->pid : 0);
8003 -+keep_queue:
8004 -+ return bfqq;
8005 -+}
8006 -+
8007 -+static void bfq_update_raising_data(struct bfq_data *bfqd,
8008 -+ struct bfq_queue *bfqq)
8009 -+{
8010 -+ if (bfqq->raising_coeff > 1) { /* queue is being boosted */
8011 -+ struct bfq_entity *entity = &bfqq->entity;
8012 -+
8013 -+ bfq_log_bfqq(bfqd, bfqq,
8014 -+ "raising period dur %u/%u msec, "
8015 -+ "old raising coeff %u, w %d(%d)",
8016 -+ jiffies_to_msecs(jiffies -
8017 -+ bfqq->last_rais_start_finish),
8018 -+ jiffies_to_msecs(bfqq->raising_cur_max_time),
8019 -+ bfqq->raising_coeff,
8020 -+ bfqq->entity.weight, bfqq->entity.orig_weight);
8021 -+
8022 -+ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
8023 -+ entity->orig_weight * bfqq->raising_coeff);
8024 -+ if (entity->ioprio_changed)
8025 -+ bfq_log_bfqq(bfqd, bfqq,
8026 -+ "WARN: pending prio change");
8027 -+ /*
8028 -+ * If too much time has elapsed from the beginning
8029 -+ * of this weight-raising, stop it.
8030 -+ */
8031 -+ if (time_is_before_jiffies(bfqq->last_rais_start_finish +
8032 -+ bfqq->raising_cur_max_time)) {
8033 -+ bfqq->last_rais_start_finish = jiffies;
8034 -+ bfq_log_bfqq(bfqd, bfqq,
8035 -+ "wrais ending at %lu, "
8036 -+ "rais_max_time %u",
8037 -+ bfqq->last_rais_start_finish,
8038 -+ jiffies_to_msecs(bfqq->
8039 -+ raising_cur_max_time));
8040 -+ bfq_bfqq_end_raising(bfqq);
8041 -+ __bfq_entity_update_weight_prio(
8042 -+ bfq_entity_service_tree(entity),
8043 -+ entity);
8044 -+ }
8045 -+ }
8046 -+}
8047 -+
8048 -+/*
8049 -+ * Dispatch one request from bfqq, moving it to the request queue
8050 -+ * dispatch list.
8051 -+ */
8052 -+static int bfq_dispatch_request(struct bfq_data *bfqd,
8053 -+ struct bfq_queue *bfqq)
8054 -+{
8055 -+ int dispatched = 0;
8056 -+ struct request *rq;
8057 -+ unsigned long service_to_charge;
8058 -+
8059 -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
8060 -+
8061 -+ /* Follow expired path, else get first next available. */
8062 -+ rq = bfq_check_fifo(bfqq);
8063 -+ if (rq == NULL)
8064 -+ rq = bfqq->next_rq;
8065 -+ service_to_charge = bfq_serv_to_charge(rq, bfqq);
8066 -+
8067 -+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
8068 -+ /*
8069 -+ * This may happen if the next rq is chosen
8070 -+ * in fifo order instead of sector order.
8071 -+ * The budget is properly dimensioned
8072 -+ * to be always sufficient to serve the next request
8073 -+ * only if it is chosen in sector order. The reason is
8074 -+ * that it would be quite inefficient and of little use
8075 -+ * to always make sure that the budget is large enough
8076 -+ * to serve even the possible next rq in fifo order.
8077 -+ * In fact, requests are seldom served in fifo order.
8078 -+ *
8079 -+ * Expire the queue for budget exhaustion, and
8080 -+ * make sure that the next act_budget is enough
8081 -+ * to serve the next request, even if it comes
8082 -+ * from the fifo expired path.
8083 -+ */
8084 -+ bfqq->next_rq = rq;
8085 -+ /*
8086 -+ * Since this dispatch failed, make sure that
8087 -+ * a new one will be performed.
8088 -+ */
8089 -+ if (!bfqd->rq_in_driver)
8090 -+ bfq_schedule_dispatch(bfqd);
8091 -+ goto expire;
8092 -+ }
8093 -+
8094 -+ /* Finally, insert request into driver dispatch list. */
8095 -+ bfq_bfqq_served(bfqq, service_to_charge);
8096 -+ bfq_dispatch_insert(bfqd->queue, rq);
8097 -+
8098 -+ bfq_update_raising_data(bfqd, bfqq);
8099 -+
8100 -+ bfq_log_bfqq(bfqd, bfqq,
8101 -+ "dispatched %u sec req (%llu), budg left %lu",
8102 -+ blk_rq_sectors(rq),
8103 -+ (long long unsigned)blk_rq_pos(rq),
8104 -+ bfq_bfqq_budget_left(bfqq));
8105 -+
8106 -+ dispatched++;
8107 -+
8108 -+ if (bfqd->in_service_bic == NULL) {
8109 -+ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
8110 -+ bfqd->in_service_bic = RQ_BIC(rq);
8111 -+ }
8112 -+
8113 -+ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
8114 -+ dispatched >= bfqd->bfq_max_budget_async_rq) ||
8115 -+ bfq_class_idle(bfqq)))
8116 -+ goto expire;
8117 -+
8118 -+ return dispatched;
8119 -+
8120 -+expire:
8121 -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED);
8122 -+ return dispatched;
8123 -+}
8124 -+
8125 -+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
8126 -+{
8127 -+ int dispatched = 0;
8128 -+
8129 -+ while (bfqq->next_rq != NULL) {
8130 -+ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
8131 -+ dispatched++;
8132 -+ }
8133 -+
8134 -+ BUG_ON(!list_empty(&bfqq->fifo));
8135 -+ return dispatched;
8136 -+}
8137 -+
8138 -+/*
8139 -+ * Drain our current requests. Used for barriers and when switching
8140 -+ * io schedulers on-the-fly.
8141 -+ */
8142 -+static int bfq_forced_dispatch(struct bfq_data *bfqd)
8143 -+{
8144 -+ struct bfq_queue *bfqq, *n;
8145 -+ struct bfq_service_tree *st;
8146 -+ int dispatched = 0;
8147 -+
8148 -+ bfqq = bfqd->in_service_queue;
8149 -+ if (bfqq != NULL)
8150 -+ __bfq_bfqq_expire(bfqd, bfqq);
8151 -+
8152 -+ /*
8153 -+ * Loop through classes, and be careful to leave the scheduler
8154 -+ * in a consistent state, as feedback mechanisms and vtime
8155 -+ * updates cannot be disabled during the process.
8156 -+ */
8157 -+ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
8158 -+ st = bfq_entity_service_tree(&bfqq->entity);
8159 -+
8160 -+ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
8161 -+ bfqq->max_budget = bfq_max_budget(bfqd);
8162 -+
8163 -+ bfq_forget_idle(st);
8164 -+ }
8165 -+
8166 -+ BUG_ON(bfqd->busy_queues != 0);
8167 -+
8168 -+ return dispatched;
8169 -+}
8170 -+
8171 -+static int bfq_dispatch_requests(struct request_queue *q, int force)
8172 -+{
8173 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8174 -+ struct bfq_queue *bfqq;
8175 -+ int max_dispatch;
8176 -+
8177 -+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
8178 -+ if (bfqd->busy_queues == 0)
8179 -+ return 0;
8180 -+
8181 -+ if (unlikely(force))
8182 -+ return bfq_forced_dispatch(bfqd);
8183 -+
8184 -+ bfqq = bfq_select_queue(bfqd);
8185 -+ if (bfqq == NULL)
8186 -+ return 0;
8187 -+
8188 -+ max_dispatch = bfqd->bfq_quantum;
8189 -+ if (bfq_class_idle(bfqq))
8190 -+ max_dispatch = 1;
8191 -+
8192 -+ if (!bfq_bfqq_sync(bfqq))
8193 -+ max_dispatch = bfqd->bfq_max_budget_async_rq;
8194 -+
8195 -+ if (bfqq->dispatched >= max_dispatch) {
8196 -+ if (bfqd->busy_queues > 1)
8197 -+ return 0;
8198 -+ if (bfqq->dispatched >= 4 * max_dispatch)
8199 -+ return 0;
8200 -+ }
8201 -+
8202 -+ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
8203 -+ return 0;
8204 -+
8205 -+ bfq_clear_bfqq_wait_request(bfqq);
8206 -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
8207 -+
8208 -+ if (!bfq_dispatch_request(bfqd, bfqq))
8209 -+ return 0;
8210 -+
8211 -+ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)",
8212 -+ bfqq->pid, max_dispatch);
8213 -+
8214 -+ return 1;
8215 -+}
8216 -+
8217 -+/*
8218 -+ * Task holds one reference to the queue, dropped when task exits. Each rq
8219 -+ * in-flight on this queue also holds a reference, dropped when rq is freed.
8220 -+ *
8221 -+ * Queue lock must be held here.
8222 -+ */
8223 -+static void bfq_put_queue(struct bfq_queue *bfqq)
8224 -+{
8225 -+ struct bfq_data *bfqd = bfqq->bfqd;
8226 -+
8227 -+ BUG_ON(atomic_read(&bfqq->ref) <= 0);
8228 -+
8229 -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
8230 -+ atomic_read(&bfqq->ref));
8231 -+ if (!atomic_dec_and_test(&bfqq->ref))
8232 -+ return;
8233 -+
8234 -+ BUG_ON(rb_first(&bfqq->sort_list) != NULL);
8235 -+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
8236 -+ BUG_ON(bfqq->entity.tree != NULL);
8237 -+ BUG_ON(bfq_bfqq_busy(bfqq));
8238 -+ BUG_ON(bfqd->in_service_queue == bfqq);
8239 -+
8240 -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
8241 -+
8242 -+ kmem_cache_free(bfq_pool, bfqq);
8243 -+}
8244 -+
8245 -+static void bfq_put_cooperator(struct bfq_queue *bfqq)
8246 -+{
8247 -+ struct bfq_queue *__bfqq, *next;
8248 -+
8249 -+ /*
8250 -+ * If this queue was scheduled to merge with another queue, be
8251 -+ * sure to drop the reference taken on that queue (and others in
8252 -+ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
8253 -+ */
8254 -+ __bfqq = bfqq->new_bfqq;
8255 -+ while (__bfqq) {
8256 -+ if (__bfqq == bfqq) {
8257 -+ WARN(1, "bfqq->new_bfqq loop detected.\n");
8258 -+ break;
8259 -+ }
8260 -+ next = __bfqq->new_bfqq;
8261 -+ bfq_put_queue(__bfqq);
8262 -+ __bfqq = next;
8263 -+ }
8264 -+}
8265 -+
8266 -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
8267 -+{
8268 -+ if (bfqq == bfqd->in_service_queue) {
8269 -+ __bfq_bfqq_expire(bfqd, bfqq);
8270 -+ bfq_schedule_dispatch(bfqd);
8271 -+ }
8272 -+
8273 -+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
8274 -+ atomic_read(&bfqq->ref));
8275 -+
8276 -+ bfq_put_cooperator(bfqq);
8277 -+
8278 -+ bfq_put_queue(bfqq);
8279 -+}
8280 -+
8281 -+static void bfq_init_icq(struct io_cq *icq)
8282 -+{
8283 -+ struct bfq_io_cq *bic = icq_to_bic(icq);
8284 -+
8285 -+ bic->ttime.last_end_request = jiffies;
8286 -+}
8287 -+
8288 -+static void bfq_exit_icq(struct io_cq *icq)
8289 -+{
8290 -+ struct bfq_io_cq *bic = icq_to_bic(icq);
8291 -+ struct bfq_data *bfqd = bic_to_bfqd(bic);
8292 -+
8293 -+ if (bic->bfqq[BLK_RW_ASYNC]) {
8294 -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
8295 -+ bic->bfqq[BLK_RW_ASYNC] = NULL;
8296 -+ }
8297 -+
8298 -+ if (bic->bfqq[BLK_RW_SYNC]) {
8299 -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
8300 -+ bic->bfqq[BLK_RW_SYNC] = NULL;
8301 -+ }
8302 -+}
8303 -+
8304 -+/*
8305 -+ * Update the entity prio values; note that the new values will not
8306 -+ * be used until the next (re)activation.
8307 -+ */
8308 -+static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
8309 -+{
8310 -+ struct task_struct *tsk = current;
8311 -+ int ioprio_class;
8312 -+
8313 -+ if (!bfq_bfqq_prio_changed(bfqq))
8314 -+ return;
8315 -+
8316 -+ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
8317 -+ switch (ioprio_class) {
8318 -+ default:
8319 -+ dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
8320 -+ "bfq: bad prio %x\n", ioprio_class);
8321 -+ case IOPRIO_CLASS_NONE:
8322 -+ /*
8323 -+ * No prio set, inherit CPU scheduling settings.
8324 -+ */
8325 -+ bfqq->entity.new_ioprio = task_nice_ioprio(tsk);
8326 -+ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk);
8327 -+ break;
8328 -+ case IOPRIO_CLASS_RT:
8329 -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
8330 -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT;
8331 -+ break;
8332 -+ case IOPRIO_CLASS_BE:
8333 -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
8334 -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE;
8335 -+ break;
8336 -+ case IOPRIO_CLASS_IDLE:
8337 -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE;
8338 -+ bfqq->entity.new_ioprio = 7;
8339 -+ bfq_clear_bfqq_idle_window(bfqq);
8340 -+ break;
8341 -+ }
8342 -+
8343 -+ bfqq->entity.ioprio_changed = 1;
8344 -+
8345 -+ /*
8346 -+ * Keep track of original prio settings in case we have to temporarily
8347 -+ * elevate the priority of this queue.
8348 -+ */
8349 -+ bfqq->org_ioprio = bfqq->entity.new_ioprio;
8350 -+ bfq_clear_bfqq_prio_changed(bfqq);
8351 -+}
8352 -+
8353 -+static void bfq_changed_ioprio(struct bfq_io_cq *bic)
8354 -+{
8355 -+ struct bfq_data *bfqd;
8356 -+ struct bfq_queue *bfqq, *new_bfqq;
8357 -+ struct bfq_group *bfqg;
8358 -+ unsigned long uninitialized_var(flags);
8359 -+ int ioprio = bic->icq.ioc->ioprio;
8360 -+
8361 -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
8362 -+ &flags);
8363 -+ /*
8364 -+ * This condition may trigger on a newly created bic; be sure to drop
8365 -+ * the lock before returning.
8366 -+ */
8367 -+ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio))
8368 -+ goto out;
8369 -+
8370 -+ bfqq = bic->bfqq[BLK_RW_ASYNC];
8371 -+ if (bfqq != NULL) {
8372 -+ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group,
8373 -+ sched_data);
8374 -+ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic,
8375 -+ GFP_ATOMIC);
8376 -+ if (new_bfqq != NULL) {
8377 -+ bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
8378 -+ bfq_log_bfqq(bfqd, bfqq,
8379 -+ "changed_ioprio: bfqq %p %d",
8380 -+ bfqq, atomic_read(&bfqq->ref));
8381 -+ bfq_put_queue(bfqq);
8382 -+ }
8383 -+ }
8384 -+
8385 -+ bfqq = bic->bfqq[BLK_RW_SYNC];
8386 -+ if (bfqq != NULL)
8387 -+ bfq_mark_bfqq_prio_changed(bfqq);
8388 -+
8389 -+ bic->ioprio = ioprio;
8390 -+
8391 -+out:
8392 -+ bfq_put_bfqd_unlock(bfqd, &flags);
8393 -+}
8394 -+
8395 -+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
8396 -+ pid_t pid, int is_sync)
8397 -+{
8398 -+ RB_CLEAR_NODE(&bfqq->entity.rb_node);
8399 -+ INIT_LIST_HEAD(&bfqq->fifo);
8400 -+
8401 -+ atomic_set(&bfqq->ref, 0);
8402 -+ bfqq->bfqd = bfqd;
8403 -+
8404 -+ bfq_mark_bfqq_prio_changed(bfqq);
8405 -+
8406 -+ if (is_sync) {
8407 -+ if (!bfq_class_idle(bfqq))
8408 -+ bfq_mark_bfqq_idle_window(bfqq);
8409 -+ bfq_mark_bfqq_sync(bfqq);
8410 -+ }
8411 -+
8412 -+ /* Tentative initial value to trade off between thr and lat */
8413 -+ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
8414 -+ bfqq->pid = pid;
8415 -+
8416 -+ bfqq->raising_coeff = 1;
8417 -+ bfqq->last_rais_start_finish = 0;
8418 -+ /*
8419 -+ * Set to the value for which bfqq will not be deemed as
8420 -+ * soft rt when it becomes backlogged.
8421 -+ */
8422 -+ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
8423 -+}
8424 -+
8425 -+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
8426 -+ struct bfq_group *bfqg,
8427 -+ int is_sync,
8428 -+ struct bfq_io_cq *bic,
8429 -+ gfp_t gfp_mask)
8430 -+{
8431 -+ struct bfq_queue *bfqq, *new_bfqq = NULL;
8432 -+
8433 -+retry:
8434 -+ /* bic always exists here */
8435 -+ bfqq = bic_to_bfqq(bic, is_sync);
8436 -+
8437 -+ /*
8438 -+ * Always try a new allocation if we originally fell back to the
8439 -+ * OOM bfqq, since that should just be a temporary situation.
8440 -+ */
8441 -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
8442 -+ bfqq = NULL;
8443 -+ if (new_bfqq != NULL) {
8444 -+ bfqq = new_bfqq;
8445 -+ new_bfqq = NULL;
8446 -+ } else if (gfp_mask & __GFP_WAIT) {
8447 -+ spin_unlock_irq(bfqd->queue->queue_lock);
8448 -+ new_bfqq = kmem_cache_alloc_node(bfq_pool,
8449 -+ gfp_mask | __GFP_ZERO,
8450 -+ bfqd->queue->node);
8451 -+ spin_lock_irq(bfqd->queue->queue_lock);
8452 -+ if (new_bfqq != NULL)
8453 -+ goto retry;
8454 -+ } else {
8455 -+ bfqq = kmem_cache_alloc_node(bfq_pool,
8456 -+ gfp_mask | __GFP_ZERO,
8457 -+ bfqd->queue->node);
8458 -+ }
8459 -+
8460 -+ if (bfqq != NULL) {
8461 -+ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync);
8462 -+ bfq_log_bfqq(bfqd, bfqq, "allocated");
8463 -+ } else {
8464 -+ bfqq = &bfqd->oom_bfqq;
8465 -+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
8466 -+ }
8467 -+
8468 -+ bfq_init_prio_data(bfqq, bic);
8469 -+ bfq_init_entity(&bfqq->entity, bfqg);
8470 -+ }
8471 -+
8472 -+ if (new_bfqq != NULL)
8473 -+ kmem_cache_free(bfq_pool, new_bfqq);
8474 -+
8475 -+ return bfqq;
8476 -+}
8477 -+
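/*
 * Simplified illustration of the fallback strategy above (the details of
 * dropping and retaking the queue lock around a sleeping allocation are
 * omitted; all names here are assumptions): when the normal allocation
 * fails, hand out a statically preallocated "oom" object rather than
 * failing the request, and retry a real allocation on the next lookup.
 */
#include <stdlib.h>

struct queue_sketch {
	int is_oom_fallback;
};

static struct queue_sketch oom_queue = { .is_oom_fallback = 1 };

static struct queue_sketch *get_queue_or_fallback(void)
{
	struct queue_sketch *q = calloc(1, sizeof(*q));

	return q != NULL ? q : &oom_queue;
}
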
8478 -+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
8479 -+ struct bfq_group *bfqg,
8480 -+ int ioprio_class, int ioprio)
8481 -+{
8482 -+ switch (ioprio_class) {
8483 -+ case IOPRIO_CLASS_RT:
8484 -+ return &bfqg->async_bfqq[0][ioprio];
8485 -+ case IOPRIO_CLASS_NONE:
8486 -+ ioprio = IOPRIO_NORM;
8487 -+ /* fall through */
8488 -+ case IOPRIO_CLASS_BE:
8489 -+ return &bfqg->async_bfqq[1][ioprio];
8490 -+ case IOPRIO_CLASS_IDLE:
8491 -+ return &bfqg->async_idle_bfqq;
8492 -+ default:
8493 -+ BUG();
8494 -+ }
8495 -+}
8496 -+
8497 -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
8498 -+ struct bfq_group *bfqg, int is_sync,
8499 -+ struct bfq_io_cq *bic, gfp_t gfp_mask)
8500 -+{
8501 -+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
8502 -+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
8503 -+ struct bfq_queue **async_bfqq = NULL;
8504 -+ struct bfq_queue *bfqq = NULL;
8505 -+
8506 -+ if (!is_sync) {
8507 -+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
8508 -+ ioprio);
8509 -+ bfqq = *async_bfqq;
8510 -+ }
8511 -+
8512 -+ if (bfqq == NULL)
8513 -+ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
8514 -+
8515 -+ /*
8516 -+ * Pin the queue now that it's allocated; scheduler exit will prune it.
8517 -+ */
8518 -+ if (!is_sync && *async_bfqq == NULL) {
8519 -+ atomic_inc(&bfqq->ref);
8520 -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
8521 -+ bfqq, atomic_read(&bfqq->ref));
8522 -+ *async_bfqq = bfqq;
8523 -+ }
8524 -+
8525 -+ atomic_inc(&bfqq->ref);
8526 -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
8527 -+ atomic_read(&bfqq->ref));
8528 -+ return bfqq;
8529 -+}
8530 -+
8531 -+static void bfq_update_io_thinktime(struct bfq_data *bfqd,
8532 -+ struct bfq_io_cq *bic)
8533 -+{
8534 -+ unsigned long elapsed = jiffies - bic->ttime.last_end_request;
8535 -+ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
8536 -+
8537 -+ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
8538 -+ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
8539 -+ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) /
8540 -+ bic->ttime.ttime_samples;
8541 -+}
8542 -+
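/*
 * Standalone sketch of the exponentially weighted moving average used
 * above: every update keeps 7/8 of the history and mixes in 1/8 of the
 * new sample, with counts and totals kept pre-scaled by 256 to avoid
 * floating point.  Names and the slice_idle value are assumptions.
 */
#include <stdio.h>

struct ttime_sketch {
	unsigned long samples;	/* scaled sample count, saturates near 256 */
	unsigned long total;	/* scaled sum of observed think times */
	unsigned long mean;	/* derived mean think time */
};

static void ttime_update(struct ttime_sketch *t, unsigned long elapsed,
			 unsigned long slice_idle)
{
	unsigned long ttime = elapsed < 2 * slice_idle ? elapsed : 2 * slice_idle;

	t->samples = (7 * t->samples + 256) / 8;
	t->total = (7 * t->total + 256 * ttime) / 8;
	t->mean = (t->total + 128) / t->samples;
}

int main(void)
{
	struct ttime_sketch t = { 0, 0, 0 };
	unsigned long obs[] = { 2, 2, 2, 40, 40, 2 };	/* jiffies between requests */
	unsigned long i;

	for (i = 0; i < sizeof(obs) / sizeof(obs[0]); i++) {
		ttime_update(&t, obs[i], 8 /* assumed slice_idle */);
		printf("after sample %lu: mean ~ %lu jiffies\n", i + 1, t.mean);
	}
	return 0;
}
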
8543 -+static void bfq_update_io_seektime(struct bfq_data *bfqd,
8544 -+ struct bfq_queue *bfqq,
8545 -+ struct request *rq)
8546 -+{
8547 -+ sector_t sdist;
8548 -+ u64 total;
8549 -+
8550 -+ if (bfqq->last_request_pos < blk_rq_pos(rq))
8551 -+ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
8552 -+ else
8553 -+ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
8554 -+
8555 -+ /*
8556 -+ * Don't allow the seek distance to get too large from the
8557 -+ * odd fragment, pagein, etc.
8558 -+ */
8559 -+ if (bfqq->seek_samples == 0) /* first request, not really a seek */
8560 -+ sdist = 0;
8561 -+ else if (bfqq->seek_samples <= 60) /* second & third seek */
8562 -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
8563 -+ else
8564 -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
8565 -+
8566 -+ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
8567 -+ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
8568 -+ total = bfqq->seek_total + (bfqq->seek_samples/2);
8569 -+ do_div(total, bfqq->seek_samples);
8570 -+ bfqq->seek_mean = (sector_t)total;
8571 -+
8572 -+ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
8573 -+ (u64)bfqq->seek_mean);
8574 -+}
8575 -+
8576 -+/*
8577 -+ * Disable idle window if the process thinks too long or seeks so much that
8578 -+ * it doesn't matter.
8579 -+ */
8580 -+static void bfq_update_idle_window(struct bfq_data *bfqd,
8581 -+ struct bfq_queue *bfqq,
8582 -+ struct bfq_io_cq *bic)
8583 -+{
8584 -+ int enable_idle;
8585 -+
8586 -+ /* Don't idle for async or idle io prio class. */
8587 -+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
8588 -+ return;
8589 -+
8590 -+ enable_idle = bfq_bfqq_idle_window(bfqq);
8591 -+
8592 -+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
8593 -+ bfqd->bfq_slice_idle == 0 ||
8594 -+ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
8595 -+ bfqq->raising_coeff == 1))
8596 -+ enable_idle = 0;
8597 -+ else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
8598 -+ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
8599 -+ bfqq->raising_coeff == 1)
8600 -+ enable_idle = 0;
8601 -+ else
8602 -+ enable_idle = 1;
8603 -+ }
8604 -+ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
8605 -+ enable_idle);
8606 -+
8607 -+ if (enable_idle)
8608 -+ bfq_mark_bfqq_idle_window(bfqq);
8609 -+ else
8610 -+ bfq_clear_bfqq_idle_window(bfqq);
8611 -+}
8612 -+
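/*
 * Reduced sketch, with assumed parameters, of the heuristic above: the
 * idle window survives only for sync, non-idle-class queues whose mean
 * think time stays within the idling budget, except that weight-raised
 * queues keep it regardless (the real code also folds in seekiness and
 * the NCQ flag).
 */
#include <stdbool.h>

static bool keep_idle_window(bool sync, bool idle_class, bool weight_raised,
			     unsigned long mean_think_time,
			     unsigned long slice_idle)
{
	if (!sync || idle_class)
		return false;
	if (weight_raised)
		return true;
	return mean_think_time <= slice_idle;
}
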
8613 -+/*
8614 -+ * Called when a new fs request (rq) is added to bfqq. Check if there's
8615 -+ * something we should do about it.
8616 -+ */
8617 -+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
8618 -+ struct request *rq)
8619 -+{
8620 -+ struct bfq_io_cq *bic = RQ_BIC(rq);
8621 -+
8622 -+ if (rq->cmd_flags & REQ_META)
8623 -+ bfqq->meta_pending++;
8624 -+
8625 -+ bfq_update_io_thinktime(bfqd, bic);
8626 -+ bfq_update_io_seektime(bfqd, bfqq, rq);
8627 -+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
8628 -+ !BFQQ_SEEKY(bfqq))
8629 -+ bfq_update_idle_window(bfqd, bfqq, bic);
8630 -+
8631 -+ bfq_log_bfqq(bfqd, bfqq,
8632 -+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
8633 -+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
8634 -+ (long long unsigned)bfqq->seek_mean);
8635 -+
8636 -+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
8637 -+
8638 -+ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
8639 -+ int small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
8640 -+ blk_rq_sectors(rq) < 32;
8641 -+ int budget_timeout = bfq_bfqq_budget_timeout(bfqq);
8642 -+
8643 -+ /*
8644 -+ * There is just this request queued: if the request
8645 -+ * is small and the queue is not to be expired, then
8646 -+ * just exit.
8647 -+ *
8648 -+ * In this way, if the disk is being idled to wait for
8649 -+ * a new request from the in-service queue, we avoid
8650 -+ * unplugging the device and committing the disk to serve
8651 -+ * just a small request. Instead, we wait for
8652 -+ * the block layer to decide when to unplug the device:
8653 -+ * hopefully, new requests will be merged to this one
8654 -+ * quickly, then the device will be unplugged and
8655 -+ * larger requests will be dispatched.
8656 -+ */
8657 -+ if (small_req && !budget_timeout)
8658 -+ return;
8659 -+
8660 -+ /*
8661 -+ * A large enough request arrived, or the queue is to
8662 -+ * be expired: in both cases disk idling is to be
8663 -+ * stopped, so clear wait_request flag and reset
8664 -+ * timer.
8665 -+ */
8666 -+ bfq_clear_bfqq_wait_request(bfqq);
8667 -+ del_timer(&bfqd->idle_slice_timer);
8668 -+
8669 -+ /*
8670 -+ * The queue is not empty, because a new request just
8671 -+ * arrived. Hence we can safely expire the queue, in
8672 -+ * case of budget timeout, without risking that the
8673 -+ * timestamps of the queue are not updated correctly.
8674 -+ * See [1] for more details.
8675 -+ */
8676 -+ if (budget_timeout)
8677 -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
8678 -+
8679 -+ /*
8680 -+ * Let the request rip immediately, or let a new queue be
8681 -+ * selected if bfqq has just been expired.
8682 -+ */
8683 -+ __blk_run_queue(bfqd->queue);
8684 -+ }
8685 -+}
8686 -+
8687 -+static void bfq_insert_request(struct request_queue *q, struct request *rq)
8688 -+{
8689 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8690 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
8691 -+
8692 -+ assert_spin_locked(bfqd->queue->queue_lock);
8693 -+ bfq_init_prio_data(bfqq, RQ_BIC(rq));
8694 -+
8695 -+ bfq_add_rq_rb(rq);
8696 -+
8697 -+ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
8698 -+ list_add_tail(&rq->queuelist, &bfqq->fifo);
8699 -+
8700 -+ bfq_rq_enqueued(bfqd, bfqq, rq);
8701 -+}
8702 -+
8703 -+static void bfq_update_hw_tag(struct bfq_data *bfqd)
8704 -+{
8705 -+ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
8706 -+ bfqd->rq_in_driver);
8707 -+
8708 -+ if (bfqd->hw_tag == 1)
8709 -+ return;
8710 -+
8711 -+ /*
8712 -+ * This sample is valid if the number of outstanding requests
8713 -+ * is large enough to allow queueing behavior. Note that the
8714 -+ * sum is not exact, as it does not take deactivated
8715 -+ * requests into account.
8716 -+ */
8717 -+ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
8718 -+ return;
8719 -+
8720 -+ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
8721 -+ return;
8722 -+
8723 -+ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
8724 -+ bfqd->max_rq_in_driver = 0;
8725 -+ bfqd->hw_tag_samples = 0;
8726 -+}
8727 -+
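/*
 * Sketch of the sampling scheme above, with assumed constants: the device
 * is declared able to queue commands (NCQ-like behavior) only after a
 * number of sufficiently loaded snapshots, so that a short burst cannot
 * flip the decision.  Not the patch's code.
 */
#define HW_QUEUE_THRESHOLD	4
#define HW_QUEUE_SAMPLES	32

struct hw_tag_sketch {
	int decided;		/* stays unset until enough samples are seen */
	int samples;
	int max_in_driver;
};

static void hw_tag_sample(struct hw_tag_sketch *h, int in_driver, int queued)
{
	if (h->max_in_driver < in_driver)
		h->max_in_driver = in_driver;

	if (h->decided == 1)
		return;

	/* Only count snapshots taken under enough load to be meaningful. */
	if (in_driver + queued < HW_QUEUE_THRESHOLD)
		return;

	if (++h->samples < HW_QUEUE_SAMPLES)
		return;

	h->decided = h->max_in_driver > HW_QUEUE_THRESHOLD;
	h->max_in_driver = 0;
	h->samples = 0;
}
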
8728 -+static void bfq_completed_request(struct request_queue *q, struct request *rq)
8729 -+{
8730 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
8731 -+ struct bfq_data *bfqd = bfqq->bfqd;
8732 -+ const int sync = rq_is_sync(rq);
8733 -+
8734 -+ bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)",
8735 -+ blk_rq_sectors(rq), sync);
8736 -+
8737 -+ bfq_update_hw_tag(bfqd);
8738 -+
8739 -+ WARN_ON(!bfqd->rq_in_driver);
8740 -+ WARN_ON(!bfqq->dispatched);
8741 -+ bfqd->rq_in_driver--;
8742 -+ bfqq->dispatched--;
8743 -+
8744 -+ if (bfq_bfqq_sync(bfqq))
8745 -+ bfqd->sync_flight--;
8746 -+
8747 -+ if (sync)
8748 -+ RQ_BIC(rq)->ttime.last_end_request = jiffies;
8749 -+
8750 -+ /*
8751 -+ * If we are waiting to discover whether the request pattern of the
8752 -+ * task associated with the queue is actually isochronous, and
8753 -+ * both requisites for this condition to hold are satisfied, then
8754 -+ * compute soft_rt_next_start (see the comments to the function
8755 -+ * bfq_bfqq_softrt_next_start()).
8756 -+ */
8757 -+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
8758 -+ RB_EMPTY_ROOT(&bfqq->sort_list))
8759 -+ bfqq->soft_rt_next_start =
8760 -+ bfq_bfqq_softrt_next_start(bfqd, bfqq);
8761 -+
8762 -+ /*
8763 -+ * If this is the in-service queue, check if it needs to be expired,
8764 -+ * or if we want to idle in case it has no pending requests.
8765 -+ */
8766 -+ if (bfqd->in_service_queue == bfqq) {
8767 -+ if (bfq_bfqq_budget_new(bfqq))
8768 -+ bfq_set_budget_timeout(bfqd);
8769 -+
8770 -+ if (bfq_bfqq_must_idle(bfqq)) {
8771 -+ bfq_arm_slice_timer(bfqd);
8772 -+ goto out;
8773 -+ } else if (bfq_may_expire_for_budg_timeout(bfqq))
8774 -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
8775 -+ else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
8776 -+ (bfqq->dispatched == 0 ||
8777 -+ !bfq_bfqq_must_not_expire(bfqq)))
8778 -+ bfq_bfqq_expire(bfqd, bfqq, 0,
8779 -+ BFQ_BFQQ_NO_MORE_REQUESTS);
8780 -+ }
8781 -+
8782 -+ if (!bfqd->rq_in_driver)
8783 -+ bfq_schedule_dispatch(bfqd);
8784 -+
8785 -+out:
8786 -+ return;
8787 -+}
8788 -+
8789 -+static inline int __bfq_may_queue(struct bfq_queue *bfqq)
8790 -+{
8791 -+ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
8792 -+ bfq_clear_bfqq_must_alloc(bfqq);
8793 -+ return ELV_MQUEUE_MUST;
8794 -+ }
8795 -+
8796 -+ return ELV_MQUEUE_MAY;
8797 -+}
8798 -+
8799 -+static int bfq_may_queue(struct request_queue *q, int rw)
8800 -+{
8801 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8802 -+ struct task_struct *tsk = current;
8803 -+ struct bfq_io_cq *bic;
8804 -+ struct bfq_queue *bfqq;
8805 -+
8806 -+ /*
8807 -+ * Don't force setup of a queue from here, as a call to may_queue
8808 -+ * does not necessarily imply that a request actually will be queued.
8809 -+ * So just look up a possibly existing queue, or return 'may queue'
8810 -+ * if that fails.
8811 -+ */
8812 -+ bic = bfq_bic_lookup(bfqd, tsk->io_context);
8813 -+ if (bic == NULL)
8814 -+ return ELV_MQUEUE_MAY;
8815 -+
8816 -+ bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
8817 -+ if (bfqq != NULL) {
8818 -+ bfq_init_prio_data(bfqq, bic);
8819 -+
8820 -+ return __bfq_may_queue(bfqq);
8821 -+ }
8822 -+
8823 -+ return ELV_MQUEUE_MAY;
8824 -+}
8825 -+
8826 -+/*
8827 -+ * Queue lock held here.
8828 -+ */
8829 -+static void bfq_put_request(struct request *rq)
8830 -+{
8831 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
8832 -+
8833 -+ if (bfqq != NULL) {
8834 -+ const int rw = rq_data_dir(rq);
8835 -+
8836 -+ BUG_ON(!bfqq->allocated[rw]);
8837 -+ bfqq->allocated[rw]--;
8838 -+
8839 -+ rq->elv.priv[0] = NULL;
8840 -+ rq->elv.priv[1] = NULL;
8841 -+
8842 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
8843 -+ bfqq, atomic_read(&bfqq->ref));
8844 -+ bfq_put_queue(bfqq);
8845 -+ }
8846 -+}
8847 -+
8848 -+static struct bfq_queue *
8849 -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
8850 -+ struct bfq_queue *bfqq)
8851 -+{
8852 -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
8853 -+ (long unsigned)bfqq->new_bfqq->pid);
8854 -+ bic_set_bfqq(bic, bfqq->new_bfqq, 1);
8855 -+ bfq_mark_bfqq_coop(bfqq->new_bfqq);
8856 -+ bfq_put_queue(bfqq);
8857 -+ return bic_to_bfqq(bic, 1);
8858 -+}
8859 -+
8860 -+/*
8861 -+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
8862 -+ * was the last process referring to said bfqq.
8863 -+ */
8864 -+static struct bfq_queue *
8865 -+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
8866 -+{
8867 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
8868 -+ if (bfqq_process_refs(bfqq) == 1) {
8869 -+ bfqq->pid = current->pid;
8870 -+ bfq_clear_bfqq_coop(bfqq);
8871 -+ bfq_clear_bfqq_split_coop(bfqq);
8872 -+ return bfqq;
8873 -+ }
8874 -+
8875 -+ bic_set_bfqq(bic, NULL, 1);
8876 -+
8877 -+ bfq_put_cooperator(bfqq);
8878 -+
8879 -+ bfq_put_queue(bfqq);
8880 -+ return NULL;
8881 -+}
8882 -+
8883 -+/*
8884 -+ * Allocate bfq data structures associated with this request.
8885 -+ */
8886 -+static int bfq_set_request(struct request_queue *q, struct request *rq,
8887 -+ struct bio *bio, gfp_t gfp_mask)
8888 -+{
8889 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8890 -+ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
8891 -+ const int rw = rq_data_dir(rq);
8892 -+ const int is_sync = rq_is_sync(rq);
8893 -+ struct bfq_queue *bfqq;
8894 -+ struct bfq_group *bfqg;
8895 -+ unsigned long flags;
8896 -+
8897 -+ might_sleep_if(gfp_mask & __GFP_WAIT);
8898 -+
8899 -+ bfq_changed_ioprio(bic);
8900 -+
8901 -+ spin_lock_irqsave(q->queue_lock, flags);
8902 -+
8903 -+ if (bic == NULL)
8904 -+ goto queue_fail;
8905 -+
8906 -+ bfqg = bfq_bic_update_cgroup(bic);
8907 -+
8908 -+new_queue:
8909 -+ bfqq = bic_to_bfqq(bic, is_sync);
8910 -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
8911 -+ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
8912 -+ bic_set_bfqq(bic, bfqq, is_sync);
8913 -+ } else {
8914 -+ /*
8915 -+ * If the queue was seeky for too long, break it apart.
8916 -+ */
8917 -+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
8918 -+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
8919 -+ bfqq = bfq_split_bfqq(bic, bfqq);
8920 -+ if (!bfqq)
8921 -+ goto new_queue;
8922 -+ }
8923 -+
8924 -+ /*
8925 -+ * Check to see if this queue is scheduled to merge with
8926 -+ * another closely cooperating queue. The merging of queues
8927 -+ * happens here as it must be done in process context.
8928 -+ * The reference on new_bfqq was taken in merge_bfqqs.
8929 -+ */
8930 -+ if (bfqq->new_bfqq != NULL)
8931 -+ bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
8932 -+ }
8933 -+
8934 -+ bfqq->allocated[rw]++;
8935 -+ atomic_inc(&bfqq->ref);
8936 -+ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
8937 -+ atomic_read(&bfqq->ref));
8938 -+
8939 -+ rq->elv.priv[0] = bic;
8940 -+ rq->elv.priv[1] = bfqq;
8941 -+
8942 -+ spin_unlock_irqrestore(q->queue_lock, flags);
8943 -+
8944 -+ return 0;
8945 -+
8946 -+queue_fail:
8947 -+ bfq_schedule_dispatch(bfqd);
8948 -+ spin_unlock_irqrestore(q->queue_lock, flags);
8949 -+
8950 -+ return 1;
8951 -+}
8952 -+
8953 -+static void bfq_kick_queue(struct work_struct *work)
8954 -+{
8955 -+ struct bfq_data *bfqd =
8956 -+ container_of(work, struct bfq_data, unplug_work);
8957 -+ struct request_queue *q = bfqd->queue;
8958 -+
8959 -+ spin_lock_irq(q->queue_lock);
8960 -+ __blk_run_queue(q);
8961 -+ spin_unlock_irq(q->queue_lock);
8962 -+}
8963 -+
8964 -+/*
8965 -+ * Handler of the expiration of the timer running if the in-service queue
8966 -+ * is idling inside its time slice.
8967 -+ */
8968 -+static void bfq_idle_slice_timer(unsigned long data)
8969 -+{
8970 -+ struct bfq_data *bfqd = (struct bfq_data *)data;
8971 -+ struct bfq_queue *bfqq;
8972 -+ unsigned long flags;
8973 -+ enum bfqq_expiration reason;
8974 -+
8975 -+ spin_lock_irqsave(bfqd->queue->queue_lock, flags);
8976 -+
8977 -+ bfqq = bfqd->in_service_queue;
8978 -+ /*
8979 -+ * Theoretical race here: the in-service queue can be NULL or different
8980 -+ * from the queue that was idling if the timer handler spins on
8981 -+ * the queue_lock and a new request arrives for the current
8982 -+ * queue and there is a full dispatch cycle that changes the
8983 -+ * in-service queue. This is unlikely to happen, but in the worst case
8984 -+ * we just expire a queue too early.
8985 -+ */
8986 -+ if (bfqq != NULL) {
8987 -+ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
8988 -+ if (bfq_bfqq_budget_timeout(bfqq))
8989 -+ /*
8990 -+ * Also here the queue can be safely expired
8991 -+ * for budget timeout without wasting
8992 -+ * guarantees
8993 -+ */
8994 -+ reason = BFQ_BFQQ_BUDGET_TIMEOUT;
8995 -+ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
8996 -+ /*
8997 -+ * The queue may not be empty upon timer expiration,
8998 -+ * because we may not disable the timer when the first
8999 -+ * request of the in-service queue arrives during
9000 -+ * disk idling.
9001 -+ */
9002 -+ reason = BFQ_BFQQ_TOO_IDLE;
9003 -+ else
9004 -+ goto schedule_dispatch;
9005 -+
9006 -+ bfq_bfqq_expire(bfqd, bfqq, 1, reason);
9007 -+ }
9008 -+
9009 -+schedule_dispatch:
9010 -+ bfq_schedule_dispatch(bfqd);
9011 -+
9012 -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
9013 -+}
9014 -+
9015 -+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
9016 -+{
9017 -+ del_timer_sync(&bfqd->idle_slice_timer);
9018 -+ cancel_work_sync(&bfqd->unplug_work);
9019 -+}
9020 -+
9021 -+static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd,
9022 -+ struct bfq_queue **bfqq_ptr)
9023 -+{
9024 -+ struct bfq_group *root_group = bfqd->root_group;
9025 -+ struct bfq_queue *bfqq = *bfqq_ptr;
9026 -+
9027 -+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
9028 -+ if (bfqq != NULL) {
9029 -+ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
9030 -+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
9031 -+ bfqq, atomic_read(&bfqq->ref));
9032 -+ bfq_put_queue(bfqq);
9033 -+ *bfqq_ptr = NULL;
9034 -+ }
9035 -+}
9036 -+
9037 -+/*
9038 -+ * Release all the bfqg references to its async queues. If we are
9039 -+ * deallocating the group, these queues may still contain requests, so
9040 -+ * we reparent them to the root cgroup (i.e., the only one that will
9041 -+ * exist for sure until all the requests on a device are gone).
9042 -+ */
9043 -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
9044 -+{
9045 -+ int i, j;
9046 -+
9047 -+ for (i = 0; i < 2; i++)
9048 -+ for (j = 0; j < IOPRIO_BE_NR; j++)
9049 -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
9050 -+
9051 -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
9052 -+}
9053 -+
9054 -+static void bfq_exit_queue(struct elevator_queue *e)
9055 -+{
9056 -+ struct bfq_data *bfqd = e->elevator_data;
9057 -+ struct request_queue *q = bfqd->queue;
9058 -+ struct bfq_queue *bfqq, *n;
9059 -+
9060 -+ bfq_shutdown_timer_wq(bfqd);
9061 -+
9062 -+ spin_lock_irq(q->queue_lock);
9063 -+
9064 -+ BUG_ON(bfqd->in_service_queue != NULL);
9065 -+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
9066 -+ bfq_deactivate_bfqq(bfqd, bfqq, 0);
9067 -+
9068 -+ bfq_disconnect_groups(bfqd);
9069 -+ spin_unlock_irq(q->queue_lock);
9070 -+
9071 -+ bfq_shutdown_timer_wq(bfqd);
9072 -+
9073 -+ synchronize_rcu();
9074 -+
9075 -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
9076 -+
9077 -+ bfq_free_root_group(bfqd);
9078 -+ kfree(bfqd);
9079 -+}
9080 -+
9081 -+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
9082 -+{
9083 -+ struct bfq_group *bfqg;
9084 -+ struct bfq_data *bfqd;
9085 -+ struct elevator_queue *eq;
9086 -+
9087 -+ eq = elevator_alloc(q, e);
9088 -+ if (eq == NULL)
9089 -+ return -ENOMEM;
9090 -+
9091 -+ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
9092 -+ if (bfqd == NULL) {
9093 -+ kobject_put(&eq->kobj);
9094 -+ return -ENOMEM;
9095 -+ }
9096 -+ eq->elevator_data = bfqd;
9097 -+
9098 -+ /*
9099 -+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
9100 -+ * Grab a permanent reference to it, so that the normal code flow
9101 -+ * will not attempt to free it.
9102 -+ */
9103 -+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0);
9104 -+ atomic_inc(&bfqd->oom_bfqq.ref);
9105 -+
9106 -+ bfqd->queue = q;
9107 -+
9108 -+ spin_lock_irq(q->queue_lock);
9109 -+ q->elevator = eq;
9110 -+ spin_unlock_irq(q->queue_lock);
9111 -+
9112 -+ bfqg = bfq_alloc_root_group(bfqd, q->node);
9113 -+ if (bfqg == NULL) {
9114 -+ kfree(bfqd);
9115 -+ kobject_put(&eq->kobj);
9116 -+ return -ENOMEM;
9117 -+ }
9118 -+
9119 -+ bfqd->root_group = bfqg;
9120 -+
9121 -+ init_timer(&bfqd->idle_slice_timer);
9122 -+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
9123 -+ bfqd->idle_slice_timer.data = (unsigned long)bfqd;
9124 -+
9125 -+ bfqd->rq_pos_tree = RB_ROOT;
9126 -+
9127 -+ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
9128 -+
9129 -+ INIT_LIST_HEAD(&bfqd->active_list);
9130 -+ INIT_LIST_HEAD(&bfqd->idle_list);
9131 -+
9132 -+ bfqd->hw_tag = -1;
9133 -+
9134 -+ bfqd->bfq_max_budget = bfq_default_max_budget;
9135 -+
9136 -+ bfqd->bfq_quantum = bfq_quantum;
9137 -+ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
9138 -+ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
9139 -+ bfqd->bfq_back_max = bfq_back_max;
9140 -+ bfqd->bfq_back_penalty = bfq_back_penalty;
9141 -+ bfqd->bfq_slice_idle = bfq_slice_idle;
9142 -+ bfqd->bfq_class_idle_last_service = 0;
9143 -+ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
9144 -+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
9145 -+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
9146 -+
9147 -+ bfqd->low_latency = true;
9148 -+
9149 -+ bfqd->bfq_raising_coeff = 20;
9150 -+ bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300);
9151 -+ bfqd->bfq_raising_max_time = 0;
9152 -+ bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000);
9153 -+ bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500);
9154 -+ bfqd->bfq_raising_max_softrt_rate = 7000; /*
9155 -+ * Approximate rate required
9156 -+ * to play back or record a
9157 -+ * high-definition compressed
9158 -+ * video.
9159 -+ */
9160 -+ bfqd->raised_busy_queues = 0;
9161 -+
9162 -+ /* Initially estimate the device's peak rate as the reference rate */
9163 -+ if (blk_queue_nonrot(bfqd->queue)) {
9164 -+ bfqd->RT_prod = R_nonrot * T_nonrot;
9165 -+ bfqd->peak_rate = R_nonrot;
9166 -+ } else {
9167 -+ bfqd->RT_prod = R_rot * T_rot;
9168 -+ bfqd->peak_rate = R_rot;
9169 -+ }
9170 -+
9171 -+ return 0;
9172 -+}
9173 -+
9174 -+static void bfq_slab_kill(void)
9175 -+{
9176 -+ if (bfq_pool != NULL)
9177 -+ kmem_cache_destroy(bfq_pool);
9178 -+}
9179 -+
9180 -+static int __init bfq_slab_setup(void)
9181 -+{
9182 -+ bfq_pool = KMEM_CACHE(bfq_queue, 0);
9183 -+ if (bfq_pool == NULL)
9184 -+ return -ENOMEM;
9185 -+ return 0;
9186 -+}
9187 -+
9188 -+static ssize_t bfq_var_show(unsigned int var, char *page)
9189 -+{
9190 -+ return sprintf(page, "%d\n", var);
9191 -+}
9192 -+
9193 -+static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count)
9194 -+{
9195 -+ unsigned long new_val;
9196 -+ int ret = kstrtoul(page, 10, &new_val);
9197 -+
9198 -+ if (ret == 0)
9199 -+ *var = new_val;
9200 -+
9201 -+ return count;
9202 -+}
9203 -+
9204 -+static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page)
9205 -+{
9206 -+ struct bfq_data *bfqd = e->elevator_data;
9207 -+ return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ?
9208 -+ jiffies_to_msecs(bfqd->bfq_raising_max_time) :
9209 -+ jiffies_to_msecs(bfq_wrais_duration(bfqd)));
9210 -+}
9211 -+
9212 -+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
9213 -+{
9214 -+ struct bfq_queue *bfqq;
9215 -+ struct bfq_data *bfqd = e->elevator_data;
9216 -+ ssize_t num_char = 0;
9217 -+
9218 -+ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
9219 -+ bfqd->queued);
9220 -+
9221 -+ spin_lock_irq(bfqd->queue->queue_lock);
9222 -+
9223 -+ num_char += sprintf(page + num_char, "Active:\n");
9224 -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
9225 -+ num_char += sprintf(page + num_char,
9226 -+ "pid%d: weight %hu, nr_queued %d %d,"
9227 -+ " dur %d/%u\n",
9228 -+ bfqq->pid,
9229 -+ bfqq->entity.weight,
9230 -+ bfqq->queued[0],
9231 -+ bfqq->queued[1],
9232 -+ jiffies_to_msecs(jiffies -
9233 -+ bfqq->last_rais_start_finish),
9234 -+ jiffies_to_msecs(bfqq->raising_cur_max_time));
9235 -+ }
9236 -+
9237 -+ num_char += sprintf(page + num_char, "Idle:\n");
9238 -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
9239 -+ num_char += sprintf(page + num_char,
9240 -+ "pid%d: weight %hu, dur %d/%u\n",
9241 -+ bfqq->pid,
9242 -+ bfqq->entity.weight,
9243 -+ jiffies_to_msecs(jiffies -
9244 -+ bfqq->last_rais_start_finish),
9245 -+ jiffies_to_msecs(bfqq->raising_cur_max_time));
9246 -+ }
9247 -+
9248 -+ spin_unlock_irq(bfqd->queue->queue_lock);
9249 -+
9250 -+ return num_char;
9251 -+}
9252 -+
9253 -+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
9254 -+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
9255 -+{ \
9256 -+ struct bfq_data *bfqd = e->elevator_data; \
9257 -+ unsigned int __data = __VAR; \
9258 -+ if (__CONV) \
9259 -+ __data = jiffies_to_msecs(__data); \
9260 -+ return bfq_var_show(__data, (page)); \
9261 -+}
9262 -+SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0);
9263 -+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
9264 -+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
9265 -+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
9266 -+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
9267 -+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
9268 -+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
9269 -+SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0);
9270 -+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
9271 -+SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
9272 -+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
9273 -+SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0);
9274 -+SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1);
9275 -+SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time,
9276 -+ 1);
9277 -+SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show,
9278 -+ bfqd->bfq_raising_min_inter_arr_async,
9279 -+ 1);
9280 -+SHOW_FUNCTION(bfq_raising_max_softrt_rate_show,
9281 -+ bfqd->bfq_raising_max_softrt_rate, 0);
9282 -+#undef SHOW_FUNCTION
9283 -+
9284 -+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
9285 -+static ssize_t \
9286 -+__FUNC(struct elevator_queue *e, const char *page, size_t count) \
9287 -+{ \
9288 -+ struct bfq_data *bfqd = e->elevator_data; \
9289 -+ unsigned long uninitialized_var(__data); \
9290 -+ int ret = bfq_var_store(&__data, (page), count); \
9291 -+ if (__data < (MIN)) \
9292 -+ __data = (MIN); \
9293 -+ else if (__data > (MAX)) \
9294 -+ __data = (MAX); \
9295 -+ if (__CONV) \
9296 -+ *(__PTR) = msecs_to_jiffies(__data); \
9297 -+ else \
9298 -+ *(__PTR) = __data; \
9299 -+ return ret; \
9300 -+}
9301 -+STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0);
9302 -+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
9303 -+ INT_MAX, 1);
9304 -+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
9305 -+ INT_MAX, 1);
9306 -+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
9307 -+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
9308 -+ INT_MAX, 0);
9309 -+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
9310 -+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
9311 -+ 1, INT_MAX, 0);
9312 -+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
9313 -+ INT_MAX, 1);
9314 -+STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1,
9315 -+ INT_MAX, 0);
9316 -+STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0,
9317 -+ INT_MAX, 1);
9318 -+STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0,
9319 -+ INT_MAX, 1);
9320 -+STORE_FUNCTION(bfq_raising_min_idle_time_store,
9321 -+ &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1);
9322 -+STORE_FUNCTION(bfq_raising_min_inter_arr_async_store,
9323 -+ &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1);
9324 -+STORE_FUNCTION(bfq_raising_max_softrt_rate_store,
9325 -+ &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0);
9326 -+#undef STORE_FUNCTION
9327 -+
9328 -+/* do nothing for the moment */
9329 -+static ssize_t bfq_weights_store(struct elevator_queue *e,
9330 -+ const char *page, size_t count)
9331 -+{
9332 -+ return count;
9333 -+}
9334 -+
9335 -+static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
9336 -+{
9337 -+ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
9338 -+
9339 -+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
9340 -+ return bfq_calc_max_budget(bfqd->peak_rate, timeout);
9341 -+ else
9342 -+ return bfq_default_max_budget;
9343 -+}
9344 -+
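/*
 * Back-of-the-envelope version of the estimate above, with assumed units
 * (the patch delegates the exact fixed-point conversion to
 * bfq_calc_max_budget()): the maximum budget is roughly the number of
 * sectors the device can transfer, at its measured peak rate, within the
 * sync timeout.
 */
static unsigned long rough_max_budget(unsigned long peak_rate_sectors_per_ms,
				      unsigned long timeout_ms)
{
	return peak_rate_sectors_per_ms * timeout_ms;
}
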
9345 -+static ssize_t bfq_max_budget_store(struct elevator_queue *e,
9346 -+ const char *page, size_t count)
9347 -+{
9348 -+ struct bfq_data *bfqd = e->elevator_data;
9349 -+ unsigned long uninitialized_var(__data);
9350 -+ int ret = bfq_var_store(&__data, (page), count);
9351 -+
9352 -+ if (__data == 0)
9353 -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
9354 -+ else {
9355 -+ if (__data > INT_MAX)
9356 -+ __data = INT_MAX;
9357 -+ bfqd->bfq_max_budget = __data;
9358 -+ }
9359 -+
9360 -+ bfqd->bfq_user_max_budget = __data;
9361 -+
9362 -+ return ret;
9363 -+}
9364 -+
9365 -+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
9366 -+ const char *page, size_t count)
9367 -+{
9368 -+ struct bfq_data *bfqd = e->elevator_data;
9369 -+ unsigned long uninitialized_var(__data);
9370 -+ int ret = bfq_var_store(&__data, (page), count);
9371 -+
9372 -+ if (__data < 1)
9373 -+ __data = 1;
9374 -+ else if (__data > INT_MAX)
9375 -+ __data = INT_MAX;
9376 -+
9377 -+ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
9378 -+ if (bfqd->bfq_user_max_budget == 0)
9379 -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
9380 -+
9381 -+ return ret;
9382 -+}
9383 -+
9384 -+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
9385 -+ const char *page, size_t count)
9386 -+{
9387 -+ struct bfq_data *bfqd = e->elevator_data;
9388 -+ unsigned long uninitialized_var(__data);
9389 -+ int ret = bfq_var_store(&__data, (page), count);
9390 -+
9391 -+ if (__data > 1)
9392 -+ __data = 1;
9393 -+ if (__data == 0 && bfqd->low_latency != 0)
9394 -+ bfq_end_raising(bfqd);
9395 -+ bfqd->low_latency = __data;
9396 -+
9397 -+ return ret;
9398 -+}
9399 -+
9400 -+#define BFQ_ATTR(name) \
9401 -+ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
9402 -+
9403 -+static struct elv_fs_entry bfq_attrs[] = {
9404 -+ BFQ_ATTR(quantum),
9405 -+ BFQ_ATTR(fifo_expire_sync),
9406 -+ BFQ_ATTR(fifo_expire_async),
9407 -+ BFQ_ATTR(back_seek_max),
9408 -+ BFQ_ATTR(back_seek_penalty),
9409 -+ BFQ_ATTR(slice_idle),
9410 -+ BFQ_ATTR(max_budget),
9411 -+ BFQ_ATTR(max_budget_async_rq),
9412 -+ BFQ_ATTR(timeout_sync),
9413 -+ BFQ_ATTR(timeout_async),
9414 -+ BFQ_ATTR(low_latency),
9415 -+ BFQ_ATTR(raising_coeff),
9416 -+ BFQ_ATTR(raising_max_time),
9417 -+ BFQ_ATTR(raising_rt_max_time),
9418 -+ BFQ_ATTR(raising_min_idle_time),
9419 -+ BFQ_ATTR(raising_min_inter_arr_async),
9420 -+ BFQ_ATTR(raising_max_softrt_rate),
9421 -+ BFQ_ATTR(weights),
9422 -+ __ATTR_NULL
9423 -+};
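Each BFQ_ATTR(name) entry above expands, per the macro defined just before the table, into an __ATTR() initializer that wires the sysfs show/store pair for that tunable; for example:

/* BFQ_ATTR(quantum) expands to: */
__ATTR(quantum, S_IRUGO|S_IWUSR, bfq_quantum_show, bfq_quantum_store)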
9424 -+
9425 -+static struct elevator_type iosched_bfq = {
9426 -+ .ops = {
9427 -+ .elevator_merge_fn = bfq_merge,
9428 -+ .elevator_merged_fn = bfq_merged_request,
9429 -+ .elevator_merge_req_fn = bfq_merged_requests,
9430 -+ .elevator_allow_merge_fn = bfq_allow_merge,
9431 -+ .elevator_dispatch_fn = bfq_dispatch_requests,
9432 -+ .elevator_add_req_fn = bfq_insert_request,
9433 -+ .elevator_activate_req_fn = bfq_activate_request,
9434 -+ .elevator_deactivate_req_fn = bfq_deactivate_request,
9435 -+ .elevator_completed_req_fn = bfq_completed_request,
9436 -+ .elevator_former_req_fn = elv_rb_former_request,
9437 -+ .elevator_latter_req_fn = elv_rb_latter_request,
9438 -+ .elevator_init_icq_fn = bfq_init_icq,
9439 -+ .elevator_exit_icq_fn = bfq_exit_icq,
9440 -+ .elevator_set_req_fn = bfq_set_request,
9441 -+ .elevator_put_req_fn = bfq_put_request,
9442 -+ .elevator_may_queue_fn = bfq_may_queue,
9443 -+ .elevator_init_fn = bfq_init_queue,
9444 -+ .elevator_exit_fn = bfq_exit_queue,
9445 -+ },
9446 -+ .icq_size = sizeof(struct bfq_io_cq),
9447 -+ .icq_align = __alignof__(struct bfq_io_cq),
9448 -+ .elevator_attrs = bfq_attrs,
9449 -+ .elevator_name = "bfq",
9450 -+ .elevator_owner = THIS_MODULE,
9451 -+};
9452 -+
9453 -+static int __init bfq_init(void)
9454 -+{
9455 -+ /*
9456 -+ * Can be 0 on HZ < 1000 setups.
9457 -+ */
9458 -+ if (bfq_slice_idle == 0)
9459 -+ bfq_slice_idle = 1;
9460 -+
9461 -+ if (bfq_timeout_async == 0)
9462 -+ bfq_timeout_async = 1;
9463 -+
9464 -+ if (bfq_slab_setup())
9465 -+ return -ENOMEM;
9466 -+
9467 -+ elv_register(&iosched_bfq);
9468 -+ pr_info("BFQ I/O-scheduler version: v7r2");
9469 -+
9470 -+ return 0;
9471 -+}
9472 -+
9473 -+static void __exit bfq_exit(void)
9474 -+{
9475 -+ elv_unregister(&iosched_bfq);
9476 -+ bfq_slab_kill();
9477 -+}
9478 -+
9479 -+module_init(bfq_init);
9480 -+module_exit(bfq_exit);
9481 -+
9482 -+MODULE_AUTHOR("Fabio Checconi, Paolo Valente");
9483 -diff --git a/block/bfq-sched.c b/block/bfq-sched.c
9484 -new file mode 100644
9485 -index 0000000..999b475
9486 ---- /dev/null
9487 -+++ b/block/bfq-sched.c
9488 -@@ -0,0 +1,1078 @@
9489 -+/*
9490 -+ * BFQ: Hierarchical B-WF2Q+ scheduler.
9491 -+ *
9492 -+ * Based on ideas and code from CFQ:
9493 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
9494 -+ *
9495 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
9496 -+ * Paolo Valente <paolo.valente@×××××××.it>
9497 -+ *
9498 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
9499 -+ */
9500 -+
9501 -+#ifdef CONFIG_CGROUP_BFQIO
9502 -+#define for_each_entity(entity) \
9503 -+ for (; entity != NULL; entity = entity->parent)
9504 -+
9505 -+#define for_each_entity_safe(entity, parent) \
9506 -+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
9507 -+
9508 -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
9509 -+ int extract,
9510 -+ struct bfq_data *bfqd);
9511 -+
9512 -+static inline void bfq_update_budget(struct bfq_entity *next_in_service)
9513 -+{
9514 -+ struct bfq_entity *bfqg_entity;
9515 -+ struct bfq_group *bfqg;
9516 -+ struct bfq_sched_data *group_sd;
9517 -+
9518 -+ BUG_ON(next_in_service == NULL);
9519 -+
9520 -+ group_sd = next_in_service->sched_data;
9521 -+
9522 -+ bfqg = container_of(group_sd, struct bfq_group, sched_data);
9523 -+ /*
9524 -+ * bfq_group's my_entity field is not NULL only if the group
9525 -+ * is not the root group. We must not touch the root entity
9526 -+ * as it must never become an in-service entity.
9527 -+ */
9528 -+ bfqg_entity = bfqg->my_entity;
9529 -+ if (bfqg_entity != NULL)
9530 -+ bfqg_entity->budget = next_in_service->budget;
9531 -+}
9532 -+
9533 -+static int bfq_update_next_in_service(struct bfq_sched_data *sd)
9534 -+{
9535 -+ struct bfq_entity *next_in_service;
9536 -+
9537 -+ if (sd->in_service_entity != NULL)
9538 -+ /* will update/requeue at the end of service */
9539 -+ return 0;
9540 -+
9541 -+ /*
9542 -+ * NOTE: this can be improved in many ways, such as returning
9543 -+ * 1 (and thus propagating upwards the update) only when the
9544 -+ * budget changes, or caching the bfqq that will be scheduled
9545 -+ * next from this subtree. For now we worry more about
9546 -+ * correctness than about performance...
9547 -+ */
9548 -+ next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
9549 -+ sd->next_in_service = next_in_service;
9550 -+
9551 -+ if (next_in_service != NULL)
9552 -+ bfq_update_budget(next_in_service);
9553 -+
9554 -+ return 1;
9555 -+}
9556 -+
9557 -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
9558 -+ struct bfq_entity *entity)
9559 -+{
9560 -+ BUG_ON(sd->next_in_service != entity);
9561 -+}
9562 -+#else
9563 -+#define for_each_entity(entity) \
9564 -+ for (; entity != NULL; entity = NULL)
9565 -+
9566 -+#define for_each_entity_safe(entity, parent) \
9567 -+ for (parent = NULL; entity != NULL; entity = parent)
9568 -+
9569 -+static inline int bfq_update_next_in_service(struct bfq_sched_data *sd)
9570 -+{
9571 -+ return 0;
9572 -+}
9573 -+
9574 -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
9575 -+ struct bfq_entity *entity)
9576 -+{
9577 -+}
9578 -+
9579 -+static inline void bfq_update_budget(struct bfq_entity *next_in_service)
9580 -+{
9581 -+}
9582 -+#endif
9583 -+
9584 -+/*
9585 -+ * Shift for timestamp calculations. This actually limits the maximum
9586 -+ * service allowed in one timestamp delta (small shift values increase it),
9587 -+ * the maximum total weight that can be used for the queues in the system
9588 -+ * (big shift values increase it), and the period of virtual time wraparounds.
9589 -+ */
9590 -+#define WFQ_SERVICE_SHIFT 22
9591 -+
9592 -+/**
9593 -+ * bfq_gt - compare two timestamps.
9594 -+ * @a: first ts.
9595 -+ * @b: second ts.
9596 -+ *
9597 -+ * Return @a > @b, dealing with wrapping correctly.
9598 -+ */
9599 -+static inline int bfq_gt(u64 a, u64 b)
9600 -+{
9601 -+ return (s64)(a - b) > 0;
9602 -+}
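The unsigned subtraction followed by a signed comparison is what keeps the ordering correct across u64 wraparound of the virtual clock; a minimal user-space sketch (plain C, not part of the patch) illustrating the idiom:

#include <stdint.h>
#include <stdio.h>

static int bfq_gt(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) > 0;
}

int main(void)
{
	uint64_t before_wrap = UINT64_MAX - 5;	/* timestamp just before the clock wraps */
	uint64_t after_wrap  = 10;		/* timestamp shortly after the wrap */

	/* A plain '>' misorders the two; the signed-difference idiom does not. */
	printf("plain  : %d\n", after_wrap > before_wrap);
	printf("bfq_gt : %d\n", bfq_gt(after_wrap, before_wrap));
	return 0;
}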
9603 -+
9604 -+static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
9605 -+{
9606 -+ struct bfq_queue *bfqq = NULL;
9607 -+
9608 -+ BUG_ON(entity == NULL);
9609 -+
9610 -+ if (entity->my_sched_data == NULL)
9611 -+ bfqq = container_of(entity, struct bfq_queue, entity);
9612 -+
9613 -+ return bfqq;
9614 -+}
9615 -+
9616 -+
9617 -+/**
9618 -+ * bfq_delta - map service into the virtual time domain.
9619 -+ * @service: amount of service.
9620 -+ * @weight: scale factor (weight of an entity or weight sum).
9621 -+ */
9622 -+static inline u64 bfq_delta(unsigned long service,
9623 -+ unsigned long weight)
9624 -+{
9625 -+ u64 d = (u64)service << WFQ_SERVICE_SHIFT;
9626 -+
9627 -+ do_div(d, weight);
9628 -+ return d;
9629 -+}
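Concretely, bfq_delta() scales the service by 2^WFQ_SERVICE_SHIFT and divides by the weight, so a heavier entity advances its timestamps more slowly and therefore receives a proportionally larger share. A small stand-alone sketch of the same arithmetic:

#include <stdint.h>
#include <stdio.h>

#define WFQ_SERVICE_SHIFT 22

static uint64_t bfq_delta(unsigned long service, unsigned long weight)
{
	return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
}

int main(void)
{
	/* Equal service, different weights: the heavier entity's virtual time
	 * advances half as fast, i.e. it is entitled to twice the share. */
	printf("delta(8, weight 100) = %llu\n", (unsigned long long)bfq_delta(8, 100));
	printf("delta(8, weight 200) = %llu\n", (unsigned long long)bfq_delta(8, 200));
	return 0;
}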
9630 -+
9631 -+/**
9632 -+ * bfq_calc_finish - assign the finish time to an entity.
9633 -+ * @entity: the entity to act upon.
9634 -+ * @service: the service to be charged to the entity.
9635 -+ */
9636 -+static inline void bfq_calc_finish(struct bfq_entity *entity,
9637 -+ unsigned long service)
9638 -+{
9639 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9640 -+
9641 -+ BUG_ON(entity->weight == 0);
9642 -+
9643 -+ entity->finish = entity->start +
9644 -+ bfq_delta(service, entity->weight);
9645 -+
9646 -+ if (bfqq != NULL) {
9647 -+ bfq_log_bfqq(bfqq->bfqd, bfqq,
9648 -+ "calc_finish: serv %lu, w %d",
9649 -+ service, entity->weight);
9650 -+ bfq_log_bfqq(bfqq->bfqd, bfqq,
9651 -+ "calc_finish: start %llu, finish %llu, delta %llu",
9652 -+ entity->start, entity->finish,
9653 -+ bfq_delta(service, entity->weight));
9654 -+ }
9655 -+}
9656 -+
9657 -+/**
9658 -+ * bfq_entity_of - get an entity from a node.
9659 -+ * @node: the node field of the entity.
9660 -+ *
9661 -+ * Convert a node pointer to the relative entity. This is used only
9662 -+ * to simplify the logic of some functions and not as the generic
9663 -+ * conversion mechanism because, e.g., in the tree walking functions,
9664 -+ * the check for a %NULL value would be redundant.
9665 -+ */
9666 -+static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
9667 -+{
9668 -+ struct bfq_entity *entity = NULL;
9669 -+
9670 -+ if (node != NULL)
9671 -+ entity = rb_entry(node, struct bfq_entity, rb_node);
9672 -+
9673 -+ return entity;
9674 -+}
9675 -+
9676 -+/**
9677 -+ * bfq_extract - remove an entity from a tree.
9678 -+ * @root: the tree root.
9679 -+ * @entity: the entity to remove.
9680 -+ */
9681 -+static inline void bfq_extract(struct rb_root *root,
9682 -+ struct bfq_entity *entity)
9683 -+{
9684 -+ BUG_ON(entity->tree != root);
9685 -+
9686 -+ entity->tree = NULL;
9687 -+ rb_erase(&entity->rb_node, root);
9688 -+}
9689 -+
9690 -+/**
9691 -+ * bfq_idle_extract - extract an entity from the idle tree.
9692 -+ * @st: the service tree of the owning @entity.
9693 -+ * @entity: the entity being removed.
9694 -+ */
9695 -+static void bfq_idle_extract(struct bfq_service_tree *st,
9696 -+ struct bfq_entity *entity)
9697 -+{
9698 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9699 -+ struct rb_node *next;
9700 -+
9701 -+ BUG_ON(entity->tree != &st->idle);
9702 -+
9703 -+ if (entity == st->first_idle) {
9704 -+ next = rb_next(&entity->rb_node);
9705 -+ st->first_idle = bfq_entity_of(next);
9706 -+ }
9707 -+
9708 -+ if (entity == st->last_idle) {
9709 -+ next = rb_prev(&entity->rb_node);
9710 -+ st->last_idle = bfq_entity_of(next);
9711 -+ }
9712 -+
9713 -+ bfq_extract(&st->idle, entity);
9714 -+
9715 -+ if (bfqq != NULL)
9716 -+ list_del(&bfqq->bfqq_list);
9717 -+}
9718 -+
9719 -+/**
9720 -+ * bfq_insert - generic tree insertion.
9721 -+ * @root: tree root.
9722 -+ * @entity: entity to insert.
9723 -+ *
9724 -+ * This is used for the idle and the active tree, since they are both
9725 -+ * ordered by finish time.
9726 -+ */
9727 -+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
9728 -+{
9729 -+ struct bfq_entity *entry;
9730 -+ struct rb_node **node = &root->rb_node;
9731 -+ struct rb_node *parent = NULL;
9732 -+
9733 -+ BUG_ON(entity->tree != NULL);
9734 -+
9735 -+ while (*node != NULL) {
9736 -+ parent = *node;
9737 -+ entry = rb_entry(parent, struct bfq_entity, rb_node);
9738 -+
9739 -+ if (bfq_gt(entry->finish, entity->finish))
9740 -+ node = &parent->rb_left;
9741 -+ else
9742 -+ node = &parent->rb_right;
9743 -+ }
9744 -+
9745 -+ rb_link_node(&entity->rb_node, parent, node);
9746 -+ rb_insert_color(&entity->rb_node, root);
9747 -+
9748 -+ entity->tree = root;
9749 -+}
9750 -+
9751 -+/**
9752 -+ * bfq_update_min - update the min_start field of an entity.
9753 -+ * @entity: the entity to update.
9754 -+ * @node: one of its children.
9755 -+ *
9756 -+ * This function is called when @entity may store an invalid value for
9757 -+ * min_start due to updates to the active tree. The function assumes
9758 -+ * that the subtree rooted at @node (which may be its left or its right
9759 -+ * child) has a valid min_start value.
9760 -+ */
9761 -+static inline void bfq_update_min(struct bfq_entity *entity,
9762 -+ struct rb_node *node)
9763 -+{
9764 -+ struct bfq_entity *child;
9765 -+
9766 -+ if (node != NULL) {
9767 -+ child = rb_entry(node, struct bfq_entity, rb_node);
9768 -+ if (bfq_gt(entity->min_start, child->min_start))
9769 -+ entity->min_start = child->min_start;
9770 -+ }
9771 -+}
9772 -+
9773 -+/**
9774 -+ * bfq_update_active_node - recalculate min_start.
9775 -+ * @node: the node to update.
9776 -+ *
9777 -+ * @node may have changed position or one of its children may have moved,
9778 -+ * this function updates its min_start value. The left and right subtrees
9779 -+ * are assumed to hold a correct min_start value.
9780 -+ */
9781 -+static inline void bfq_update_active_node(struct rb_node *node)
9782 -+{
9783 -+ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
9784 -+
9785 -+ entity->min_start = entity->start;
9786 -+ bfq_update_min(entity, node->rb_right);
9787 -+ bfq_update_min(entity, node->rb_left);
9788 -+}
9789 -+
9790 -+/**
9791 -+ * bfq_update_active_tree - update min_start for the whole active tree.
9792 -+ * @node: the starting node.
9793 -+ *
9794 -+ * @node must be the deepest modified node after an update. This function
9795 -+ * updates its min_start using the values held by its children, assuming
9796 -+ * that they did not change, and then updates all the nodes that may have
9797 -+ * changed in the path to the root. The only nodes that may have changed
9798 -+ * are the ones in the path or their siblings.
9799 -+ */
9800 -+static void bfq_update_active_tree(struct rb_node *node)
9801 -+{
9802 -+ struct rb_node *parent;
9803 -+
9804 -+up:
9805 -+ bfq_update_active_node(node);
9806 -+
9807 -+ parent = rb_parent(node);
9808 -+ if (parent == NULL)
9809 -+ return;
9810 -+
9811 -+ if (node == parent->rb_left && parent->rb_right != NULL)
9812 -+ bfq_update_active_node(parent->rb_right);
9813 -+ else if (parent->rb_left != NULL)
9814 -+ bfq_update_active_node(parent->rb_left);
9815 -+
9816 -+ node = parent;
9817 -+ goto up;
9818 -+}
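The two helpers above maintain an augmented tree: each node caches the minimum start time of its subtree, which is what later lets bfq_first_active_entity() skip subtrees that cannot contain an eligible entity. A toy user-space sketch of the invariant (plain pointers and a plain '<' instead of the kernel rbtree and bfq_gt()):

#include <stdint.h>
#include <stdio.h>

struct toy_node {
	uint64_t start, min_start;
	struct toy_node *left, *right;
};

static void toy_update_node(struct toy_node *n)
{
	n->min_start = n->start;
	if (n->left && n->left->min_start < n->min_start)
		n->min_start = n->left->min_start;
	if (n->right && n->right->min_start < n->min_start)
		n->min_start = n->right->min_start;
}

int main(void)
{
	struct toy_node a = { .start = 40 }, b = { .start = 10 }, c = { .start = 30 };
	struct toy_node root = { .start = 20, .left = &a, .right = &c };

	a.left = &b;
	/* Recompute bottom-up along the modified path, as the kernel code does. */
	toy_update_node(&b);
	toy_update_node(&c);
	toy_update_node(&a);
	toy_update_node(&root);

	printf("root.min_start = %llu\n", (unsigned long long)root.min_start); /* 10 */
	return 0;
}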
9819 -+
9820 -+/**
9821 -+ * bfq_active_insert - insert an entity in the active tree of its group/device.
9822 -+ * @st: the service tree of the entity.
9823 -+ * @entity: the entity being inserted.
9824 -+ *
9825 -+ * The active tree is ordered by finish time, but an extra key is kept
9826 -+ * per each node, containing the minimum value for the start times of
9827 -+ * its children (and the node itself), so it's possible to search for
9828 -+ * the eligible node with the lowest finish time in logarithmic time.
9829 -+ */
9830 -+static void bfq_active_insert(struct bfq_service_tree *st,
9831 -+ struct bfq_entity *entity)
9832 -+{
9833 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9834 -+ struct rb_node *node = &entity->rb_node;
9835 -+
9836 -+ bfq_insert(&st->active, entity);
9837 -+
9838 -+ if (node->rb_left != NULL)
9839 -+ node = node->rb_left;
9840 -+ else if (node->rb_right != NULL)
9841 -+ node = node->rb_right;
9842 -+
9843 -+ bfq_update_active_tree(node);
9844 -+
9845 -+ if (bfqq != NULL)
9846 -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
9847 -+}
9848 -+
9849 -+/**
9850 -+ * bfq_ioprio_to_weight - calc a weight from an ioprio.
9851 -+ * @ioprio: the ioprio value to convert.
9852 -+ */
9853 -+static unsigned short bfq_ioprio_to_weight(int ioprio)
9854 -+{
9855 -+ WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
9856 -+ return IOPRIO_BE_NR - ioprio;
9857 -+}
9858 -+
9859 -+/**
9860 -+ * bfq_weight_to_ioprio - calc an ioprio from a weight.
9861 -+ * @weight: the weight value to convert.
9862 -+ *
9863 -+ * To preserve as much as possible the old only-ioprio user interface,
9864 -+ * 0 is used as an escape ioprio value for weights (numerically) equal to or
9865 -+ * larger than IOPRIO_BE_NR.
9866 -+ */
9867 -+static unsigned short bfq_weight_to_ioprio(int weight)
9868 -+{
9869 -+ WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
9870 -+ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
9871 -+}
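The two converters above map best-effort ioprio levels onto weights and back, with 0 acting as the escape ioprio for weights at or above IOPRIO_BE_NR. A user-space sketch of the mapping (IOPRIO_BE_NR assumed to be 8, as in the kernel headers):

#include <stdio.h>

#define IOPRIO_BE_NR 8

static unsigned short ioprio_to_weight(int ioprio)
{
	return IOPRIO_BE_NR - ioprio;
}

static unsigned short weight_to_ioprio(int weight)
{
	return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
}

int main(void)
{
	for (int ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
		printf("ioprio %d -> weight %u\n", ioprio, ioprio_to_weight(ioprio));
	printf("weight 100 -> ioprio %u (escape value)\n", weight_to_ioprio(100));
	return 0;
}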
9872 -+
9873 -+static inline void bfq_get_entity(struct bfq_entity *entity)
9874 -+{
9875 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9876 -+ struct bfq_sched_data *sd;
9877 -+
9878 -+ if (bfqq != NULL) {
9879 -+ sd = entity->sched_data;
9880 -+ atomic_inc(&bfqq->ref);
9881 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
9882 -+ bfqq, atomic_read(&bfqq->ref));
9883 -+ }
9884 -+}
9885 -+
9886 -+/**
9887 -+ * bfq_find_deepest - find the deepest node that an extraction can modify.
9888 -+ * @node: the node being removed.
9889 -+ *
9890 -+ * Do the first step of an extraction in an rb tree, looking for the
9891 -+ * node that will replace @node, and returning the deepest node that
9892 -+ * the following modifications to the tree can touch. If @node is the
9893 -+ * last node in the tree return %NULL.
9894 -+ */
9895 -+static struct rb_node *bfq_find_deepest(struct rb_node *node)
9896 -+{
9897 -+ struct rb_node *deepest;
9898 -+
9899 -+ if (node->rb_right == NULL && node->rb_left == NULL)
9900 -+ deepest = rb_parent(node);
9901 -+ else if (node->rb_right == NULL)
9902 -+ deepest = node->rb_left;
9903 -+ else if (node->rb_left == NULL)
9904 -+ deepest = node->rb_right;
9905 -+ else {
9906 -+ deepest = rb_next(node);
9907 -+ if (deepest->rb_right != NULL)
9908 -+ deepest = deepest->rb_right;
9909 -+ else if (rb_parent(deepest) != node)
9910 -+ deepest = rb_parent(deepest);
9911 -+ }
9912 -+
9913 -+ return deepest;
9914 -+}
9915 -+
9916 -+/**
9917 -+ * bfq_active_extract - remove an entity from the active tree.
9918 -+ * @st: the service_tree containing the tree.
9919 -+ * @entity: the entity being removed.
9920 -+ */
9921 -+static void bfq_active_extract(struct bfq_service_tree *st,
9922 -+ struct bfq_entity *entity)
9923 -+{
9924 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9925 -+ struct rb_node *node;
9926 -+
9927 -+ node = bfq_find_deepest(&entity->rb_node);
9928 -+ bfq_extract(&st->active, entity);
9929 -+
9930 -+ if (node != NULL)
9931 -+ bfq_update_active_tree(node);
9932 -+
9933 -+ if (bfqq != NULL)
9934 -+ list_del(&bfqq->bfqq_list);
9935 -+}
9936 -+
9937 -+/**
9938 -+ * bfq_idle_insert - insert an entity into the idle tree.
9939 -+ * @st: the service tree containing the tree.
9940 -+ * @entity: the entity to insert.
9941 -+ */
9942 -+static void bfq_idle_insert(struct bfq_service_tree *st,
9943 -+ struct bfq_entity *entity)
9944 -+{
9945 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9946 -+ struct bfq_entity *first_idle = st->first_idle;
9947 -+ struct bfq_entity *last_idle = st->last_idle;
9948 -+
9949 -+ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish))
9950 -+ st->first_idle = entity;
9951 -+ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish))
9952 -+ st->last_idle = entity;
9953 -+
9954 -+ bfq_insert(&st->idle, entity);
9955 -+
9956 -+ if (bfqq != NULL)
9957 -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
9958 -+}
9959 -+
9960 -+/**
9961 -+ * bfq_forget_entity - remove an entity from the wfq trees.
9962 -+ * @st: the service tree.
9963 -+ * @entity: the entity being removed.
9964 -+ *
9965 -+ * Update the device status and forget everything about @entity, putting
9966 -+ * the device reference to it, if it is a queue. Entities belonging to
9967 -+ * groups are not refcounted.
9968 -+ */
9969 -+static void bfq_forget_entity(struct bfq_service_tree *st,
9970 -+ struct bfq_entity *entity)
9971 -+{
9972 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9973 -+ struct bfq_sched_data *sd;
9974 -+
9975 -+ BUG_ON(!entity->on_st);
9976 -+
9977 -+ entity->on_st = 0;
9978 -+ st->wsum -= entity->weight;
9979 -+ if (bfqq != NULL) {
9980 -+ sd = entity->sched_data;
9981 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
9982 -+ bfqq, atomic_read(&bfqq->ref));
9983 -+ bfq_put_queue(bfqq);
9984 -+ }
9985 -+}
9986 -+
9987 -+/**
9988 -+ * bfq_put_idle_entity - release the idle tree ref of an entity.
9989 -+ * @st: service tree for the entity.
9990 -+ * @entity: the entity being released.
9991 -+ */
9992 -+static void bfq_put_idle_entity(struct bfq_service_tree *st,
9993 -+ struct bfq_entity *entity)
9994 -+{
9995 -+ bfq_idle_extract(st, entity);
9996 -+ bfq_forget_entity(st, entity);
9997 -+}
9998 -+
9999 -+/**
10000 -+ * bfq_forget_idle - update the idle tree if necessary.
10001 -+ * @st: the service tree to act upon.
10002 -+ *
10003 -+ * To preserve the global O(log N) complexity we only remove one entry here;
10004 -+ * as the idle tree will not grow indefinitely this can be done safely.
10005 -+ */
10006 -+static void bfq_forget_idle(struct bfq_service_tree *st)
10007 -+{
10008 -+ struct bfq_entity *first_idle = st->first_idle;
10009 -+ struct bfq_entity *last_idle = st->last_idle;
10010 -+
10011 -+ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL &&
10012 -+ !bfq_gt(last_idle->finish, st->vtime)) {
10013 -+ /*
10014 -+ * Forget the whole idle tree, increasing the vtime past
10015 -+ * the last finish time of idle entities.
10016 -+ */
10017 -+ st->vtime = last_idle->finish;
10018 -+ }
10019 -+
10020 -+ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime))
10021 -+ bfq_put_idle_entity(st, first_idle);
10022 -+}
10023 -+
10024 -+static struct bfq_service_tree *
10025 -+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
10026 -+ struct bfq_entity *entity)
10027 -+{
10028 -+ struct bfq_service_tree *new_st = old_st;
10029 -+
10030 -+ if (entity->ioprio_changed) {
10031 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
10032 -+
10033 -+ BUG_ON(old_st->wsum < entity->weight);
10034 -+ old_st->wsum -= entity->weight;
10035 -+
10036 -+ if (entity->new_weight != entity->orig_weight) {
10037 -+ entity->orig_weight = entity->new_weight;
10038 -+ entity->ioprio =
10039 -+ bfq_weight_to_ioprio(entity->orig_weight);
10040 -+ } else if (entity->new_ioprio != entity->ioprio) {
10041 -+ entity->ioprio = entity->new_ioprio;
10042 -+ entity->orig_weight =
10043 -+ bfq_ioprio_to_weight(entity->ioprio);
10044 -+ } else
10045 -+ entity->new_weight = entity->orig_weight =
10046 -+ bfq_ioprio_to_weight(entity->ioprio);
10047 -+
10048 -+ entity->ioprio_class = entity->new_ioprio_class;
10049 -+ entity->ioprio_changed = 0;
10050 -+
10051 -+ /*
10052 -+ * NOTE: here we may be changing the weight too early;
10053 -+ * this will cause unfairness. The correct approach
10054 -+ * would have required additional complexity to defer
10055 -+ * weight changes to the proper time instants (i.e.,
10056 -+ * when entity->finish <= old_st->vtime).
10057 -+ */
10058 -+ new_st = bfq_entity_service_tree(entity);
10059 -+ entity->weight = entity->orig_weight *
10060 -+ (bfqq != NULL ? bfqq->raising_coeff : 1);
10061 -+ new_st->wsum += entity->weight;
10062 -+
10063 -+ if (new_st != old_st)
10064 -+ entity->start = new_st->vtime;
10065 -+ }
10066 -+
10067 -+ return new_st;
10068 -+}
10069 -+
10070 -+/**
10071 -+ * bfq_bfqq_served - update the scheduler status after selection for service.
10072 -+ * @bfqq: the queue being served.
10073 -+ * @served: bytes to transfer.
10074 -+ *
10075 -+ * NOTE: this can be optimized, as the timestamps of upper level entities
10076 -+ * are synchronized every time a new bfqq is selected for service. For now,
10077 -+ * we keep it to better check consistency.
10078 -+ */
10079 -+static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
10080 -+{
10081 -+ struct bfq_entity *entity = &bfqq->entity;
10082 -+ struct bfq_service_tree *st;
10083 -+
10084 -+ for_each_entity(entity) {
10085 -+ st = bfq_entity_service_tree(entity);
10086 -+
10087 -+ entity->service += served;
10088 -+ BUG_ON(entity->service > entity->budget);
10089 -+ BUG_ON(st->wsum == 0);
10090 -+
10091 -+ st->vtime += bfq_delta(served, st->wsum);
10092 -+ bfq_forget_idle(st);
10093 -+ }
10094 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served);
10095 -+}
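In the loop above, each entity's own service counter grows by the raw amount served while the tree's virtual time advances by the served amount divided by the total weight; a simplified user-space sketch of that accounting (toy structs, not the kernel's):

#include <stdint.h>
#include <stdio.h>

#define WFQ_SERVICE_SHIFT 22

struct toy_tree   { uint64_t vtime; unsigned long wsum; };
struct toy_entity { unsigned long service, weight; };

static uint64_t bfq_delta(unsigned long service, unsigned long weight)
{
	return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
}

int main(void)
{
	struct toy_tree st = { .vtime = 0, .wsum = 300 };     /* e.g. three queues of weight 100 */
	struct toy_entity e = { .service = 0, .weight = 100 };
	unsigned long served = 64;                            /* sectors just transferred */

	e.service += served;                     /* entity is charged the raw amount */
	st.vtime  += bfq_delta(served, st.wsum); /* vtime grows by served / total weight */

	printf("entity service = %lu, tree vtime = %llu\n",
	       e.service, (unsigned long long)st.vtime);
	return 0;
}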
10096 -+
10097 -+/**
10098 -+ * bfq_bfqq_charge_full_budget - set the service to the entity budget.
10099 -+ * @bfqq: the queue that needs a service update.
10100 -+ *
10101 -+ * When it's not possible to be fair in the service domain, because
10102 -+ * a queue is not consuming its budget fast enough (the meaning of
10103 -+ * fast depends on the timeout parameter), we charge it a full
10104 -+ * budget. In this way we should obtain a sort of time-domain
10105 -+ * fairness among all the seeky/slow queues.
10106 -+ */
10107 -+static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
10108 -+{
10109 -+ struct bfq_entity *entity = &bfqq->entity;
10110 -+
10111 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
10112 -+
10113 -+ bfq_bfqq_served(bfqq, entity->budget - entity->service);
10114 -+}
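In other words, a queue that hits the timeout after using only part of its budget gets charged the unused remainder as well, so in the time domain it cannot come out ahead of well-behaved queues; a tiny numeric sketch:

#include <stdio.h>

int main(void)
{
	unsigned long budget = 64, service = 12;	/* example values, in sectors */

	/* bfq_bfqq_charge_full_budget() feeds this difference to bfq_bfqq_served(),
	 * so the queue ends up charged for its whole budget. */
	printf("extra charge  : %lu sectors\n", budget - service);
	printf("total charged : %lu sectors\n", service + (budget - service));
	return 0;
}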
10115 -+
10116 -+/**
10117 -+ * __bfq_activate_entity - activate an entity.
10118 -+ * @entity: the entity being activated.
10119 -+ *
10120 -+ * Called whenever an entity is activated, i.e., it is not active and one
10121 -+ * of its children receives a new request, or has to be reactivated due to
10122 -+ * budget exhaustion. It uses the current budget of the entity (and the
10123 -+ * service received if @entity is active) of the queue to calculate its
10124 -+ * timestamps.
10125 -+ */
10126 -+static void __bfq_activate_entity(struct bfq_entity *entity)
10127 -+{
10128 -+ struct bfq_sched_data *sd = entity->sched_data;
10129 -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
10130 -+
10131 -+ if (entity == sd->in_service_entity) {
10132 -+ BUG_ON(entity->tree != NULL);
10133 -+ /*
10134 -+ * If we are requeueing the current entity we have
10135 -+ * to take care of not charging it for service it has
10136 -+ * not received.
10137 -+ */
10138 -+ bfq_calc_finish(entity, entity->service);
10139 -+ entity->start = entity->finish;
10140 -+ sd->in_service_entity = NULL;
10141 -+ } else if (entity->tree == &st->active) {
10142 -+ /*
10143 -+ * Requeueing an entity due to a change of some
10144 -+ * next_in_service entity below it. We reuse the
10145 -+ * old start time.
10146 -+ */
10147 -+ bfq_active_extract(st, entity);
10148 -+ } else if (entity->tree == &st->idle) {
10149 -+ /*
10150 -+ * Must be on the idle tree, bfq_idle_extract() will
10151 -+ * check for that.
10152 -+ */
10153 -+ bfq_idle_extract(st, entity);
10154 -+ entity->start = bfq_gt(st->vtime, entity->finish) ?
10155 -+ st->vtime : entity->finish;
10156 -+ } else {
10157 -+ /*
10158 -+ * The finish time of the entity may be invalid, and
10159 -+ * it is in the past for sure, otherwise the queue
10160 -+ * would have been on the idle tree.
10161 -+ */
10162 -+ entity->start = st->vtime;
10163 -+ st->wsum += entity->weight;
10164 -+ bfq_get_entity(entity);
10165 -+
10166 -+ BUG_ON(entity->on_st);
10167 -+ entity->on_st = 1;
10168 -+ }
10169 -+
10170 -+ st = __bfq_entity_update_weight_prio(st, entity);
10171 -+ bfq_calc_finish(entity, entity->budget);
10172 -+ bfq_active_insert(st, entity);
10173 -+}
10174 -+
10175 -+/**
10176 -+ * bfq_activate_entity - activate an entity and its ancestors if necessary.
10177 -+ * @entity: the entity to activate.
10178 -+ *
10179 -+ * Activate @entity and all the entities on the path from it to the root.
10180 -+ */
10181 -+static void bfq_activate_entity(struct bfq_entity *entity)
10182 -+{
10183 -+ struct bfq_sched_data *sd;
10184 -+
10185 -+ for_each_entity(entity) {
10186 -+ __bfq_activate_entity(entity);
10187 -+
10188 -+ sd = entity->sched_data;
10189 -+ if (!bfq_update_next_in_service(sd))
10190 -+ /*
10191 -+ * No need to propagate the activation to the
10192 -+ * upper entities, as they will be updated when
10193 -+ * the in-service entity is rescheduled.
10194 -+ */
10195 -+ break;
10196 -+ }
10197 -+}
10198 -+
10199 -+/**
10200 -+ * __bfq_deactivate_entity - deactivate an entity from its service tree.
10201 -+ * @entity: the entity to deactivate.
10202 -+ * @requeue: if false, the entity will not be put into the idle tree.
10203 -+ *
10204 -+ * Deactivate an entity, independently from its previous state. If the
10205 -+ * entity was not on a service tree just return, otherwise if it is on
10206 -+ * any scheduler tree, extract it from that tree, and if necessary
10207 -+ * and if the caller specified @requeue, put it on the idle tree.
10208 -+ *
10209 -+ * Return %1 if the caller should update the entity hierarchy, i.e.,
10210 -+ * if the entity was under service or if it was the next_in_service for
10211 -+ * its sched_data; return %0 otherwise.
10212 -+ */
10213 -+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
10214 -+{
10215 -+ struct bfq_sched_data *sd = entity->sched_data;
10216 -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
10217 -+ int was_in_service = entity == sd->in_service_entity;
10218 -+ int ret = 0;
10219 -+
10220 -+ if (!entity->on_st)
10221 -+ return 0;
10222 -+
10223 -+ BUG_ON(was_in_service && entity->tree != NULL);
10224 -+
10225 -+ if (was_in_service) {
10226 -+ bfq_calc_finish(entity, entity->service);
10227 -+ sd->in_service_entity = NULL;
10228 -+ } else if (entity->tree == &st->active)
10229 -+ bfq_active_extract(st, entity);
10230 -+ else if (entity->tree == &st->idle)
10231 -+ bfq_idle_extract(st, entity);
10232 -+ else if (entity->tree != NULL)
10233 -+ BUG();
10234 -+
10235 -+ if (was_in_service || sd->next_in_service == entity)
10236 -+ ret = bfq_update_next_in_service(sd);
10237 -+
10238 -+ if (!requeue || !bfq_gt(entity->finish, st->vtime))
10239 -+ bfq_forget_entity(st, entity);
10240 -+ else
10241 -+ bfq_idle_insert(st, entity);
10242 -+
10243 -+ BUG_ON(sd->in_service_entity == entity);
10244 -+ BUG_ON(sd->next_in_service == entity);
10245 -+
10246 -+ return ret;
10247 -+}
10248 -+
10249 -+/**
10250 -+ * bfq_deactivate_entity - deactivate an entity.
10251 -+ * @entity: the entity to deactivate.
10252 -+ * @requeue: true if the entity can be put on the idle tree
10253 -+ */
10254 -+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
10255 -+{
10256 -+ struct bfq_sched_data *sd;
10257 -+ struct bfq_entity *parent;
10258 -+
10259 -+ for_each_entity_safe(entity, parent) {
10260 -+ sd = entity->sched_data;
10261 -+
10262 -+ if (!__bfq_deactivate_entity(entity, requeue))
10263 -+ /*
10264 -+ * The parent entity is still backlogged, and
10265 -+ * we don't need to update it as it is still
10266 -+ * under service.
10267 -+ */
10268 -+ break;
10269 -+
10270 -+ if (sd->next_in_service != NULL)
10271 -+ /*
10272 -+ * The parent entity is still backlogged and
10273 -+ * the budgets on the path towards the root
10274 -+ * need to be updated.
10275 -+ */
10276 -+ goto update;
10277 -+
10278 -+ /*
10279 -+ * If we reach this point, the parent is no longer backlogged and
10280 -+ * we want to propagate the dequeue upwards.
10281 -+ */
10282 -+ requeue = 1;
10283 -+ }
10284 -+
10285 -+ return;
10286 -+
10287 -+update:
10288 -+ entity = parent;
10289 -+ for_each_entity(entity) {
10290 -+ __bfq_activate_entity(entity);
10291 -+
10292 -+ sd = entity->sched_data;
10293 -+ if (!bfq_update_next_in_service(sd))
10294 -+ break;
10295 -+ }
10296 -+}
10297 -+
10298 -+/**
10299 -+ * bfq_update_vtime - update vtime if necessary.
10300 -+ * @st: the service tree to act upon.
10301 -+ *
10302 -+ * If necessary update the service tree vtime to have at least one
10303 -+ * eligible entity, skipping to its start time. Assumes that the
10304 -+ * active tree of the device is not empty.
10305 -+ *
10306 -+ * NOTE: this hierarchical implementation updates vtimes quite often;
10307 -+ * we may end up with reactivated tasks getting timestamps after a
10308 -+ * vtime skip done because we needed a ->first_active entity on some
10309 -+ * intermediate node.
10310 -+ */
10311 -+static void bfq_update_vtime(struct bfq_service_tree *st)
10312 -+{
10313 -+ struct bfq_entity *entry;
10314 -+ struct rb_node *node = st->active.rb_node;
10315 -+
10316 -+ entry = rb_entry(node, struct bfq_entity, rb_node);
10317 -+ if (bfq_gt(entry->min_start, st->vtime)) {
10318 -+ st->vtime = entry->min_start;
10319 -+ bfq_forget_idle(st);
10320 -+ }
10321 -+}
10322 -+
10323 -+/**
10324 -+ * bfq_first_active_entity - find the eligible entity with
10325 -+ * the smallest finish time
10326 -+ * @st: the service tree to select from.
10327 -+ *
10328 -+ * This function searches the first schedulable entity, starting from the
10329 -+ * root of the tree and going on the left every time on this side there is
10330 -+ * a subtree with at least one eligible (start >= vtime) entity. The path
10331 -+ * on the right is followed only if a) the left subtree contains no eligible
10332 -+ * entities and b) no eligible entity has been found yet.
10333 -+ */
10334 -+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
10335 -+{
10336 -+ struct bfq_entity *entry, *first = NULL;
10337 -+ struct rb_node *node = st->active.rb_node;
10338 -+
10339 -+ while (node != NULL) {
10340 -+ entry = rb_entry(node, struct bfq_entity, rb_node);
10341 -+left:
10342 -+ if (!bfq_gt(entry->start, st->vtime))
10343 -+ first = entry;
10344 -+
10345 -+ BUG_ON(bfq_gt(entry->min_start, st->vtime));
10346 -+
10347 -+ if (node->rb_left != NULL) {
10348 -+ entry = rb_entry(node->rb_left,
10349 -+ struct bfq_entity, rb_node);
10350 -+ if (!bfq_gt(entry->min_start, st->vtime)) {
10351 -+ node = node->rb_left;
10352 -+ goto left;
10353 -+ }
10354 -+ }
10355 -+ if (first != NULL)
10356 -+ break;
10357 -+ node = node->rb_right;
10358 -+ }
10359 -+
10360 -+ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active));
10361 -+ return first;
10362 -+}
10363 -+
10364 -+/**
10365 -+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
10366 -+ * @st: the service tree.
10367 -+ *
10368 -+ * Update the virtual time in @st and return the first eligible entity
10369 -+ * it contains.
10370 -+ */
10371 -+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
10372 -+ bool force)
10373 -+{
10374 -+ struct bfq_entity *entity, *new_next_in_service = NULL;
10375 -+
10376 -+ if (RB_EMPTY_ROOT(&st->active))
10377 -+ return NULL;
10378 -+
10379 -+ bfq_update_vtime(st);
10380 -+ entity = bfq_first_active_entity(st);
10381 -+ BUG_ON(bfq_gt(entity->start, st->vtime));
10382 -+
10383 -+ /*
10384 -+ * If the chosen entity does not match with the sched_data's
10385 -+ * next_in_service and we are forcedly serving the IDLE priority
10386 -+ * class tree, bubble up budget update.
10387 -+ */
10388 -+ if (unlikely(force && entity != entity->sched_data->next_in_service)) {
10389 -+ new_next_in_service = entity;
10390 -+ for_each_entity(new_next_in_service)
10391 -+ bfq_update_budget(new_next_in_service);
10392 -+ }
10393 -+
10394 -+ return entity;
10395 -+}
10396 -+
10397 -+/**
10398 -+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
10399 -+ * @sd: the sched_data.
10400 -+ * @extract: if true the returned entity will be also extracted from @sd.
10401 -+ *
10402 -+ * NOTE: since we cache the next_in_service entity at each level of the
10403 -+ * hierarchy, the complexity of the lookup can be decreased with
10404 -+ * absolutely no effort just returning the cached next_in_service value;
10405 -+ * we prefer to do full lookups to test the consistency of * the data
10406 -+ * structures.
10407 -+ */
10408 -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
10409 -+ int extract,
10410 -+ struct bfq_data *bfqd)
10411 -+{
10412 -+ struct bfq_service_tree *st = sd->service_tree;
10413 -+ struct bfq_entity *entity;
10414 -+ int i = 0;
10415 -+
10416 -+ BUG_ON(sd->in_service_entity != NULL);
10417 -+
10418 -+ if (bfqd != NULL &&
10419 -+ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
10420 -+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
10421 -+ true);
10422 -+ if (entity != NULL) {
10423 -+ i = BFQ_IOPRIO_CLASSES - 1;
10424 -+ bfqd->bfq_class_idle_last_service = jiffies;
10425 -+ sd->next_in_service = entity;
10426 -+ }
10427 -+ }
10428 -+ for (; i < BFQ_IOPRIO_CLASSES; i++) {
10429 -+ entity = __bfq_lookup_next_entity(st + i, false);
10430 -+ if (entity != NULL) {
10431 -+ if (extract) {
10432 -+ bfq_check_next_in_service(sd, entity);
10433 -+ bfq_active_extract(st + i, entity);
10434 -+ sd->in_service_entity = entity;
10435 -+ sd->next_in_service = NULL;
10436 -+ }
10437 -+ break;
10438 -+ }
10439 -+ }
10440 -+
10441 -+ return entity;
10442 -+}
10443 -+
10444 -+/*
10445 -+ * Get next queue for service.
10446 -+ */
10447 -+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
10448 -+{
10449 -+ struct bfq_entity *entity = NULL;
10450 -+ struct bfq_sched_data *sd;
10451 -+ struct bfq_queue *bfqq;
10452 -+
10453 -+ BUG_ON(bfqd->in_service_queue != NULL);
10454 -+
10455 -+ if (bfqd->busy_queues == 0)
10456 -+ return NULL;
10457 -+
10458 -+ sd = &bfqd->root_group->sched_data;
10459 -+ for (; sd != NULL; sd = entity->my_sched_data) {
10460 -+ entity = bfq_lookup_next_entity(sd, 1, bfqd);
10461 -+ BUG_ON(entity == NULL);
10462 -+ entity->service = 0;
10463 -+ }
10464 -+
10465 -+ bfqq = bfq_entity_to_bfqq(entity);
10466 -+ BUG_ON(bfqq == NULL);
10467 -+
10468 -+ return bfqq;
10469 -+}
10470 -+
10471 -+/*
10472 -+ * Forced extraction of the given queue.
10473 -+ */
10474 -+static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
10475 -+ struct bfq_queue *bfqq)
10476 -+{
10477 -+ struct bfq_entity *entity;
10478 -+ struct bfq_sched_data *sd;
10479 -+
10480 -+ BUG_ON(bfqd->in_service_queue != NULL);
10481 -+
10482 -+ entity = &bfqq->entity;
10483 -+ /*
10484 -+ * Bubble up extraction/update from the leaf to the root.
10485 -+ */
10486 -+ for_each_entity(entity) {
10487 -+ sd = entity->sched_data;
10488 -+ bfq_update_budget(entity);
10489 -+ bfq_update_vtime(bfq_entity_service_tree(entity));
10490 -+ bfq_active_extract(bfq_entity_service_tree(entity), entity);
10491 -+ sd->in_service_entity = entity;
10492 -+ sd->next_in_service = NULL;
10493 -+ entity->service = 0;
10494 -+ }
10495 -+
10496 -+ return;
10497 -+}
10498 -+
10499 -+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
10500 -+{
10501 -+ if (bfqd->in_service_bic != NULL) {
10502 -+ put_io_context(bfqd->in_service_bic->icq.ioc);
10503 -+ bfqd->in_service_bic = NULL;
10504 -+ }
10505 -+
10506 -+ bfqd->in_service_queue = NULL;
10507 -+ del_timer(&bfqd->idle_slice_timer);
10508 -+}
10509 -+
10510 -+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
10511 -+ int requeue)
10512 -+{
10513 -+ struct bfq_entity *entity = &bfqq->entity;
10514 -+
10515 -+ if (bfqq == bfqd->in_service_queue)
10516 -+ __bfq_bfqd_reset_in_service(bfqd);
10517 -+
10518 -+ bfq_deactivate_entity(entity, requeue);
10519 -+}
10520 -+
10521 -+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
10522 -+{
10523 -+ struct bfq_entity *entity = &bfqq->entity;
10524 -+
10525 -+ bfq_activate_entity(entity);
10526 -+}
10527 -+
10528 -+/*
10529 -+ * Called when the bfqq no longer has requests pending, remove it from
10530 -+ * the service tree.
10531 -+ */
10532 -+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
10533 -+ int requeue)
10534 -+{
10535 -+ BUG_ON(!bfq_bfqq_busy(bfqq));
10536 -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
10537 -+
10538 -+ bfq_log_bfqq(bfqd, bfqq, "del from busy");
10539 -+
10540 -+ bfq_clear_bfqq_busy(bfqq);
10541 -+
10542 -+ BUG_ON(bfqd->busy_queues == 0);
10543 -+ bfqd->busy_queues--;
10544 -+ if (bfqq->raising_coeff > 1)
10545 -+ bfqd->raised_busy_queues--;
10546 -+
10547 -+ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
10548 -+}
10549 -+
10550 -+/*
10551 -+ * Called when an inactive queue receives a new request.
10552 -+ */
10553 -+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
10554 -+{
10555 -+ BUG_ON(bfq_bfqq_busy(bfqq));
10556 -+ BUG_ON(bfqq == bfqd->in_service_queue);
10557 -+
10558 -+ bfq_log_bfqq(bfqd, bfqq, "add to busy");
10559 -+
10560 -+ bfq_activate_bfqq(bfqd, bfqq);
10561 -+
10562 -+ bfq_mark_bfqq_busy(bfqq);
10563 -+ bfqd->busy_queues++;
10564 -+ if (bfqq->raising_coeff > 1)
10565 -+ bfqd->raised_busy_queues++;
10566 -+}
10567 -diff --git a/block/bfq.h b/block/bfq.h
10568 -new file mode 100644
10569 -index 0000000..3ca8482
10570 ---- /dev/null
10571 -+++ b/block/bfq.h
10572 -@@ -0,0 +1,622 @@
10573 -+/*
10574 -+ * BFQ-v7r2 for 3.14.0: data structures and common functions prototypes.
10575 -+ *
10576 -+ * Based on ideas and code from CFQ:
10577 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
10578 -+ *
10579 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
10580 -+ * Paolo Valente <paolo.valente@×××××××.it>
10581 -+ *
10582 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
10583 -+ */
10584 -+
10585 -+#ifndef _BFQ_H
10586 -+#define _BFQ_H
10587 -+
10588 -+#include <linux/blktrace_api.h>
10589 -+#include <linux/hrtimer.h>
10590 -+#include <linux/ioprio.h>
10591 -+#include <linux/rbtree.h>
10592 -+
10593 -+#define BFQ_IOPRIO_CLASSES 3
10594 -+#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
10595 -+
10596 -+#define BFQ_MIN_WEIGHT 1
10597 -+#define BFQ_MAX_WEIGHT 1000
10598 -+
10599 -+#define BFQ_DEFAULT_GRP_WEIGHT 10
10600 -+#define BFQ_DEFAULT_GRP_IOPRIO 0
10601 -+#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
10602 -+
10603 -+struct bfq_entity;
10604 -+
10605 -+/**
10606 -+ * struct bfq_service_tree - per ioprio_class service tree.
10607 -+ * @active: tree for active entities (i.e., those backlogged).
10608 -+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
10609 -+ * @first_idle: idle entity with minimum F_i.
10610 -+ * @last_idle: idle entity with maximum F_i.
10611 -+ * @vtime: scheduler virtual time.
10612 -+ * @wsum: scheduler weight sum; active and idle entities contribute to it.
10613 -+ *
10614 -+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
10615 -+ * ioprio_class has its own independent scheduler, and so its own
10616 -+ * bfq_service_tree. All the fields are protected by the queue lock
10617 -+ * of the containing bfqd.
10618 -+ */
10619 -+struct bfq_service_tree {
10620 -+ struct rb_root active;
10621 -+ struct rb_root idle;
10622 -+
10623 -+ struct bfq_entity *first_idle;
10624 -+ struct bfq_entity *last_idle;
10625 -+
10626 -+ u64 vtime;
10627 -+ unsigned long wsum;
10628 -+};
10629 -+
10630 -+/**
10631 -+ * struct bfq_sched_data - multi-class scheduler.
10632 -+ * @in_service_entity: entity under service.
10633 -+ * @next_in_service: head-of-the-line entity in the scheduler.
10634 -+ * @service_tree: array of service trees, one per ioprio_class.
10635 -+ *
10636 -+ * bfq_sched_data is the basic scheduler queue. It supports three
10637 -+ * ioprio_classes, and can be used either as a toplevel queue or as
10638 -+ * an intermediate queue on a hierarchical setup.
10639 -+ * @next_in_service points to the active entity of the sched_data
10640 -+ * service trees that will be scheduled next.
10641 -+ *
10642 -+ * The supported ioprio_classes are the same as in CFQ, in descending
10643 -+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
10644 -+ * Requests from higher priority queues are served before all the
10645 -+ * requests from lower priority queues; among requests of the same
10646 -+ * queue requests are served according to B-WF2Q+.
10647 -+ * All the fields are protected by the queue lock of the containing bfqd.
10648 -+ */
10649 -+struct bfq_sched_data {
10650 -+ struct bfq_entity *in_service_entity;
10651 -+ struct bfq_entity *next_in_service;
10652 -+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
10653 -+};
10654 -+
10655 -+/**
10656 -+ * struct bfq_entity - schedulable entity.
10657 -+ * @rb_node: service_tree member.
10658 -+ * @on_st: flag, true if the entity is on a tree (either the active or
10659 -+ * the idle one of its service_tree).
10660 -+ * @finish: B-WF2Q+ finish timestamp (aka F_i).
10661 -+ * @start: B-WF2Q+ start timestamp (aka S_i).
10662 -+ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
10663 -+ * @min_start: minimum start time of the (active) subtree rooted at
10664 -+ * this entity; used for O(log N) lookups into active trees.
10665 -+ * @service: service received during the last round of service.
10666 -+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
10667 -+ * @weight: weight of the queue
10668 -+ * @parent: parent entity, for hierarchical scheduling.
10669 -+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
10670 -+ * associated scheduler queue, %NULL on leaf nodes.
10671 -+ * @sched_data: the scheduler queue this entity belongs to.
10672 -+ * @ioprio: the ioprio in use.
10673 -+ * @new_weight: when a weight change is requested, the new weight value.
10674 -+ * @orig_weight: original weight, used to implement weight boosting
10675 -+ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
10676 -+ * @ioprio_class: the ioprio_class in use.
10677 -+ * @new_ioprio_class: when an ioprio_class change is requested, the new
10678 -+ * ioprio_class value.
10679 -+ * @ioprio_changed: flag, true when the user requested a weight, ioprio or
10680 -+ * ioprio_class change.
10681 -+ *
10682 -+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
10683 -+ * cgroup hierarchy) or a bfq_group in the upper level scheduler. Each
10684 -+ * entity belongs to the sched_data of the parent group in the cgroup
10685 -+ * hierarchy. Non-leaf entities have also their own sched_data, stored
10686 -+ * in @my_sched_data.
10687 -+ *
10688 -+ * Each entity stores independently its priority values; this would
10689 -+ * allow different weights on different devices, but this
10690 -+ * functionality is not exported to userspace for now. Priorities and
10691 -+ * weights are updated lazily, first storing the new values into the
10692 -+ * new_* fields, then setting the @ioprio_changed flag. As soon as
10693 -+ * there is a transition in the entity state that allows the priority
10694 -+ * update to take place the effective and the requested priority
10695 -+ * values are synchronized.
10696 -+ *
10697 -+ * Unless cgroups are used, the weight value is calculated from the
10698 -+ * ioprio to export the same interface as CFQ. When dealing with
10699 -+ * ``well-behaved'' queues (i.e., queues that do not spend too much
10700 -+ * time to consume their budget and have true sequential behavior, and
10701 -+ * when there are no external factors breaking anticipation) the
10702 -+ * relative weights at each level of the cgroups hierarchy should be
10703 -+ * guaranteed. All the fields are protected by the queue lock of the
10704 -+ * containing bfqd.
10705 -+ */
10706 -+struct bfq_entity {
10707 -+ struct rb_node rb_node;
10708 -+
10709 -+ int on_st;
10710 -+
10711 -+ u64 finish;
10712 -+ u64 start;
10713 -+
10714 -+ struct rb_root *tree;
10715 -+
10716 -+ u64 min_start;
10717 -+
10718 -+ unsigned long service, budget;
10719 -+ unsigned short weight, new_weight;
10720 -+ unsigned short orig_weight;
10721 -+
10722 -+ struct bfq_entity *parent;
10723 -+
10724 -+ struct bfq_sched_data *my_sched_data;
10725 -+ struct bfq_sched_data *sched_data;
10726 -+
10727 -+ unsigned short ioprio, new_ioprio;
10728 -+ unsigned short ioprio_class, new_ioprio_class;
10729 -+
10730 -+ int ioprio_changed;
10731 -+};
10732 -+
10733 -+struct bfq_group;
10734 -+
10735 -+/**
10736 -+ * struct bfq_queue - leaf schedulable entity.
10737 -+ * @ref: reference counter.
10738 -+ * @bfqd: parent bfq_data.
10739 -+ * @new_bfqq: shared bfq_queue if queue is cooperating with
10740 -+ * one or more other queues.
10741 -+ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree).
10742 -+ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree).
10743 -+ * @sort_list: sorted list of pending requests.
10744 -+ * @next_rq: if fifo isn't expired, next request to serve.
10745 -+ * @queued: nr of requests queued in @sort_list.
10746 -+ * @allocated: currently allocated requests.
10747 -+ * @meta_pending: pending metadata requests.
10748 -+ * @fifo: fifo list of requests in sort_list.
10749 -+ * @entity: entity representing this queue in the scheduler.
10750 -+ * @max_budget: maximum budget allowed from the feedback mechanism.
10751 -+ * @budget_timeout: budget expiration (in jiffies).
10752 -+ * @dispatched: number of requests on the dispatch list or inside driver.
10753 -+ * @org_ioprio: saved ioprio during boosted periods.
10754 -+ * @flags: status flags.
10755 -+ * @bfqq_list: node for active/idle bfqq list inside our bfqd.
10756 -+ * @seek_samples: number of seeks sampled
10757 -+ * @seek_total: sum of the distances of the seeks sampled
10758 -+ * @seek_mean: mean seek distance
10759 -+ * @last_request_pos: position of the last request enqueued
10760 -+ * @pid: pid of the process owning the queue, used for logging purposes.
10761 -+ * @last_rais_start_finish: start time of the current weight-raising period if
10762 -+ * the @bfq-queue is being weight-raised, otherwise
10763 -+ * finish time of the last weight-raising period
10764 -+ * @raising_cur_max_time: current max raising time for this queue
10765 -+ * @soft_rt_next_start: minimum time instant such that, only if a new request
10766 -+ * is enqueued after this time instant in an idle
10767 -+ * @bfq_queue with no outstanding requests, then the
10768 -+ * task associated with the queue is deemed as soft
10769 -+ * real-time (see the comments to the function
10770 -+ * bfq_bfqq_softrt_next_start())
10771 -+ * @last_idle_bklogged: time of the last transition of the @bfq_queue from
10772 -+ * idle to backlogged
10773 -+ * @service_from_backlogged: cumulative service received from the @bfq_queue
10774 -+ * since the last transition from idle to backlogged
10775 -+ *
10776 -+ * A bfq_queue is a leaf request queue; it can be associated with an io_context
10777 -+ * or more, if it is async or shared between cooperating processes. @cgroup
10778 -+ * holds a reference to the cgroup, to be sure that it does not disappear while
10779 -+ * a bfqq still references it (mostly to avoid races between request issuing and
10780 -+ * task migration followed by cgroup destruction).
10781 -+ * All the fields are protected by the queue lock of the containing bfqd.
10782 -+ */
10783 -+struct bfq_queue {
10784 -+ atomic_t ref;
10785 -+ struct bfq_data *bfqd;
10786 -+
10787 -+ /* fields for cooperating queues handling */
10788 -+ struct bfq_queue *new_bfqq;
10789 -+ struct rb_node pos_node;
10790 -+ struct rb_root *pos_root;
10791 -+
10792 -+ struct rb_root sort_list;
10793 -+ struct request *next_rq;
10794 -+ int queued[2];
10795 -+ int allocated[2];
10796 -+ int meta_pending;
10797 -+ struct list_head fifo;
10798 -+
10799 -+ struct bfq_entity entity;
10800 -+
10801 -+ unsigned long max_budget;
10802 -+ unsigned long budget_timeout;
10803 -+
10804 -+ int dispatched;
10805 -+
10806 -+ unsigned short org_ioprio;
10807 -+
10808 -+ unsigned int flags;
10809 -+
10810 -+ struct list_head bfqq_list;
10811 -+
10812 -+ unsigned int seek_samples;
10813 -+ u64 seek_total;
10814 -+ sector_t seek_mean;
10815 -+ sector_t last_request_pos;
10816 -+
10817 -+ pid_t pid;
10818 -+
10819 -+ /* weight-raising fields */
10820 -+ unsigned long raising_cur_max_time;
10821 -+ unsigned long soft_rt_next_start;
10822 -+ unsigned long last_rais_start_finish;
10823 -+ unsigned int raising_coeff;
10824 -+ unsigned long last_idle_bklogged;
10825 -+ unsigned long service_from_backlogged;
10826 -+};
10827 -+
10828 -+/**
10829 -+ * struct bfq_ttime - per process thinktime stats.
10830 -+ * @ttime_total: total process thinktime
10831 -+ * @ttime_samples: number of thinktime samples
10832 -+ * @ttime_mean: average process thinktime
10833 -+ */
10834 -+struct bfq_ttime {
10835 -+ unsigned long last_end_request;
10836 -+
10837 -+ unsigned long ttime_total;
10838 -+ unsigned long ttime_samples;
10839 -+ unsigned long ttime_mean;
10840 -+};
10841 -+
10842 -+/**
10843 -+ * struct bfq_io_cq - per (request_queue, io_context) structure.
10844 -+ * @icq: associated io_cq structure
10845 -+ * @bfqq: array of two process queues, the sync and the async
10846 -+ * @ttime: associated @bfq_ttime struct
10847 -+ */
10848 -+struct bfq_io_cq {
10849 -+ struct io_cq icq; /* must be the first member */
10850 -+ struct bfq_queue *bfqq[2];
10851 -+ struct bfq_ttime ttime;
10852 -+ int ioprio;
10853 -+};
10854 -+
10855 -+/**
10856 -+ * struct bfq_data - per device data structure.
10857 -+ * @queue: request queue for the managed device.
10858 -+ * @root_group: root bfq_group for the device.
10859 -+ * @rq_pos_tree: rbtree sorted by next_request position,
10860 -+ * used when determining if two or more queues
10861 -+ * have interleaving requests (see bfq_close_cooperator).
10862 -+ * @busy_queues: number of bfq_queues containing requests (including the
10863 -+ * queue under service, even if it is idling).
10864 -+ * @raised_busy_queues: number of weight-raised busy bfq_queues.
10865 -+ * @queued: number of queued requests.
10866 -+ * @rq_in_driver: number of requests dispatched and waiting for completion.
10867 -+ * @sync_flight: number of sync requests in the driver.
10868 -+ * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples
10869 -+ * completed requests.
10870 -+ * @hw_tag_samples: nr of samples used to calculate hw_tag.
10871 -+ * @hw_tag: flag set to one if the driver is showing a queueing behavior.
10872 -+ * @budgets_assigned: number of budgets assigned.
10873 -+ * @idle_slice_timer: timer set when idling for the next sequential request
10874 -+ * from the queue under service.
10875 -+ * @unplug_work: delayed work to restart dispatching on the request queue.
10876 -+ * @in_service_queue: bfq_queue under service.
10877 -+ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
10878 -+ * @last_position: on-disk position of the last served request.
10879 -+ * @last_budget_start: beginning of the last budget.
10880 -+ * @last_idling_start: beginning of the last idle slice.
10881 -+ * @peak_rate: peak transfer rate observed for a budget.
10882 -+ * @peak_rate_samples: number of samples used to calculate @peak_rate.
10883 -+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling.
10884 -+ * @group_list: list of all the bfq_groups active on the device.
10885 -+ * @active_list: list of all the bfq_queues active on the device.
10886 -+ * @idle_list: list of all the bfq_queues idle on the device.
10887 -+ * @bfq_quantum: max number of requests dispatched per dispatch round.
10888 -+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
10889 -+ * requests are served in fifo order.
10890 -+ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
10891 -+ * @bfq_back_max: maximum allowed backward seek.
10892 -+ * @bfq_slice_idle: maximum idling time.
10893 -+ * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning).
10894 -+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
10895 -+ * async queues.
10896 -+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
10897 -+ * prevent seeky queues from imposing long latencies on well
10898 -+ * behaved ones (this also implies that seeky queues cannot
10899 -+ * receive guarantees in the service domain; after a timeout
10900 -+ * they are charged for the whole allocated budget, to try
10901 -+ * to preserve a behavior reasonably fair among them, but
10902 -+ * without service-domain guarantees).
10903 -+ * @bfq_raising_coeff: Maximum factor by which the weight of a boosted
10904 -+ * queue is multiplied
10905 -+ * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies)
10906 -+ * @bfq_raising_rt_max_time: maximum duration for soft real-time processes
10907 -+ * @bfq_raising_min_idle_time: minimum idle period after which weight-raising
10908 -+ * may be reactivated for a queue (in jiffies)
10909 -+ * @bfq_raising_min_inter_arr_async: minimum period between request arrivals
10910 -+ * after which weight-raising may be
10911 -+ * reactivated for an already busy queue
10912 -+ * (in jiffies)
10913 -+ * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue,
10914 -+ * sectors per second
10915 -+ * @RT_prod: cached value of the product R*T used for computing the maximum
10916 -+ * duration of the weight raising automatically
10917 -+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions
10918 -+ *
10919 -+ * All the fields are protected by the @queue lock.
10920 -+ */
10921 -+struct bfq_data {
10922 -+ struct request_queue *queue;
10923 -+
10924 -+ struct bfq_group *root_group;
10925 -+
10926 -+ struct rb_root rq_pos_tree;
10927 -+
10928 -+ int busy_queues;
10929 -+ int raised_busy_queues;
10930 -+ int queued;
10931 -+ int rq_in_driver;
10932 -+ int sync_flight;
10933 -+
10934 -+ int max_rq_in_driver;
10935 -+ int hw_tag_samples;
10936 -+ int hw_tag;
10937 -+
10938 -+ int budgets_assigned;
10939 -+
10940 -+ struct timer_list idle_slice_timer;
10941 -+ struct work_struct unplug_work;
10942 -+
10943 -+ struct bfq_queue *in_service_queue;
10944 -+ struct bfq_io_cq *in_service_bic;
10945 -+
10946 -+ sector_t last_position;
10947 -+
10948 -+ ktime_t last_budget_start;
10949 -+ ktime_t last_idling_start;
10950 -+ int peak_rate_samples;
10951 -+ u64 peak_rate;
10952 -+ unsigned long bfq_max_budget;
10953 -+
10954 -+ struct hlist_head group_list;
10955 -+ struct list_head active_list;
10956 -+ struct list_head idle_list;
10957 -+
10958 -+ unsigned int bfq_quantum;
10959 -+ unsigned int bfq_fifo_expire[2];
10960 -+ unsigned int bfq_back_penalty;
10961 -+ unsigned int bfq_back_max;
10962 -+ unsigned int bfq_slice_idle;
10963 -+ u64 bfq_class_idle_last_service;
10964 -+
10965 -+ unsigned int bfq_user_max_budget;
10966 -+ unsigned int bfq_max_budget_async_rq;
10967 -+ unsigned int bfq_timeout[2];
10968 -+
10969 -+ bool low_latency;
10970 -+
10971 -+ /* parameters of the low_latency heuristics */
10972 -+ unsigned int bfq_raising_coeff;
10973 -+ unsigned int bfq_raising_max_time;
10974 -+ unsigned int bfq_raising_rt_max_time;
10975 -+ unsigned int bfq_raising_min_idle_time;
10976 -+ unsigned long bfq_raising_min_inter_arr_async;
10977 -+ unsigned int bfq_raising_max_softrt_rate;
10978 -+ u64 RT_prod;
10979 -+
10980 -+ struct bfq_queue oom_bfqq;
10981 -+};
10982 -+
10983 -+enum bfqq_state_flags {
10984 -+ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */
10985 -+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
10986 -+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
10987 -+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
10988 -+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
10989 -+ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */
10990 -+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
10991 -+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
10992 -+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
10993 -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
10994 -+ BFQ_BFQQ_FLAG_softrt_update, /* needs softrt-next-start update */
10995 -+};
10996 -+
10997 -+#define BFQ_BFQQ_FNS(name) \
10998 -+static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
10999 -+{ \
11000 -+ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \
11001 -+} \
11002 -+static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
11003 -+{ \
11004 -+ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \
11005 -+} \
11006 -+static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
11007 -+{ \
11008 -+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
11009 -+}
11010 -+
11011 -+BFQ_BFQQ_FNS(busy);
11012 -+BFQ_BFQQ_FNS(wait_request);
11013 -+BFQ_BFQQ_FNS(must_alloc);
11014 -+BFQ_BFQQ_FNS(fifo_expire);
11015 -+BFQ_BFQQ_FNS(idle_window);
11016 -+BFQ_BFQQ_FNS(prio_changed);
11017 -+BFQ_BFQQ_FNS(sync);
11018 -+BFQ_BFQQ_FNS(budget_new);
11019 -+BFQ_BFQQ_FNS(coop);
11020 -+BFQ_BFQQ_FNS(split_coop);
11021 -+BFQ_BFQQ_FNS(softrt_update);
11022 -+#undef BFQ_BFQQ_FNS
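
The BFQ_BFQQ_FNS macro above stamps out a mark/clear/test helper triple for every flag listed in enum bfqq_state_flags, so each flag gets a consistent accessor without repeating the bit manipulation by hand. A minimal, self-contained user-space sketch of the same pattern (the names and flags below are illustrative, not taken from the kernel sources):

#include <stdio.h>

/* Illustrative stand-in for struct bfq_queue: only the flags word matters. */
struct queue { unsigned int flags; };

enum queue_state_flags {
	QUEUE_FLAG_busy = 0,
	QUEUE_FLAG_sync,
};

/* Same idea as BFQ_BFQQ_FNS: one macro expansion per flag generates
 * a setter, a clearer and a tester operating on the shared flags word. */
#define QUEUE_FNS(name)						\
static inline void mark_queue_##name(struct queue *q)		\
{								\
	q->flags |= (1U << QUEUE_FLAG_##name);			\
}								\
static inline void clear_queue_##name(struct queue *q)		\
{								\
	q->flags &= ~(1U << QUEUE_FLAG_##name);			\
}								\
static inline int queue_##name(const struct queue *q)		\
{								\
	return (q->flags & (1U << QUEUE_FLAG_##name)) != 0;	\
}

QUEUE_FNS(busy)
QUEUE_FNS(sync)
#undef QUEUE_FNS

int main(void)
{
	struct queue q = { 0 };

	mark_queue_busy(&q);
	printf("busy=%d sync=%d\n", queue_busy(&q), queue_sync(&q));
	clear_queue_busy(&q);
	printf("busy=%d\n", queue_busy(&q));
	return 0;
}
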
11023 -+
11024 -+/* Logging facilities. */
11025 -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
11026 -+ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
11027 -+
11028 -+#define bfq_log(bfqd, fmt, args...) \
11029 -+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
11030 -+
11031 -+/* Expiration reasons. */
11032 -+enum bfqq_expiration {
11033 -+ BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */
11034 -+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
11035 -+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
11036 -+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
11037 -+};
11038 -+
11039 -+#ifdef CONFIG_CGROUP_BFQIO
11040 -+/**
11041 -+ * struct bfq_group - per (device, cgroup) data structure.
11042 -+ * @entity: schedulable entity to insert into the parent group sched_data.
11043 -+ * @sched_data: own sched_data, to contain child entities (they may be
11044 -+ * both bfq_queues and bfq_groups).
11045 -+ * @group_node: node to be inserted into the bfqio_cgroup->group_data
11046 -+ * list of the containing cgroup's bfqio_cgroup.
11047 -+ * @bfqd_node: node to be inserted into the @bfqd->group_list list
11048 -+ * of the groups active on the same device; used for cleanup.
11049 -+ * @bfqd: the bfq_data for the device this group acts upon.
11050 -+ * @async_bfqq: array of async queues for all the tasks belonging to
11051 -+ * the group, one queue per ioprio value per ioprio_class,
11052 -+ * except for the idle class that has only one queue.
11053 -+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
11054 -+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
11055 -+ * to avoid too many special cases during group creation/migration.
11056 -+ *
11057 -+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
11058 -+ * there is a set of bfq_groups, each one collecting the lower-level
11059 -+ * entities belonging to the group that are acting on the same device.
11060 -+ *
11061 -+ * Locking works as follows:
11062 -+ * o @group_node is protected by the bfqio_cgroup lock, and is accessed
11063 -+ * via RCU from its readers.
11064 -+ * o @bfqd is protected by the queue lock, RCU is used to access it
11065 -+ * from the readers.
11066 -+ * o All the other fields are protected by the @bfqd queue lock.
11067 -+ */
11068 -+struct bfq_group {
11069 -+ struct bfq_entity entity;
11070 -+ struct bfq_sched_data sched_data;
11071 -+
11072 -+ struct hlist_node group_node;
11073 -+ struct hlist_node bfqd_node;
11074 -+
11075 -+ void *bfqd;
11076 -+
11077 -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
11078 -+ struct bfq_queue *async_idle_bfqq;
11079 -+
11080 -+ struct bfq_entity *my_entity;
11081 -+};
11082 -+
11083 -+/**
11084 -+ * struct bfqio_cgroup - bfq cgroup data structure.
11085 -+ * @css: subsystem state for bfq in the containing cgroup.
11086 -+ * @online: flag marked when the subsystem is inserted.
11087 -+ * @weight: cgroup weight.
11088 -+ * @ioprio: cgroup ioprio.
11089 -+ * @ioprio_class: cgroup ioprio_class.
11090 -+ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data.
11091 -+ * @group_data: list containing the bfq_group belonging to this cgroup.
11092 -+ *
11093 -+ * @group_data is accessed using RCU, with @lock protecting the updates,
11094 -+ * @ioprio and @ioprio_class are protected by @lock.
11095 -+ */
11096 -+struct bfqio_cgroup {
11097 -+ struct cgroup_subsys_state css;
11098 -+ bool online;
11099 -+
11100 -+ unsigned short weight, ioprio, ioprio_class;
11101 -+
11102 -+ spinlock_t lock;
11103 -+ struct hlist_head group_data;
11104 -+};
11105 -+#else
11106 -+struct bfq_group {
11107 -+ struct bfq_sched_data sched_data;
11108 -+
11109 -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
11110 -+ struct bfq_queue *async_idle_bfqq;
11111 -+};
11112 -+#endif
11113 -+
11114 -+static inline struct bfq_service_tree *
11115 -+bfq_entity_service_tree(struct bfq_entity *entity)
11116 -+{
11117 -+ struct bfq_sched_data *sched_data = entity->sched_data;
11118 -+ unsigned int idx = entity->ioprio_class - 1;
11119 -+
11120 -+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
11121 -+ BUG_ON(sched_data == NULL);
11122 -+
11123 -+ return sched_data->service_tree + idx;
11124 -+}
11125 -+
11126 -+static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic,
11127 -+ int is_sync)
11128 -+{
11129 -+ return bic->bfqq[!!is_sync];
11130 -+}
11131 -+
11132 -+static inline void bic_set_bfqq(struct bfq_io_cq *bic,
11133 -+ struct bfq_queue *bfqq, int is_sync)
11134 -+{
11135 -+ bic->bfqq[!!is_sync] = bfqq;
11136 -+}
11137 -+
11138 -+static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
11139 -+{
11140 -+ return bic->icq.q->elevator->elevator_data;
11141 -+}
11142 -+
11143 -+/**
11144 -+ * bfq_get_bfqd_locked - get a lock to a bfqd using an RCU-protected pointer.
11145 -+ * @ptr: a pointer to a bfqd.
11146 -+ * @flags: storage for the flags to be saved.
11147 -+ *
11148 -+ * This function allows bfqg->bfqd to be protected by the
11149 -+ * queue lock of the bfqd they reference; the pointer is dereferenced
11150 -+ * under RCU, so the storage for bfqd is assured to be safe as long
11151 -+ * as the RCU read side critical section does not end. After the
11152 -+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
11153 -+ * sure that no other writer accessed it. If we raced with a writer,
11154 -+ * the function returns NULL, with the queue unlocked, otherwise it
11155 -+ * returns the dereferenced pointer, with the queue locked.
11156 -+ */
11157 -+static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
11158 -+ unsigned long *flags)
11159 -+{
11160 -+ struct bfq_data *bfqd;
11161 -+
11162 -+ rcu_read_lock();
11163 -+ bfqd = rcu_dereference(*(struct bfq_data **)ptr);
11164 -+
11165 -+ if (bfqd != NULL) {
11166 -+ spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
11167 -+ if (*ptr == bfqd)
11168 -+ goto out;
11169 -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
11170 -+ }
11171 -+
11172 -+ bfqd = NULL;
11173 -+out:
11174 -+ rcu_read_unlock();
11175 -+ return bfqd;
11176 -+}
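
bfq_get_bfqd_locked() above follows a common lookup pattern: read the pointer speculatively under RCU, take the lock owned by the object it points to, then re-check that the pointer still refers to the same object before trusting it. A rough user-space analogue of that recheck step, with a pthread mutex standing in for the queue lock and a plain pointer load standing in for rcu_dereference() (names are illustrative, and a real concurrent version would need RCU or atomics for the speculative read):

#include <pthread.h>
#include <stdio.h>

struct device {
	pthread_mutex_t lock;
	int id;
};

/* Shared slot that a writer may clear or repoint at any time. */
static struct device *slot;

/*
 * Speculatively read *pslot, lock the device it points to, then confirm
 * the slot still points at the same device; otherwise drop the lock and
 * report failure, as bfq_get_bfqd_locked() does by returning NULL.
 */
static struct device *get_device_locked(struct device **pslot)
{
	struct device *dev = *pslot;		/* speculative read */

	if (dev != NULL) {
		pthread_mutex_lock(&dev->lock);
		if (*pslot == dev)		/* recheck under the lock */
			return dev;
		pthread_mutex_unlock(&dev->lock); /* raced with a writer */
	}
	return NULL;
}

int main(void)
{
	static struct device d = { PTHREAD_MUTEX_INITIALIZER, 42 };
	struct device *got;

	slot = &d;
	got = get_device_locked(&slot);
	if (got) {
		printf("locked device %d\n", got->id);
		pthread_mutex_unlock(&got->lock);
	}
	return 0;
}
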
11177 -+
11178 -+static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
11179 -+ unsigned long *flags)
11180 -+{
11181 -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
11182 -+}
11183 -+
11184 -+static void bfq_changed_ioprio(struct bfq_io_cq *bic);
11185 -+static void bfq_put_queue(struct bfq_queue *bfqq);
11186 -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
11187 -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
11188 -+ struct bfq_group *bfqg, int is_sync,
11189 -+ struct bfq_io_cq *bic, gfp_t gfp_mask);
11190 -+static void bfq_end_raising_async_queues(struct bfq_data *bfqd,
11191 -+ struct bfq_group *bfqg);
11192 -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
11193 -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
11194 -+#endif
11195 ---
11196 -1.9.0
11197 -
11198
11199 Deleted: genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch
11200 ===================================================================
11201 --- genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch 2014-05-05 14:22:27 UTC (rev 2771)
11202 +++ genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch 2014-05-05 14:28:48 UTC (rev 2772)
11203 @@ -1,1038 +0,0 @@
11204 -From 4fbeda28a90d7fccd05d28a89d9fc409b2344e0a Mon Sep 17 00:00:00 2001
11205 -From: Mauro Andreolini <mauro.andreolini@×××××××.it>
11206 -Date: Fri, 14 Feb 2014 12:52:49 +0100
11207 -Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r2 for
11208 - 3.14.0
11209 -
11210 -A set of processes may happen to perform interleaved reads, i.e., requests
11211 -whose union would give rise to a sequential read pattern. There are two
11212 -typical cases: in the first case, processes read fixed-size chunks of
11213 -data at a fixed distance from each other, while in the second case processes
11214 -may read variable-size chunks at variable distances. The latter case occurs
11215 -for example with KVM, which splits the I/O generated by the guest into
11216 -multiple chunks, and lets these chunks be served by a pool of cooperating
11217 -processes, iteratively assigning the next chunk of I/O to the first
11218 -available process. CFQ uses actual queue merging for the first type of
11219 -processes, whereas it uses preemption to get a sequential read pattern out
11220 -of the read requests performed by the second type of processes. In the end
11221 -it uses two different mechanisms to achieve the same goal: boosting the
11222 -throughput with interleaved I/O.
11223 -
11224 -This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
11225 -sequential read pattern with both types of processes. The main idea is
11226 -checking newly arrived requests against the next request of the active queue
11227 -both in case of actual request insert and in case of request merge. By doing
11228 -so, both types of processes can be handled by just merging their queues.
11229 -EQM is then simpler and more compact than the pair of mechanisms used in
11230 -CFQ.
11231 -
11232 -Finally, EQM also preserves the typical low-latency properties of BFQ, by
11233 -properly restoring the weight-raising state of a queue when it gets back to
11234 -a non-merged state.
11235 -
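
The core of the early-merge check described above is a distance test: when a request (or bio) arrives, its sector is compared against the position the in-service queue is working at, and if the two are within a seek threshold the queues become merge candidates. A stripped-down sketch of that decision with an arbitrary threshold and made-up types (the real code uses BFQQ_SEEK_THR, bfq_dist_from() and bfq_rq_close_to_sector(), shown further down in this patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

/* Illustrative threshold; the real BFQQ_SEEK_THR is defined elsewhere in bfq. */
#define SEEK_THRESHOLD 128

static sector_t dist_from(sector_t pos1, sector_t pos2)
{
	return pos1 >= pos2 ? pos1 - pos2 : pos2 - pos1;
}

/* True if a newly arrived request at new_pos is "close" to where the
 * in-service queue is currently reading, i.e. the two queues are likely
 * issuing interleaved, jointly sequential I/O and may be merged. */
static bool close_to_in_service(sector_t new_pos, sector_t in_service_pos)
{
	return dist_from(new_pos, in_service_pos) <= SEEK_THRESHOLD;
}

int main(void)
{
	sector_t in_service_pos = 10000;

	printf("%d\n", close_to_in_service(10064, in_service_pos)); /* 1: merge candidate */
	printf("%d\n", close_to_in_service(90000, in_service_pos)); /* 0: too far away */
	return 0;
}
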
11236 -Signed-off-by: Mauro Andreolini <mauro.andreolini@×××××××.it>
11237 -Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
11238 -Reviewed-by: Paolo Valente <paolo.valente@×××××××.it>
11239 ----
11240 - block/bfq-iosched.c | 657 ++++++++++++++++++++++++++++++++++++----------------
11241 - block/bfq-sched.c | 28 ---
11242 - block/bfq.h | 20 +-
11243 - 3 files changed, 476 insertions(+), 229 deletions(-)
11244 -
11245 -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
11246 -index f5f71e4..0d3503d 100644
11247 ---- a/block/bfq-iosched.c
11248 -+++ b/block/bfq-iosched.c
11249 -@@ -445,6 +445,46 @@ static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
11250 - return dur;
11251 - }
11252 -
11253 -+static inline void
11254 -+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
11255 -+{
11256 -+ if (bic->saved_idle_window)
11257 -+ bfq_mark_bfqq_idle_window(bfqq);
11258 -+ else
11259 -+ bfq_clear_bfqq_idle_window(bfqq);
11260 -+ if (bic->raising_time_left && bfqq->bfqd->low_latency) {
11261 -+ /*
11262 -+ * Start a weight raising period with the duration given by
11263 -+ * the raising_time_left snapshot.
11264 -+ */
11265 -+ if (bfq_bfqq_busy(bfqq))
11266 -+ bfqq->bfqd->raised_busy_queues++;
11267 -+ bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff;
11268 -+ bfqq->raising_cur_max_time = bic->raising_time_left;
11269 -+ bfqq->last_rais_start_finish = jiffies;
11270 -+ bfqq->entity.ioprio_changed = 1;
11271 -+ }
11272 -+ /*
11273 -+ * Clear raising_time_left to prevent bfq_bfqq_save_state() from
11274 -+ * getting confused about the queue's need of a weight-raising
11275 -+ * period.
11276 -+ */
11277 -+ bic->raising_time_left = 0;
11278 -+}
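
bfq_bfqq_resume_state() above, together with bfq_bfqq_save_state() further down in this patch, implements a small snapshot/restore protocol: when a queue is merged, the time left in its weight-raising period is remembered in the bfq_io_cq, and when the queue is later split off again the boost resumes only for that leftover time. A minimal sketch of the bookkeeping, using plain integers for time and hypothetical names (no jiffies, no locking):

#include <stdio.h>

/* Illustrative per-process snapshot, like the raising_time_left field. */
struct io_ctx {
	unsigned int raising_time_left;
};

/* Illustrative queue: boosted while coeff > 1, for cur_max_time ticks. */
struct queue {
	unsigned int coeff;
	unsigned int cur_max_time;
	unsigned int start;
};

/* On merge: remember how much of the boost is still unused (0 if it is
 * about to expire anyway), then end the boost on the now-shared queue. */
static void save_state(struct queue *q, struct io_ctx *ctx, unsigned int now)
{
	unsigned int elapsed = now - q->start;

	if (q->coeff > 1)
		ctx->raising_time_left =
			elapsed >= q->cur_max_time ? 0 : q->cur_max_time - elapsed;
	else
		ctx->raising_time_left = 0;
	q->coeff = 1;
}

/* On split: restart the boost only for the leftover time. */
static void resume_state(struct queue *q, struct io_ctx *ctx,
			 unsigned int now, unsigned int coeff)
{
	if (ctx->raising_time_left) {
		q->coeff = coeff;
		q->cur_max_time = ctx->raising_time_left;
		q->start = now;
	}
	ctx->raising_time_left = 0;
}

int main(void)
{
	struct queue q = { .coeff = 20, .cur_max_time = 6000, .start = 0 };
	struct io_ctx ctx = { 0 };

	save_state(&q, &ctx, 2500);	/* merged 2500 ticks in: 3500 left */
	resume_state(&q, &ctx, 9000, 20);
	printf("coeff=%u max=%u\n", q.coeff, q.cur_max_time); /* coeff=20 max=3500 */
	return 0;
}
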
11279 -+
11280 -+/*
11281 -+ * Must be called with the queue_lock held.
11282 -+ */
11283 -+static int bfqq_process_refs(struct bfq_queue *bfqq)
11284 -+{
11285 -+ int process_refs, io_refs;
11286 -+
11287 -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
11288 -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
11289 -+ BUG_ON(process_refs < 0);
11290 -+ return process_refs;
11291 -+}
11292 -+
11293 - static void bfq_add_rq_rb(struct request *rq)
11294 - {
11295 - struct bfq_queue *bfqq = RQ_BFQQ(rq);
11296 -@@ -486,12 +526,20 @@ static void bfq_add_rq_rb(struct request *rq)
11297 - if (!bfqd->low_latency)
11298 - goto add_bfqq_busy;
11299 -
11300 -+ if (bfq_bfqq_just_split(bfqq))
11301 -+ goto set_ioprio_changed;
11302 -+
11303 - /*
11304 -- * If the queue is not being boosted and has been idle
11305 -- * for enough time, start a weight-raising period
11306 -+ * If the queue:
11307 -+ * - is not being boosted,
11308 -+ * - has been idle for enough time,
11309 -+ * - is not a sync queue or is linked to a bfq_io_cq (it is
11310 -+ * shared "for its nature" or it is not shared and its
11311 -+ * requests have not been redirected to a shared queue)
11312 -+ * start a weight-raising period.
11313 - */
11314 -- if (old_raising_coeff == 1 &&
11315 -- (idle_for_long_time || soft_rt)) {
11316 -+ if (old_raising_coeff == 1 && (idle_for_long_time || soft_rt) &&
11317 -+ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
11318 - bfqq->raising_coeff = bfqd->bfq_raising_coeff;
11319 - if (idle_for_long_time)
11320 - bfqq->raising_cur_max_time =
11321 -@@ -574,6 +622,7 @@ static void bfq_add_rq_rb(struct request *rq)
11322 - bfqd->bfq_raising_rt_max_time;
11323 - }
11324 - }
11325 -+set_ioprio_changed:
11326 - if (old_raising_coeff != bfqq->raising_coeff)
11327 - entity->ioprio_changed = 1;
11328 - add_bfqq_busy:
11329 -@@ -756,90 +805,35 @@ static void bfq_end_raising(struct bfq_data *bfqd)
11330 - spin_unlock_irq(bfqd->queue->queue_lock);
11331 - }
11332 -
11333 --static int bfq_allow_merge(struct request_queue *q, struct request *rq,
11334 -- struct bio *bio)
11335 --{
11336 -- struct bfq_data *bfqd = q->elevator->elevator_data;
11337 -- struct bfq_io_cq *bic;
11338 -- struct bfq_queue *bfqq;
11339 --
11340 -- /*
11341 -- * Disallow merge of a sync bio into an async request.
11342 -- */
11343 -- if (bfq_bio_sync(bio) && !rq_is_sync(rq))
11344 -- return 0;
11345 --
11346 -- /*
11347 -- * Lookup the bfqq that this bio will be queued with. Allow
11348 -- * merge only if rq is queued there.
11349 -- * Queue lock is held here.
11350 -- */
11351 -- bic = bfq_bic_lookup(bfqd, current->io_context);
11352 -- if (bic == NULL)
11353 -- return 0;
11354 --
11355 -- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
11356 -- return bfqq == RQ_BFQQ(rq);
11357 --}
11358 --
11359 --static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
11360 -- struct bfq_queue *bfqq)
11361 --{
11362 -- if (bfqq != NULL) {
11363 -- bfq_mark_bfqq_must_alloc(bfqq);
11364 -- bfq_mark_bfqq_budget_new(bfqq);
11365 -- bfq_clear_bfqq_fifo_expire(bfqq);
11366 --
11367 -- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
11368 --
11369 -- bfq_log_bfqq(bfqd, bfqq,
11370 -- "set_in_service_queue, cur-budget = %lu",
11371 -- bfqq->entity.budget);
11372 -- }
11373 --
11374 -- bfqd->in_service_queue = bfqq;
11375 --}
11376 --
11377 --/*
11378 -- * Get and set a new queue for service.
11379 -- */
11380 --static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
11381 -- struct bfq_queue *bfqq)
11382 -+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
11383 - {
11384 -- if (!bfqq)
11385 -- bfqq = bfq_get_next_queue(bfqd);
11386 -+ if (request)
11387 -+ return blk_rq_pos(io_struct);
11388 - else
11389 -- bfq_get_next_queue_forced(bfqd, bfqq);
11390 --
11391 -- __bfq_set_in_service_queue(bfqd, bfqq);
11392 -- return bfqq;
11393 -+ return ((struct bio *)io_struct)->bi_iter.bi_sector;
11394 - }
11395 -
11396 --static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
11397 -- struct request *rq)
11398 -+static inline sector_t bfq_dist_from(sector_t pos1,
11399 -+ sector_t pos2)
11400 - {
11401 -- if (blk_rq_pos(rq) >= bfqd->last_position)
11402 -- return blk_rq_pos(rq) - bfqd->last_position;
11403 -+ if (pos1 >= pos2)
11404 -+ return pos1 - pos2;
11405 - else
11406 -- return bfqd->last_position - blk_rq_pos(rq);
11407 -+ return pos2 - pos1;
11408 - }
11409 -
11410 --/*
11411 -- * Return true if bfqq has no request pending and rq is close enough to
11412 -- * bfqd->last_position, or if rq is closer to bfqd->last_position than
11413 -- * bfqq->next_rq
11414 -- */
11415 --static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
11416 -+static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
11417 -+ sector_t sector)
11418 - {
11419 -- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
11420 -+ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
11421 -+ BFQQ_SEEK_THR;
11422 - }
11423 -
11424 --static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11425 -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
11426 - {
11427 - struct rb_root *root = &bfqd->rq_pos_tree;
11428 - struct rb_node *parent, *node;
11429 - struct bfq_queue *__bfqq;
11430 -- sector_t sector = bfqd->last_position;
11431 -
11432 - if (RB_EMPTY_ROOT(root))
11433 - return NULL;
11434 -@@ -858,7 +852,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11435 - * position).
11436 - */
11437 - __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
11438 -- if (bfq_rq_close(bfqd, __bfqq->next_rq))
11439 -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
11440 - return __bfqq;
11441 -
11442 - if (blk_rq_pos(__bfqq->next_rq) < sector)
11443 -@@ -869,7 +863,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11444 - return NULL;
11445 -
11446 - __bfqq = rb_entry(node, struct bfq_queue, pos_node);
11447 -- if (bfq_rq_close(bfqd, __bfqq->next_rq))
11448 -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
11449 - return __bfqq;
11450 -
11451 - return NULL;
11452 -@@ -878,14 +872,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11453 - /*
11454 - * bfqd - obvious
11455 - * cur_bfqq - passed in so that we don't decide that the current queue
11456 -- * is closely cooperating with itself.
11457 -- *
11458 -- * We are assuming that cur_bfqq has dispatched at least one request,
11459 -- * and that bfqd->last_position reflects a position on the disk associated
11460 -- * with the I/O issued by cur_bfqq.
11461 -+ * is closely cooperating with itself
11462 -+ * sector - used as a reference point to search for a close queue
11463 - */
11464 - static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
11465 -- struct bfq_queue *cur_bfqq)
11466 -+ struct bfq_queue *cur_bfqq,
11467 -+ sector_t sector)
11468 - {
11469 - struct bfq_queue *bfqq;
11470 -
11471 -@@ -905,7 +897,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
11472 - * working closely on the same area of the disk. In that case,
11473 - * we can group them together and don't waste time idling.
11474 - */
11475 -- bfqq = bfqq_close(bfqd);
11476 -+ bfqq = bfqq_close(bfqd, sector);
11477 - if (bfqq == NULL || bfqq == cur_bfqq)
11478 - return NULL;
11479 -
11480 -@@ -932,6 +924,282 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
11481 - return bfqq;
11482 - }
11483 -
11484 -+static struct bfq_queue *
11485 -+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
11486 -+{
11487 -+ int process_refs, new_process_refs;
11488 -+ struct bfq_queue *__bfqq;
11489 -+
11490 -+ /*
11491 -+ * If there are no process references on the new_bfqq, then it is
11492 -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
11493 -+ * may have dropped their last reference (not just their last process
11494 -+ * reference).
11495 -+ */
11496 -+ if (!bfqq_process_refs(new_bfqq))
11497 -+ return NULL;
11498 -+
11499 -+ /* Avoid a circular list and skip interim queue merges. */
11500 -+ while ((__bfqq = new_bfqq->new_bfqq)) {
11501 -+ if (__bfqq == bfqq)
11502 -+ return NULL;
11503 -+ new_bfqq = __bfqq;
11504 -+ }
11505 -+
11506 -+ process_refs = bfqq_process_refs(bfqq);
11507 -+ new_process_refs = bfqq_process_refs(new_bfqq);
11508 -+ /*
11509 -+ * If the process for the bfqq has gone away, there is no
11510 -+ * sense in merging the queues.
11511 -+ */
11512 -+ if (process_refs == 0 || new_process_refs == 0)
11513 -+ return NULL;
11514 -+
11515 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
11516 -+ new_bfqq->pid);
11517 -+
11518 -+ /*
11519 -+ * Merging is just a redirection: the requests of the process owning
11520 -+ * one of the two queues are redirected to the other queue. The latter
11521 -+ * queue, in its turn, is set as shared if this is the first time that
11522 -+ * the requests of some process are redirected to it.
11523 -+ *
11524 -+ * We redirect bfqq to new_bfqq and not the opposite, because we
11525 -+ * are in the context of the process owning bfqq, hence we have the
11526 -+ * io_cq of this process. So we can immediately configure this io_cq
11527 -+ * to redirect the requests of the process to new_bfqq.
11528 -+ *
11529 -+ * NOTE, even if new_bfqq coincides with the in-service queue, the
11530 -+ * io_cq of new_bfqq is not available, because, if the in-service queue
11531 -+ * is shared, bfqd->in_service_bic may not point to the io_cq of the
11532 -+ * in-service queue.
11533 -+ * Redirecting the requests of the process owning bfqq to the currently
11534 -+ * in-service queue is in any case the best option, as we feed the
11535 -+ * in-service queue with new requests close to the last request served
11536 -+ * and, by doing so, hopefully increase the throughput.
11537 -+ */
11538 -+ bfqq->new_bfqq = new_bfqq;
11539 -+ atomic_add(process_refs, &new_bfqq->ref);
11540 -+ return new_bfqq;
11541 -+}
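
One detail of bfq_setup_merge() above is how it resolves chained merges: a queue that has already been scheduled for a merge redirects to its new_bfqq, so the chain is followed to its final target, bailing out if it would loop back to the queue we started from. A small, self-contained sketch of that walk (illustrative types, no reference counting):

#include <stddef.h>
#include <stdio.h>

struct queue {
	int pid;
	struct queue *new_queue;	/* where this queue's requests now go */
};

/* Follow the merge chain to its final target, refusing circular chains,
 * mirroring the while ((__bfqq = new_bfqq->new_bfqq)) loop above. */
static struct queue *resolve_merge_target(struct queue *q, struct queue *target)
{
	struct queue *next;

	while ((next = target->new_queue) != NULL) {
		if (next == q)
			return NULL;	/* would form a cycle: give up */
		target = next;
	}
	return target;
}

int main(void)
{
	struct queue a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };

	b.new_queue = &c;		/* b already scheduled to merge into c */
	printf("a merges into pid %d\n", resolve_merge_target(&a, &b)->pid); /* 3 */

	c.new_queue = &a;		/* now the chain loops back to a */
	printf("cycle gives %p\n", (void *)resolve_merge_target(&a, &b));    /* NULL */
	return 0;
}
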
11542 -+
11543 -+/*
11544 -+ * Attempt to schedule a merge of bfqq with the currently in-service queue or
11545 -+ * with a close queue among the scheduled queues.
11546 -+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
11547 -+ * structure otherwise.
11548 -+ */
11549 -+static struct bfq_queue *
11550 -+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
11551 -+ void *io_struct, bool request)
11552 -+{
11553 -+ struct bfq_queue *in_service_bfqq, *new_bfqq;
11554 -+
11555 -+ if (bfqq->new_bfqq)
11556 -+ return bfqq->new_bfqq;
11557 -+
11558 -+ if (!io_struct)
11559 -+ return NULL;
11560 -+
11561 -+ in_service_bfqq = bfqd->in_service_queue;
11562 -+
11563 -+ if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
11564 -+ !bfqd->in_service_bic)
11565 -+ goto check_scheduled;
11566 -+
11567 -+ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
11568 -+ goto check_scheduled;
11569 -+
11570 -+ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
11571 -+ goto check_scheduled;
11572 -+
11573 -+ if (in_service_bfqq->entity.parent != bfqq->entity.parent)
11574 -+ goto check_scheduled;
11575 -+
11576 -+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
11577 -+ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
11578 -+ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
11579 -+ if (new_bfqq != NULL)
11580 -+ return new_bfqq; /* Merge with the in-service queue */
11581 -+ }
11582 -+
11583 -+ /*
11584 -+ * Check whether there is a cooperator among currently scheduled
11585 -+ * queues. The only thing we need is that the bio/request is not
11586 -+ * NULL, as we need it to establish whether a cooperator exists.
11587 -+ */
11588 -+check_scheduled:
11589 -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq,
11590 -+ bfq_io_struct_pos(io_struct, request));
11591 -+ if (new_bfqq)
11592 -+ return bfq_setup_merge(bfqq, new_bfqq);
11593 -+
11594 -+ return NULL;
11595 -+}
11596 -+
11597 -+static inline void
11598 -+bfq_bfqq_save_state(struct bfq_queue *bfqq)
11599 -+{
11600 -+ /*
11601 -+ * If bfqq->bic == NULL, the queue is already shared or its requests
11602 -+ * have already been redirected to a shared queue; both idle window
11603 -+ * and weight raising state have already been saved. Do nothing.
11604 -+ */
11605 -+ if (bfqq->bic == NULL)
11606 -+ return;
11607 -+ if (bfqq->bic->raising_time_left)
11608 -+ /*
11609 -+ * This is the queue of a just-started process, and would
11610 -+ * deserve weight raising: we set raising_time_left to the full
11611 -+ * weight-raising duration to trigger weight-raising when and
11612 -+ * if the queue is split and the first request of the queue
11613 -+ * is enqueued.
11614 -+ */
11615 -+ bfqq->bic->raising_time_left = bfq_wrais_duration(bfqq->bfqd);
11616 -+ else if (bfqq->raising_coeff > 1) {
11617 -+ unsigned long wrais_duration =
11618 -+ jiffies - bfqq->last_rais_start_finish;
11619 -+ /*
11620 -+ * It may happen that a queue's weight raising period lasts
11621 -+ * longer than its raising_cur_max_time, as weight raising is
11622 -+ * handled only when a request is enqueued or dispatched (it
11623 -+ * does not use any timer). If the weight raising period is
11624 -+ * about to end, don't save it.
11625 -+ */
11626 -+ if (bfqq->raising_cur_max_time <= wrais_duration)
11627 -+ bfqq->bic->raising_time_left = 0;
11628 -+ else
11629 -+ bfqq->bic->raising_time_left =
11630 -+ bfqq->raising_cur_max_time - wrais_duration;
11631 -+ /*
11632 -+ * The bfq_queue is becoming shared or the requests of the
11633 -+ * process owning the queue are being redirected to a shared
11634 -+ * queue. Stop the weight raising period of the queue, as in
11635 -+ * both cases it should not be owned by an interactive or soft
11636 -+ * real-time application.
11637 -+ */
11638 -+ bfq_bfqq_end_raising(bfqq);
11639 -+ } else
11640 -+ bfqq->bic->raising_time_left = 0;
11641 -+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
11642 -+}
11643 -+
11644 -+static inline void
11645 -+bfq_get_bic_reference(struct bfq_queue *bfqq)
11646 -+{
11647 -+ /*
11648 -+ * If bfqq->bic has a non-NULL value, the bic to which it belongs
11649 -+ * is about to begin using a shared bfq_queue.
11650 -+ */
11651 -+ if (bfqq->bic)
11652 -+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
11653 -+}
11654 -+
11655 -+static void
11656 -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
11657 -+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
11658 -+{
11659 -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
11660 -+ (long unsigned)new_bfqq->pid);
11661 -+ /* Save weight raising and idle window of the merged queues */
11662 -+ bfq_bfqq_save_state(bfqq);
11663 -+ bfq_bfqq_save_state(new_bfqq);
11664 -+ /*
11665 -+ * Grab a reference to the bic, to prevent it from being destroyed
11666 -+ * before being possibly touched by a bfq_split_bfqq().
11667 -+ */
11668 -+ bfq_get_bic_reference(bfqq);
11669 -+ bfq_get_bic_reference(new_bfqq);
11670 -+ /* Merge queues (that is, let bic redirect its requests to new_bfqq) */
11671 -+ bic_set_bfqq(bic, new_bfqq, 1);
11672 -+ bfq_mark_bfqq_coop(new_bfqq);
11673 -+ /*
11674 -+ * new_bfqq now belongs to at least two bics (it is a shared queue): set
11675 -+ * new_bfqq->bic to NULL. bfqq either:
11676 -+ * - does not belong to any bic any more, and hence bfqq->bic must
11677 -+ * be set to NULL, or
11678 -+ * - is a queue whose owning bics have already been redirected to a
11679 -+ * different queue, hence the queue is destined to not belong to any
11680 -+ * bic soon and bfqq->bic is already NULL (therefore the next
11681 -+ * assignment causes no harm).
11682 -+ */
11683 -+ new_bfqq->bic = NULL;
11684 -+ bfqq->bic = NULL;
11685 -+ bfq_put_queue(bfqq);
11686 -+}
11687 -+
11688 -+static int bfq_allow_merge(struct request_queue *q, struct request *rq,
11689 -+ struct bio *bio)
11690 -+{
11691 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
11692 -+ struct bfq_io_cq *bic;
11693 -+ struct bfq_queue *bfqq, *new_bfqq;
11694 -+
11695 -+ /*
11696 -+ * Disallow merge of a sync bio into an async request.
11697 -+ */
11698 -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
11699 -+ return 0;
11700 -+
11701 -+ /*
11702 -+ * Lookup the bfqq that this bio will be queued with. Allow
11703 -+ * merge only if rq is queued there.
11704 -+ * Queue lock is held here.
11705 -+ */
11706 -+ bic = bfq_bic_lookup(bfqd, current->io_context);
11707 -+ if (bic == NULL)
11708 -+ return 0;
11709 -+
11710 -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
11711 -+ /*
11712 -+ * We take advantage of this function to perform an early merge
11713 -+ * of the queues of possible cooperating processes.
11714 -+ */
11715 -+ if (bfqq != NULL) {
11716 -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
11717 -+ if (new_bfqq != NULL) {
11718 -+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
11719 -+ /*
11720 -+ * If we get here, the bio will be queued in the shared
11721 -+ * queue, i.e., new_bfqq, so use new_bfqq to decide
11722 -+ * whether bio and rq can be merged.
11723 -+ */
11724 -+ bfqq = new_bfqq;
11725 -+ }
11726 -+ }
11727 -+
11728 -+ return bfqq == RQ_BFQQ(rq);
11729 -+}
11730 -+
11731 -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
11732 -+ struct bfq_queue *bfqq)
11733 -+{
11734 -+ if (bfqq != NULL) {
11735 -+ bfq_mark_bfqq_must_alloc(bfqq);
11736 -+ bfq_mark_bfqq_budget_new(bfqq);
11737 -+ bfq_clear_bfqq_fifo_expire(bfqq);
11738 -+
11739 -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
11740 -+
11741 -+ bfq_log_bfqq(bfqd, bfqq,
11742 -+ "set_in_service_queue, cur-budget = %lu",
11743 -+ bfqq->entity.budget);
11744 -+ }
11745 -+
11746 -+ bfqd->in_service_queue = bfqq;
11747 -+}
11748 -+
11749 -+/*
11750 -+ * Get and set a new queue for service.
11751 -+ */
11752 -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
11753 -+{
11754 -+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
11755 -+
11756 -+ __bfq_set_in_service_queue(bfqd, bfqq);
11757 -+ return bfqq;
11758 -+}
11759 -+
11760 - /*
11761 - * If enough samples have been computed, return the current max budget
11762 - * stored in bfqd, which is dynamically updated according to the
11763 -@@ -1079,63 +1347,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
11764 - return rq;
11765 - }
11766 -
11767 --/*
11768 -- * Must be called with the queue_lock held.
11769 -- */
11770 --static int bfqq_process_refs(struct bfq_queue *bfqq)
11771 --{
11772 -- int process_refs, io_refs;
11773 --
11774 -- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
11775 -- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
11776 -- BUG_ON(process_refs < 0);
11777 -- return process_refs;
11778 --}
11779 --
11780 --static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
11781 --{
11782 -- int process_refs, new_process_refs;
11783 -- struct bfq_queue *__bfqq;
11784 --
11785 -- /*
11786 -- * If there are no process references on the new_bfqq, then it is
11787 -- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
11788 -- * may have dropped their last reference (not just their last process
11789 -- * reference).
11790 -- */
11791 -- if (!bfqq_process_refs(new_bfqq))
11792 -- return;
11793 --
11794 -- /* Avoid a circular list and skip interim queue merges. */
11795 -- while ((__bfqq = new_bfqq->new_bfqq)) {
11796 -- if (__bfqq == bfqq)
11797 -- return;
11798 -- new_bfqq = __bfqq;
11799 -- }
11800 --
11801 -- process_refs = bfqq_process_refs(bfqq);
11802 -- new_process_refs = bfqq_process_refs(new_bfqq);
11803 -- /*
11804 -- * If the process for the bfqq has gone away, there is no
11805 -- * sense in merging the queues.
11806 -- */
11807 -- if (process_refs == 0 || new_process_refs == 0)
11808 -- return;
11809 --
11810 -- /*
11811 -- * Merge in the direction of the lesser amount of work.
11812 -- */
11813 -- if (new_process_refs >= process_refs) {
11814 -- bfqq->new_bfqq = new_bfqq;
11815 -- atomic_add(process_refs, &new_bfqq->ref);
11816 -- } else {
11817 -- new_bfqq->new_bfqq = bfqq;
11818 -- atomic_add(new_process_refs, &bfqq->ref);
11819 -- }
11820 -- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
11821 -- new_bfqq->pid);
11822 --}
11823 --
11824 - static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
11825 - {
11826 - struct bfq_entity *entity = &bfqq->entity;
11827 -@@ -1729,7 +1940,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
11828 - */
11829 - static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
11830 - {
11831 -- struct bfq_queue *bfqq, *new_bfqq = NULL;
11832 -+ struct bfq_queue *bfqq;
11833 - struct request *next_rq;
11834 - enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
11835 -
11836 -@@ -1739,17 +1950,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
11837 -
11838 - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
11839 -
11840 -- /*
11841 -- * If another queue has a request waiting within our mean seek
11842 -- * distance, let it run. The expire code will check for close
11843 -- * cooperators and put the close queue at the front of the
11844 -- * service tree. If possible, merge the expiring queue with the
11845 -- * new bfqq.
11846 -- */
11847 -- new_bfqq = bfq_close_cooperator(bfqd, bfqq);
11848 -- if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
11849 -- bfq_setup_merge(bfqq, new_bfqq);
11850 --
11851 - if (bfq_may_expire_for_budg_timeout(bfqq) &&
11852 - !timer_pending(&bfqd->idle_slice_timer) &&
11853 - !bfq_bfqq_must_idle(bfqq))
11854 -@@ -1786,36 +1986,26 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
11855 - bfq_clear_bfqq_wait_request(bfqq);
11856 - del_timer(&bfqd->idle_slice_timer);
11857 - }
11858 -- if (new_bfqq == NULL)
11859 -- goto keep_queue;
11860 -- else
11861 -- goto expire;
11862 -+ goto keep_queue;
11863 - }
11864 - }
11865 -
11866 - /*
11867 -- * No requests pending. If the in-service queue has no cooperator and
11868 -- * still has requests in flight (possibly waiting for a completion)
11869 -- * or is idling for a new request, then keep it.
11870 -+ * No requests pending. If the in-service queue still has requests in
11871 -+ * flight (possibly waiting for a completion) or is idling for a new
11872 -+ * request, then keep it.
11873 - */
11874 -- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
11875 -- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
11876 -+ if (timer_pending(&bfqd->idle_slice_timer) ||
11877 -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
11878 - bfqq = NULL;
11879 - goto keep_queue;
11880 -- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
11881 -- /*
11882 -- * Expiring the queue because there is a close cooperator,
11883 -- * cancel timer.
11884 -- */
11885 -- bfq_clear_bfqq_wait_request(bfqq);
11886 -- del_timer(&bfqd->idle_slice_timer);
11887 - }
11888 -
11889 - reason = BFQ_BFQQ_NO_MORE_REQUESTS;
11890 - expire:
11891 - bfq_bfqq_expire(bfqd, bfqq, 0, reason);
11892 - new_queue:
11893 -- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
11894 -+ bfqq = bfq_set_in_service_queue(bfqd);
11895 - bfq_log(bfqd, "select_queue: new queue %d returned",
11896 - bfqq != NULL ? bfqq->pid : 0);
11897 - keep_queue:
11898 -@@ -1825,9 +2015,8 @@ keep_queue:
11899 - static void bfq_update_raising_data(struct bfq_data *bfqd,
11900 - struct bfq_queue *bfqq)
11901 - {
11902 -+ struct bfq_entity *entity = &bfqq->entity;
11903 - if (bfqq->raising_coeff > 1) { /* queue is being boosted */
11904 -- struct bfq_entity *entity = &bfqq->entity;
11905 --
11906 - bfq_log_bfqq(bfqd, bfqq,
11907 - "raising period dur %u/%u msec, "
11908 - "old raising coeff %u, w %d(%d)",
11909 -@@ -1844,7 +2033,7 @@ static void bfq_update_raising_data(struct bfq_data *bfqd,
11910 - "WARN: pending prio change");
11911 - /*
11912 - * If too much time has elapsed from the beginning
11913 -- * of this weight-raising, stop it.
11914 -+ * of this weight-raising period, stop it.
11915 - */
11916 - if (time_is_before_jiffies(bfqq->last_rais_start_finish +
11917 - bfqq->raising_cur_max_time)) {
11918 -@@ -1856,11 +2045,13 @@ static void bfq_update_raising_data(struct bfq_data *bfqd,
11919 - jiffies_to_msecs(bfqq->
11920 - raising_cur_max_time));
11921 - bfq_bfqq_end_raising(bfqq);
11922 -- __bfq_entity_update_weight_prio(
11923 -- bfq_entity_service_tree(entity),
11924 -- entity);
11925 - }
11926 - }
11927 -+ /* Update weight both if it must be raised and if it must be lowered */
11928 -+ if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1))
11929 -+ __bfq_entity_update_weight_prio(
11930 -+ bfq_entity_service_tree(entity),
11931 -+ entity);
11932 - }
11933 -
11934 - /*
11935 -@@ -2101,6 +2292,25 @@ static void bfq_init_icq(struct io_cq *icq)
11936 - struct bfq_io_cq *bic = icq_to_bic(icq);
11937 -
11938 - bic->ttime.last_end_request = jiffies;
11939 -+ /*
11940 -+ * A newly created bic indicates that the process has just
11941 -+ * started doing I/O, and is probably mapping into memory its
11942 -+ * executable and libraries: it definitely needs weight raising.
11943 -+ * There is however the possibility that the process performs,
11944 -+ * for a while, I/O close to some other process. EQM intercepts
11945 -+ * this behavior and may merge the queue corresponding to the
11946 -+ * process with some other queue, BEFORE the weight of the queue
11947 -+ * is raised. Merged queues are not weight-raised (they are assumed
11948 -+ * to belong to processes that benefit only from high throughput).
11949 -+ * If the merge is basically the consequence of an accident, then
11950 -+ * the queue will be split soon and will get back its old weight.
11951 -+ * It is then important to write down somewhere that this queue
11952 -+ * does need weight raising, even if it did not make it to get its
11953 -+ * does need weight raising, even if it did not get its weight
11954 -+ * raised before being merged. For this purpose, we overload
11955 -+ * as needing weight raising.
11956 -+ */
11957 -+ bic->raising_time_left = 1;
11958 - }
11959 -
11960 - static void bfq_exit_icq(struct io_cq *icq)
11961 -@@ -2114,6 +2324,13 @@ static void bfq_exit_icq(struct io_cq *icq)
11962 - }
11963 -
11964 - if (bic->bfqq[BLK_RW_SYNC]) {
11965 -+ /*
11966 -+ * If the bic is using a shared queue, put the reference
11967 -+ * taken on the io_context when the bic started using a
11968 -+ * shared bfq_queue.
11969 -+ */
11970 -+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
11971 -+ put_io_context(icq->ioc);
11972 - bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
11973 - bic->bfqq[BLK_RW_SYNC] = NULL;
11974 - }
11975 -@@ -2405,6 +2622,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
11976 - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
11977 - return;
11978 -
11979 -+ /* Idle window just restored, statistics are meaningless. */
11980 -+ if (bfq_bfqq_just_split(bfqq))
11981 -+ return;
11982 -+
11983 - enable_idle = bfq_bfqq_idle_window(bfqq);
11984 -
11985 - if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
11986 -@@ -2445,6 +2666,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
11987 - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
11988 - !BFQQ_SEEKY(bfqq))
11989 - bfq_update_idle_window(bfqd, bfqq, bic);
11990 -+ bfq_clear_bfqq_just_split(bfqq);
11991 -
11992 - bfq_log_bfqq(bfqd, bfqq,
11993 - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
11994 -@@ -2505,13 +2727,48 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
11995 - static void bfq_insert_request(struct request_queue *q, struct request *rq)
11996 - {
11997 - struct bfq_data *bfqd = q->elevator->elevator_data;
11998 -- struct bfq_queue *bfqq = RQ_BFQQ(rq);
11999 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
12000 -
12001 - assert_spin_locked(bfqd->queue->queue_lock);
12002 -+
12003 -+ /*
12004 -+ * An unplug may trigger a requeue of a request from the device
12005 -+ * driver: make sure we are in process context while trying to
12006 -+ * merge two bfq_queues.
12007 -+ */
12008 -+ if (!in_interrupt()) {
12009 -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
12010 -+ if (new_bfqq != NULL) {
12011 -+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
12012 -+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
12013 -+ /*
12014 -+ * Release the request's reference to the old bfqq
12015 -+ * and make sure one is taken to the shared queue.
12016 -+ */
12017 -+ new_bfqq->allocated[rq_data_dir(rq)]++;
12018 -+ bfqq->allocated[rq_data_dir(rq)]--;
12019 -+ atomic_inc(&new_bfqq->ref);
12020 -+ bfq_put_queue(bfqq);
12021 -+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
12022 -+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
12023 -+ bfqq, new_bfqq);
12024 -+ rq->elv.priv[1] = new_bfqq;
12025 -+ bfqq = new_bfqq;
12026 -+ }
12027 -+ }
12028 -+
12029 - bfq_init_prio_data(bfqq, RQ_BIC(rq));
12030 -
12031 - bfq_add_rq_rb(rq);
12032 -
12033 -+ /*
12034 -+ * Here a newly-created bfq_queue has already started a weight-raising
12035 -+ * period: clear raising_time_left to prevent bfq_bfqq_save_state()
12036 -+ * from assigning it a full weight-raising period. See the detailed
12037 -+ * comments about this field in bfq_init_icq().
12038 -+ */
12039 -+ if (bfqq->bic != NULL)
12040 -+ bfqq->bic->raising_time_left = 0;
12041 - rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
12042 - list_add_tail(&rq->queuelist, &bfqq->fifo);
12043 -
12044 -@@ -2663,18 +2920,6 @@ static void bfq_put_request(struct request *rq)
12045 - }
12046 - }
12047 -
12048 --static struct bfq_queue *
12049 --bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
12050 -- struct bfq_queue *bfqq)
12051 --{
12052 -- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
12053 -- (long unsigned)bfqq->new_bfqq->pid);
12054 -- bic_set_bfqq(bic, bfqq->new_bfqq, 1);
12055 -- bfq_mark_bfqq_coop(bfqq->new_bfqq);
12056 -- bfq_put_queue(bfqq);
12057 -- return bic_to_bfqq(bic, 1);
12058 --}
12059 --
12060 - /*
12061 - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
12062 - * was the last process referring to said bfqq.
12063 -@@ -2683,6 +2928,9 @@ static struct bfq_queue *
12064 - bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
12065 - {
12066 - bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
12067 -+
12068 -+ put_io_context(bic->icq.ioc);
12069 -+
12070 - if (bfqq_process_refs(bfqq) == 1) {
12071 - bfqq->pid = current->pid;
12072 - bfq_clear_bfqq_coop(bfqq);
12073 -@@ -2711,6 +2959,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
12074 - struct bfq_queue *bfqq;
12075 - struct bfq_group *bfqg;
12076 - unsigned long flags;
12077 -+ bool split = false;
12078 -
12079 - might_sleep_if(gfp_mask & __GFP_WAIT);
12080 -
12081 -@@ -2729,24 +2978,14 @@ new_queue:
12082 - bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
12083 - bic_set_bfqq(bic, bfqq, is_sync);
12084 - } else {
12085 -- /*
12086 -- * If the queue was seeky for too long, break it apart.
12087 -- */
12088 -+ /* If the queue was seeky for too long, break it apart. */
12089 - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
12090 - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
12091 - bfqq = bfq_split_bfqq(bic, bfqq);
12092 -+ split = true;
12093 - if (!bfqq)
12094 - goto new_queue;
12095 - }
12096 --
12097 -- /*
12098 -- * Check to see if this queue is scheduled to merge with
12099 -- * another closely cooperating queue. The merging of queues
12100 -- * happens here as it must be done in process context.
12101 -- * The reference on new_bfqq was taken in merge_bfqqs.
12102 -- */
12103 -- if (bfqq->new_bfqq != NULL)
12104 -- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
12105 - }
12106 -
12107 - bfqq->allocated[rw]++;
12108 -@@ -2757,6 +2996,26 @@ new_queue:
12109 - rq->elv.priv[0] = bic;
12110 - rq->elv.priv[1] = bfqq;
12111 -
12112 -+ /*
12113 -+ * If a bfq_queue has only one process reference, it is owned
12114 -+ * by only one bfq_io_cq: we can set the bic field of the
12115 -+ * bfq_queue to the address of that structure. Also, if the
12116 -+ * queue has just been split, mark a flag so that the
12117 -+ * information is available to the other scheduler hooks.
12118 -+ */
12119 -+ if (bfqq_process_refs(bfqq) == 1) {
12120 -+ bfqq->bic = bic;
12121 -+ if (split) {
12122 -+ bfq_mark_bfqq_just_split(bfqq);
12123 -+ /*
12124 -+ * If the queue has just been split from a shared queue,
12125 -+ * restore the idle window and the possible weight
12126 -+ * raising period.
12127 -+ */
12128 -+ bfq_bfqq_resume_state(bfqq, bic);
12129 -+ }
12130 -+ }
12131 -+
12132 - spin_unlock_irqrestore(q->queue_lock, flags);
12133 -
12134 - return 0;
12135 -diff --git a/block/bfq-sched.c b/block/bfq-sched.c
12136 -index 999b475..e54ea33 100644
12137 ---- a/block/bfq-sched.c
12138 -+++ b/block/bfq-sched.c
12139 -@@ -980,34 +980,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
12140 - return bfqq;
12141 - }
12142 -
12143 --/*
12144 -- * Forced extraction of the given queue.
12145 -- */
12146 --static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
12147 -- struct bfq_queue *bfqq)
12148 --{
12149 -- struct bfq_entity *entity;
12150 -- struct bfq_sched_data *sd;
12151 --
12152 -- BUG_ON(bfqd->in_service_queue != NULL);
12153 --
12154 -- entity = &bfqq->entity;
12155 -- /*
12156 -- * Bubble up extraction/update from the leaf to the root.
12157 -- */
12158 -- for_each_entity(entity) {
12159 -- sd = entity->sched_data;
12160 -- bfq_update_budget(entity);
12161 -- bfq_update_vtime(bfq_entity_service_tree(entity));
12162 -- bfq_active_extract(bfq_entity_service_tree(entity), entity);
12163 -- sd->active_entity = entity;
12164 -- sd->next_active = NULL;
12165 -- entity->service = 0;
12166 -- }
12167 --
12168 -- return;
12169 --}
12170 --
12171 - static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
12172 - {
12173 - if (bfqd->in_service_bic != NULL) {
12174 -diff --git a/block/bfq.h b/block/bfq.h
12175 -index 3ca8482..c278796 100644
12176 ---- a/block/bfq.h
12177 -+++ b/block/bfq.h
12178 -@@ -200,6 +200,8 @@ struct bfq_group;
12179 - * idle to backlogged
12180 - * @service_from_backlogged: cumulative service received from the @bfq_queue
12181 - * since the last transition from idle to backlogged
12182 -+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
12183 -+ * queue is shared
12184 - *
12185 - * A bfq_queue is a leaf request queue; it can be associated with an io_context
12186 - * or more, if it is async or shared between cooperating processes. @cgroup
12187 -@@ -243,6 +245,7 @@ struct bfq_queue {
12188 - sector_t last_request_pos;
12189 -
12190 - pid_t pid;
12191 -+ struct bfq_io_cq *bic;
12192 -
12193 - /* weight-raising fields */
12194 - unsigned long raising_cur_max_time;
12195 -@@ -272,12 +275,23 @@ struct bfq_ttime {
12196 - * @icq: associated io_cq structure
12197 - * @bfqq: array of two process queues, the sync and the async
12198 - * @ttime: associated @bfq_ttime struct
12199 -+ * @raising_time_left: snapshot of the time left before weight raising ends
12200 -+ * for the sync queue associated to this process; this
12201 -+ * snapshot is taken to remember this value while the weight
12202 -+ * raising is suspended because the queue is merged with a
12203 -+ * shared queue, and is used to set @raising_cur_max_time
12204 -+ * when the queue is split from the shared queue and its
12205 -+ * weight is raised again
12206 -+ * @saved_idle_window: same purpose as the previous field for the idle window
12207 - */
12208 - struct bfq_io_cq {
12209 - struct io_cq icq; /* must be the first member */
12210 - struct bfq_queue *bfqq[2];
12211 - struct bfq_ttime ttime;
12212 - int ioprio;
12213 -+
12214 -+ unsigned int raising_time_left;
12215 -+ unsigned int saved_idle_window;
12216 - };
12217 -
12218 - /**
12219 -@@ -418,8 +432,9 @@ enum bfqq_state_flags {
12220 - BFQ_BFQQ_FLAG_sync, /* synchronous queue */
12221 - BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
12222 - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
12223 -- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
12224 -- BFQ_BFQQ_FLAG_softrt_update, /* needs softrt-next-start update */
12225 -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
12226 -+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
12227 -+ BFQ_BFQQ_FLAG_softrt_update, /* may need softrt-next-start update */
12228 - };
12229 -
12230 - #define BFQ_BFQQ_FNS(name) \
12231 -@@ -446,6 +461,7 @@ BFQ_BFQQ_FNS(sync);
12232 - BFQ_BFQQ_FNS(budget_new);
12233 - BFQ_BFQQ_FNS(coop);
12234 - BFQ_BFQQ_FNS(split_coop);
12235 -+BFQ_BFQQ_FNS(just_split);
12236 - BFQ_BFQQ_FNS(softrt_update);
12237 - #undef BFQ_BFQQ_FNS
12238 -
12239 ---
12240 -1.9.0
12241 -