From: "Mike Pagano (mpagano)" <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] linux-patches r2772 - genpatches-2.6/trunk/3.15
Date: Mon, 05 May 2014 14:28:54
Message-Id: 20140505142849.483CA2004C@flycatcher.gentoo.org
1 Author: mpagano
2 Date: 2014-05-05 14:28:48 +0000 (Mon, 05 May 2014)
3 New Revision: 2772
4
5 Removed:
6 genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch
7 genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch
8 genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch
9 genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch
10 genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch
11 genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1
12 genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch
13 Modified:
14 genpatches-2.6/trunk/3.15/0000_README
15 Log:
16 Remove patches incompatible with 3.15 until they can be worked on.
17
18 Modified: genpatches-2.6/trunk/3.15/0000_README
19 ===================================================================
20 --- genpatches-2.6/trunk/3.15/0000_README 2014-05-05 14:22:27 UTC (rev 2771)
21 +++ genpatches-2.6/trunk/3.15/0000_README 2014-05-05 14:28:48 UTC (rev 2772)
22 @@ -42,14 +42,6 @@
23
24 Individual Patch Descriptions:
25 --------------------------------------------------------------------------
26 -Patch: 1000_linux-3.14.1.patch
27 -From: http://www.kernel.org
28 -Desc: Linux 3.14.1
29 -
30 -Patch: 1001_linux-3.14.2.patch
31 -From: http://www.kernel.org
32 -Desc: Linux 3.14.2
33 -
34 Patch: 1500_XATTR_USER_PREFIX.patch
35 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
36 Desc: Support for namespace user.pax.* on tmpfs.
37 @@ -74,10 +66,6 @@
38 From: Al Viro <viro <at> ZenIV.linux.org.uk>
39 Desc: Do not lock when UMH is waiting on current thread spawned by linuxrc. (bug #481344)
40
41 -Patch: 4200_fbcondecor-0.9.6.patch
42 -From: http://dev.gentoo.org/~spock
43 -Desc: Bootsplash successor by Michal Januszewski ported by Jeremy (bug #452574)
44 -
45 Patch: 4500_support-for-pogoplug-e02.patch
46 From: Cristoph Junghans <ottxor@g.o>
47 Desc: Support for Pogoplug e02 (bug #460350), adjusted to be opt-in by TomWij.
48 @@ -85,20 +73,3 @@
49 Patch: 4567_distro-Gentoo-Kconfig.patch
50 From: Tom Wijsman <TomWij@g.o>
51 Desc: Add Gentoo Linux support config settings and defaults.
52 -
53 -Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
54 -From: https://github.com/graysky2/kernel_gcc_patch/
55 -Desc: Kernel patch enables gcc optimizations for additional CPUs.
56 -
57 -Patch: 5001_BFQ-1-block-cgroups-kconfig-build-bits-for-v7r2-3.14.patch
58 -From: http://algo.ing.unimo.it/people/paolo/disk_sched/
59 -Desc: BFQ v7r2 patch 1 for 3.14: Build, cgroups and kconfig bits
60 -
61 -Patch: 5002_BFQ-2-block-introduce-the-v7r2-I-O-sched-for-3.14.patch1
62 -From: http://algo.ing.unimo.it/people/paolo/disk_sched/
63 -Desc: BFQ v7r2 patch 2 for 3.14: BFQ Scheduler
64 -
65 -Patch: 5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r2-for-3.14.0.patch
66 -From: http://algo.ing.unimo.it/people/paolo/disk_sched/
67 -Desc: BFQ v7r2 patch 3 for 3.14: Early Queue Merge (EQM)
68 -
69
70 Deleted: genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch
71 ===================================================================
72 --- genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch 2014-05-05 14:22:27 UTC (rev 2771)
73 +++ genpatches-2.6/trunk/3.15/1000_linux-3.14.1.patch 2014-05-05 14:28:48 UTC (rev 2772)
74 @@ -1,1221 +0,0 @@
75 -diff --git a/Makefile b/Makefile
76 -index e5ac8a62e6e5..7d0b6992d9ed 100644
77 ---- a/Makefile
78 -+++ b/Makefile
79 -@@ -1,6 +1,6 @@
80 - VERSION = 3
81 - PATCHLEVEL = 14
82 --SUBLEVEL = 0
83 -+SUBLEVEL = 1
84 - EXTRAVERSION =
85 - NAME = Shuffling Zombie Juror
86 -
87 -diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
88 -index ea16d782af58..4f31b2eb5cdf 100644
89 ---- a/arch/arc/boot/dts/nsimosci.dts
90 -+++ b/arch/arc/boot/dts/nsimosci.dts
91 -@@ -11,13 +11,16 @@
92 -
93 - / {
94 - compatible = "snps,nsimosci";
95 -- clock-frequency = <80000000>; /* 80 MHZ */
96 -+ clock-frequency = <20000000>; /* 20 MHZ */
97 - #address-cells = <1>;
98 - #size-cells = <1>;
99 - interrupt-parent = <&intc>;
100 -
101 - chosen {
102 -- bootargs = "console=tty0 consoleblank=0";
103 -+ /* this is for console on PGU */
104 -+ /* bootargs = "console=tty0 consoleblank=0"; */
105 -+ /* this is for console on serial */
106 -+ bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug";
107 - };
108 -
109 - aliases {
110 -@@ -44,15 +47,14 @@
111 - };
112 -
113 - uart0: serial@c0000000 {
114 -- compatible = "snps,dw-apb-uart";
115 -+ compatible = "ns8250";
116 - reg = <0xc0000000 0x2000>;
117 - interrupts = <11>;
118 -- #clock-frequency = <80000000>;
119 - clock-frequency = <3686400>;
120 - baud = <115200>;
121 - reg-shift = <2>;
122 - reg-io-width = <4>;
123 -- status = "okay";
124 -+ no-loopback-test = <1>;
125 - };
126 -
127 - pgu0: pgu@c9000000 {
128 -diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
129 -index 451af30914f6..c01ba35a4eff 100644
130 ---- a/arch/arc/configs/nsimosci_defconfig
131 -+++ b/arch/arc/configs/nsimosci_defconfig
132 -@@ -54,6 +54,7 @@ CONFIG_SERIO_ARC_PS2=y
133 - CONFIG_SERIAL_8250=y
134 - CONFIG_SERIAL_8250_CONSOLE=y
135 - CONFIG_SERIAL_8250_DW=y
136 -+CONFIG_SERIAL_OF_PLATFORM=y
137 - CONFIG_SERIAL_ARC=y
138 - CONFIG_SERIAL_ARC_CONSOLE=y
139 - # CONFIG_HW_RANDOM is not set
140 -diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
141 -index dbdd2231c75d..b2e322939256 100644
142 ---- a/arch/m68k/Kconfig
143 -+++ b/arch/m68k/Kconfig
144 -@@ -17,6 +17,7 @@ config M68K
145 - select FPU if MMU
146 - select ARCH_WANT_IPC_PARSE_VERSION
147 - select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
148 -+ select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
149 - select HAVE_MOD_ARCH_SPECIFIC
150 - select MODULES_USE_ELF_REL
151 - select MODULES_USE_ELF_RELA
152 -diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
153 -index 65a07750f4f9..bb74b21f007a 100644
154 ---- a/arch/s390/Kconfig
155 -+++ b/arch/s390/Kconfig
156 -@@ -117,6 +117,7 @@ config S390
157 - select HAVE_FUNCTION_GRAPH_TRACER
158 - select HAVE_FUNCTION_TRACER
159 - select HAVE_FUNCTION_TRACE_MCOUNT_TEST
160 -+ select HAVE_FUTEX_CMPXCHG if FUTEX
161 - select HAVE_KERNEL_BZIP2
162 - select HAVE_KERNEL_GZIP
163 - select HAVE_KERNEL_LZ4
164 -diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
165 -index 586f41aac361..185fad49d86f 100644
166 ---- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
167 -+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
168 -@@ -24,10 +24,6 @@
169 - .align 16
170 - .Lbswap_mask:
171 - .octa 0x000102030405060708090a0b0c0d0e0f
172 --.Lpoly:
173 -- .octa 0xc2000000000000000000000000000001
174 --.Ltwo_one:
175 -- .octa 0x00000001000000000000000000000001
176 -
177 - #define DATA %xmm0
178 - #define SHASH %xmm1
179 -@@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update)
180 - .Lupdate_just_ret:
181 - ret
182 - ENDPROC(clmul_ghash_update)
183 --
184 --/*
185 -- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
186 -- *
187 -- * Calculate hash_key << 1 mod poly
188 -- */
189 --ENTRY(clmul_ghash_setkey)
190 -- movaps .Lbswap_mask, BSWAP
191 -- movups (%rsi), %xmm0
192 -- PSHUFB_XMM BSWAP %xmm0
193 -- movaps %xmm0, %xmm1
194 -- psllq $1, %xmm0
195 -- psrlq $63, %xmm1
196 -- movaps %xmm1, %xmm2
197 -- pslldq $8, %xmm1
198 -- psrldq $8, %xmm2
199 -- por %xmm1, %xmm0
200 -- # reduction
201 -- pshufd $0b00100100, %xmm2, %xmm1
202 -- pcmpeqd .Ltwo_one, %xmm1
203 -- pand .Lpoly, %xmm1
204 -- pxor %xmm1, %xmm0
205 -- movups %xmm0, (%rdi)
206 -- ret
207 --ENDPROC(clmul_ghash_setkey)
208 -diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
209 -index 6759dd1135be..d785cf2c529c 100644
210 ---- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
211 -+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
212 -@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
213 - void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
214 - const be128 *shash);
215 -
216 --void clmul_ghash_setkey(be128 *shash, const u8 *key);
217 --
218 - struct ghash_async_ctx {
219 - struct cryptd_ahash *cryptd_tfm;
220 - };
221 -@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
222 - const u8 *key, unsigned int keylen)
223 - {
224 - struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
225 -+ be128 *x = (be128 *)key;
226 -+ u64 a, b;
227 -
228 - if (keylen != GHASH_BLOCK_SIZE) {
229 - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
230 - return -EINVAL;
231 - }
232 -
233 -- clmul_ghash_setkey(&ctx->shash, key);
234 -+ /* perform multiplication by 'x' in GF(2^128) */
235 -+ a = be64_to_cpu(x->a);
236 -+ b = be64_to_cpu(x->b);
237 -+
238 -+ ctx->shash.a = (__be64)((b << 1) | (a >> 63));
239 -+ ctx->shash.b = (__be64)((a << 1) | (b >> 63));
240 -+
241 -+ if (a >> 63)
242 -+ ctx->shash.b ^= cpu_to_be64(0xc2);
243 -
244 - return 0;
245 - }
246 -diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
247 -index acd86c850414..f949715e3957 100644
248 ---- a/arch/x86/include/asm/efi.h
249 -+++ b/arch/x86/include/asm/efi.h
250 -@@ -130,7 +130,8 @@ extern void efi_memory_uc(u64 addr, unsigned long size);
251 - extern void __init efi_map_region(efi_memory_desc_t *md);
252 - extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
253 - extern void efi_sync_low_kernel_mappings(void);
254 --extern void efi_setup_page_tables(void);
255 -+extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
256 -+extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
257 - extern void __init old_map_region(efi_memory_desc_t *md);
258 - extern void __init runtime_code_page_mkexec(void);
259 - extern void __init efi_runtime_mkexec(void);
260 -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
261 -index 1aa9ccd43223..94e40f1efdfd 100644
262 ---- a/arch/x86/include/asm/pgtable_types.h
263 -+++ b/arch/x86/include/asm/pgtable_types.h
264 -@@ -385,6 +385,8 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level);
265 - extern phys_addr_t slow_virt_to_phys(void *__address);
266 - extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
267 - unsigned numpages, unsigned long page_flags);
268 -+void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
269 -+ unsigned numpages);
270 - #endif /* !__ASSEMBLY__ */
271 -
272 - #endif /* _ASM_X86_PGTABLE_DEFS_H */
273 -diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
274 -index b3b19f46c016..a3488689e301 100644
275 ---- a/arch/x86/mm/pageattr.c
276 -+++ b/arch/x86/mm/pageattr.c
277 -@@ -692,6 +692,18 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
278 - return true;
279 - }
280 -
281 -+static bool try_to_free_pud_page(pud_t *pud)
282 -+{
283 -+ int i;
284 -+
285 -+ for (i = 0; i < PTRS_PER_PUD; i++)
286 -+ if (!pud_none(pud[i]))
287 -+ return false;
288 -+
289 -+ free_page((unsigned long)pud);
290 -+ return true;
291 -+}
292 -+
293 - static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
294 - {
295 - pte_t *pte = pte_offset_kernel(pmd, start);
296 -@@ -805,6 +817,16 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
297 - */
298 - }
299 -
300 -+static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
301 -+{
302 -+ pgd_t *pgd_entry = root + pgd_index(addr);
303 -+
304 -+ unmap_pud_range(pgd_entry, addr, end);
305 -+
306 -+ if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
307 -+ pgd_clear(pgd_entry);
308 -+}
309 -+
310 - static int alloc_pte_page(pmd_t *pmd)
311 - {
312 - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
313 -@@ -999,9 +1021,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
314 - static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
315 - {
316 - pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
317 -- bool allocd_pgd = false;
318 -- pgd_t *pgd_entry;
319 - pud_t *pud = NULL; /* shut up gcc */
320 -+ pgd_t *pgd_entry;
321 - int ret;
322 -
323 - pgd_entry = cpa->pgd + pgd_index(addr);
324 -@@ -1015,7 +1036,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
325 - return -1;
326 -
327 - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
328 -- allocd_pgd = true;
329 - }
330 -
331 - pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
332 -@@ -1023,19 +1043,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
333 -
334 - ret = populate_pud(cpa, addr, pgd_entry, pgprot);
335 - if (ret < 0) {
336 -- unmap_pud_range(pgd_entry, addr,
337 -+ unmap_pgd_range(cpa->pgd, addr,
338 - addr + (cpa->numpages << PAGE_SHIFT));
339 --
340 -- if (allocd_pgd) {
341 -- /*
342 -- * If I allocated this PUD page, I can just as well
343 -- * free it in this error path.
344 -- */
345 -- pgd_clear(pgd_entry);
346 -- free_page((unsigned long)pud);
347 -- }
348 - return ret;
349 - }
350 -+
351 - cpa->numpages = ret;
352 - return 0;
353 - }
354 -@@ -1861,6 +1873,12 @@ out:
355 - return retval;
356 - }
357 -
358 -+void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
359 -+ unsigned numpages)
360 -+{
361 -+ unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
362 -+}
363 -+
364 - /*
365 - * The testcases use internal knowledge of the implementation that shouldn't
366 - * be exposed to the rest of the kernel. Include these directly here.
367 -diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
368 -index b97acecf3fd9..abb81b0ad83f 100644
369 ---- a/arch/x86/platform/efi/efi.c
370 -+++ b/arch/x86/platform/efi/efi.c
371 -@@ -939,14 +939,36 @@ static void __init efi_map_regions_fixed(void)
372 -
373 - }
374 -
375 -+static void *realloc_pages(void *old_memmap, int old_shift)
376 -+{
377 -+ void *ret;
378 -+
379 -+ ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
380 -+ if (!ret)
381 -+ goto out;
382 -+
383 -+ /*
384 -+ * A first-time allocation doesn't have anything to copy.
385 -+ */
386 -+ if (!old_memmap)
387 -+ return ret;
388 -+
389 -+ memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
390 -+
391 -+out:
392 -+ free_pages((unsigned long)old_memmap, old_shift);
393 -+ return ret;
394 -+}
395 -+
396 - /*
397 -- * Map efi memory ranges for runtime serivce and update new_memmap with virtual
398 -- * addresses.
399 -+ * Map the efi memory ranges of the runtime services and update new_mmap with
400 -+ * virtual addresses.
401 - */
402 --static void * __init efi_map_regions(int *count)
403 -+static void * __init efi_map_regions(int *count, int *pg_shift)
404 - {
405 -+ void *p, *new_memmap = NULL;
406 -+ unsigned long left = 0;
407 - efi_memory_desc_t *md;
408 -- void *p, *tmp, *new_memmap = NULL;
409 -
410 - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
411 - md = p;
412 -@@ -961,20 +983,23 @@ static void * __init efi_map_regions(int *count)
413 - efi_map_region(md);
414 - get_systab_virt_addr(md);
415 -
416 -- tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
417 -- GFP_KERNEL);
418 -- if (!tmp)
419 -- goto out;
420 -- new_memmap = tmp;
421 -+ if (left < memmap.desc_size) {
422 -+ new_memmap = realloc_pages(new_memmap, *pg_shift);
423 -+ if (!new_memmap)
424 -+ return NULL;
425 -+
426 -+ left += PAGE_SIZE << *pg_shift;
427 -+ (*pg_shift)++;
428 -+ }
429 -+
430 - memcpy(new_memmap + (*count * memmap.desc_size), md,
431 - memmap.desc_size);
432 -+
433 -+ left -= memmap.desc_size;
434 - (*count)++;
435 - }
436 -
437 - return new_memmap;
438 --out:
439 -- kfree(new_memmap);
440 -- return NULL;
441 - }
442 -
443 - /*
444 -@@ -1000,9 +1025,9 @@ out:
445 - */
446 - void __init efi_enter_virtual_mode(void)
447 - {
448 -- efi_status_t status;
449 -+ int err, count = 0, pg_shift = 0;
450 - void *new_memmap = NULL;
451 -- int err, count = 0;
452 -+ efi_status_t status;
453 -
454 - efi.systab = NULL;
455 -
456 -@@ -1019,20 +1044,24 @@ void __init efi_enter_virtual_mode(void)
457 - efi_map_regions_fixed();
458 - } else {
459 - efi_merge_regions();
460 -- new_memmap = efi_map_regions(&count);
461 -+ new_memmap = efi_map_regions(&count, &pg_shift);
462 - if (!new_memmap) {
463 - pr_err("Error reallocating memory, EFI runtime non-functional!\n");
464 - return;
465 - }
466 -- }
467 -
468 -- err = save_runtime_map();
469 -- if (err)
470 -- pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
471 -+ err = save_runtime_map();
472 -+ if (err)
473 -+ pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
474 -+ }
475 -
476 - BUG_ON(!efi.systab);
477 -
478 -- efi_setup_page_tables();
479 -+ if (!efi_setup) {
480 -+ if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
481 -+ return;
482 -+ }
483 -+
484 - efi_sync_low_kernel_mappings();
485 -
486 - if (!efi_setup) {
487 -@@ -1072,7 +1101,35 @@ void __init efi_enter_virtual_mode(void)
488 -
489 - efi_runtime_mkexec();
490 -
491 -- kfree(new_memmap);
492 -+
493 -+ /*
494 -+ * We mapped the descriptor array into the EFI pagetable above but we're
495 -+ * not unmapping it here. Here's why:
496 -+ *
497 -+ * We're copying select PGDs from the kernel page table to the EFI page
498 -+ * table and when we do so and make changes to those PGDs like unmapping
499 -+ * stuff from them, those changes appear in the kernel page table and we
500 -+ * go boom.
501 -+ *
502 -+ * From setup_real_mode():
503 -+ *
504 -+ * ...
505 -+ * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
506 -+ *
507 -+ * In this particular case, our allocation is in PGD 0 of the EFI page
508 -+ * table but we've copied that PGD from PGD[272] of the EFI page table:
509 -+ *
510 -+ * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
511 -+ *
512 -+ * where the direct memory mapping in kernel space is.
513 -+ *
514 -+ * new_memmap's VA comes from that direct mapping and thus clearing it,
515 -+ * it would get cleared in the kernel page table too.
516 -+ *
517 -+ * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
518 -+ */
519 -+ if (!efi_setup)
520 -+ free_pages((unsigned long)new_memmap, pg_shift);
521 -
522 - /* clean DUMMY object */
523 - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
524 -diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
525 -index 0b74cdf7f816..9ee3491e31fb 100644
526 ---- a/arch/x86/platform/efi/efi_32.c
527 -+++ b/arch/x86/platform/efi/efi_32.c
528 -@@ -40,7 +40,12 @@
529 - static unsigned long efi_rt_eflags;
530 -
531 - void efi_sync_low_kernel_mappings(void) {}
532 --void efi_setup_page_tables(void) {}
533 -+void __init efi_dump_pagetable(void) {}
534 -+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
535 -+{
536 -+ return 0;
537 -+}
538 -+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
539 -
540 - void __init efi_map_region(efi_memory_desc_t *md)
541 - {
542 -diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
543 -index 0c2a234fef1e..666b74a09092 100644
544 ---- a/arch/x86/platform/efi/efi_64.c
545 -+++ b/arch/x86/platform/efi/efi_64.c
546 -@@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void)
547 - sizeof(pgd_t) * num_pgds);
548 - }
549 -
550 --void efi_setup_page_tables(void)
551 -+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
552 - {
553 -+ pgd_t *pgd;
554 -+
555 -+ if (efi_enabled(EFI_OLD_MEMMAP))
556 -+ return 0;
557 -+
558 - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
559 -+ pgd = __va(efi_scratch.efi_pgt);
560 -
561 -- if (!efi_enabled(EFI_OLD_MEMMAP))
562 -- efi_scratch.use_pgd = true;
563 -+ /*
564 -+ * It can happen that the physical address of new_memmap lands in memory
565 -+ * which is not mapped in the EFI page table. Therefore we need to go
566 -+ * and ident-map those pages containing the map before calling
567 -+ * phys_efi_set_virtual_address_map().
568 -+ */
569 -+ if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
570 -+ pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
571 -+ return 1;
572 -+ }
573 -+
574 -+ efi_scratch.use_pgd = true;
575 -+
576 -+
577 -+ return 0;
578 -+}
579 -+
580 -+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
581 -+{
582 -+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
583 -+
584 -+ kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
585 - }
586 -
587 - static void __init __map_region(efi_memory_desc_t *md, u64 va)
588 -diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
589 -index 02125e6a9109..5a4da94aefb0 100644
590 ---- a/drivers/isdn/isdnloop/isdnloop.c
591 -+++ b/drivers/isdn/isdnloop/isdnloop.c
592 -@@ -518,9 +518,9 @@ static isdnloop_stat isdnloop_cmd_table[] =
593 - static void
594 - isdnloop_fake_err(isdnloop_card *card)
595 - {
596 -- char buf[60];
597 -+ char buf[64];
598 -
599 -- sprintf(buf, "E%s", card->omsg);
600 -+ snprintf(buf, sizeof(buf), "E%s", card->omsg);
601 - isdnloop_fake(card, buf, -1);
602 - isdnloop_fake(card, "NAK", -1);
603 - }
604 -@@ -903,6 +903,8 @@ isdnloop_parse_cmd(isdnloop_card *card)
605 - case 7:
606 - /* 0x;EAZ */
607 - p += 3;
608 -+ if (strlen(p) >= sizeof(card->eazlist[0]))
609 -+ break;
610 - strcpy(card->eazlist[ch - 1], p);
611 - break;
612 - case 8:
613 -@@ -1070,6 +1072,12 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp)
614 - return -EBUSY;
615 - if (copy_from_user((char *) &sdef, (char *) sdefp, sizeof(sdef)))
616 - return -EFAULT;
617 -+
618 -+ for (i = 0; i < 3; i++) {
619 -+ if (!memchr(sdef.num[i], 0, sizeof(sdef.num[i])))
620 -+ return -EINVAL;
621 -+ }
622 -+
623 - spin_lock_irqsave(&card->isdnloop_lock, flags);
624 - switch (sdef.ptype) {
625 - case ISDN_PTYPE_EURO:
626 -@@ -1127,7 +1135,7 @@ isdnloop_command(isdn_ctrl *c, isdnloop_card *card)
627 - {
628 - ulong a;
629 - int i;
630 -- char cbuf[60];
631 -+ char cbuf[80];
632 - isdn_ctrl cmd;
633 - isdnloop_cdef cdef;
634 -
635 -@@ -1192,7 +1200,6 @@ isdnloop_command(isdn_ctrl *c, isdnloop_card *card)
636 - break;
637 - if ((c->arg & 255) < ISDNLOOP_BCH) {
638 - char *p;
639 -- char dial[50];
640 - char dcode[4];
641 -
642 - a = c->arg;
643 -@@ -1204,10 +1211,10 @@ isdnloop_command(isdn_ctrl *c, isdnloop_card *card)
644 - } else
645 - /* Normal Dial */
646 - strcpy(dcode, "CAL");
647 -- strcpy(dial, p);
648 -- sprintf(cbuf, "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1),
649 -- dcode, dial, c->parm.setup.si1,
650 -- c->parm.setup.si2, c->parm.setup.eazmsn);
651 -+ snprintf(cbuf, sizeof(cbuf),
652 -+ "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1),
653 -+ dcode, p, c->parm.setup.si1,
654 -+ c->parm.setup.si2, c->parm.setup.eazmsn);
655 - i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card);
656 - }
657 - break;
658 -diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
659 -index ce75de9bae9e..4a79edaf3885 100644
660 ---- a/drivers/net/ethernet/cadence/at91_ether.c
661 -+++ b/drivers/net/ethernet/cadence/at91_ether.c
662 -@@ -342,6 +342,9 @@ static int __init at91ether_probe(struct platform_device *pdev)
663 - }
664 - clk_enable(lp->pclk);
665 -
666 -+ lp->hclk = ERR_PTR(-ENOENT);
667 -+ lp->tx_clk = ERR_PTR(-ENOENT);
668 -+
669 - /* Install the interrupt handler */
670 - dev->irq = platform_get_irq(pdev, 0);
671 - res = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt, 0, dev->name, dev);
672 -diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
673 -index 174a92f5fe51..7645a3ce3854 100644
674 ---- a/drivers/net/ethernet/sfc/ef10.c
675 -+++ b/drivers/net/ethernet/sfc/ef10.c
676 -@@ -565,10 +565,17 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
677 - * several of each (in fact that's the only option if host
678 - * page size is >4K). So we may allocate some extra VIs just
679 - * for writing PIO buffers through.
680 -+ *
681 -+ * The UC mapping contains (min_vis - 1) complete VIs and the
682 -+ * first half of the next VI. Then the WC mapping begins with
683 -+ * the second half of this last VI.
684 - */
685 - uc_mem_map_size = PAGE_ALIGN((min_vis - 1) * EFX_VI_PAGE_SIZE +
686 - ER_DZ_TX_PIOBUF);
687 - if (nic_data->n_piobufs) {
688 -+ /* pio_write_vi_base rounds down to give the number of complete
689 -+ * VIs inside the UC mapping.
690 -+ */
691 - pio_write_vi_base = uc_mem_map_size / EFX_VI_PAGE_SIZE;
692 - wc_mem_map_size = (PAGE_ALIGN((pio_write_vi_base +
693 - nic_data->n_piobufs) *
694 -diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
695 -index 83d464347021..f06c790fba5a 100644
696 ---- a/drivers/net/ethernet/sfc/efx.c
697 -+++ b/drivers/net/ethernet/sfc/efx.c
698 -@@ -1603,6 +1603,8 @@ static int efx_probe_nic(struct efx_nic *efx)
699 - if (rc)
700 - goto fail1;
701 -
702 -+ efx_set_channels(efx);
703 -+
704 - rc = efx->type->dimension_resources(efx);
705 - if (rc)
706 - goto fail2;
707 -@@ -1613,7 +1615,6 @@ static int efx_probe_nic(struct efx_nic *efx)
708 - efx->rx_indir_table[i] =
709 - ethtool_rxfh_indir_default(i, efx->rss_spread);
710 -
711 -- efx_set_channels(efx);
712 - netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
713 - netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
714 -
715 -diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
716 -index 1236812c7be6..d091e52b00e1 100644
717 ---- a/drivers/net/vxlan.c
718 -+++ b/drivers/net/vxlan.c
719 -@@ -871,6 +871,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
720 - if (err)
721 - return err;
722 -
723 -+ if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
724 -+ return -EAFNOSUPPORT;
725 -+
726 - spin_lock_bh(&vxlan->hash_lock);
727 - err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
728 - port, vni, ifindex, ndm->ndm_flags);
729 -@@ -2612,9 +2615,10 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
730 - vni = nla_get_u32(data[IFLA_VXLAN_ID]);
731 - dst->remote_vni = vni;
732 -
733 -+ /* Unless IPv6 is explicitly requested, assume IPv4 */
734 -+ dst->remote_ip.sa.sa_family = AF_INET;
735 - if (data[IFLA_VXLAN_GROUP]) {
736 - dst->remote_ip.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
737 -- dst->remote_ip.sa.sa_family = AF_INET;
738 - } else if (data[IFLA_VXLAN_GROUP6]) {
739 - if (!IS_ENABLED(CONFIG_IPV6))
740 - return -EPFNOSUPPORT;
741 -diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c
742 -index 6abf74e1351f..5bc871513505 100644
743 ---- a/drivers/net/wireless/iwlwifi/mvm/rs.c
744 -+++ b/drivers/net/wireless/iwlwifi/mvm/rs.c
745 -@@ -211,9 +211,9 @@ static const struct rs_tx_column rs_tx_columns[] = {
746 - .next_columns = {
747 - RS_COLUMN_LEGACY_ANT_B,
748 - RS_COLUMN_SISO_ANT_A,
749 -+ RS_COLUMN_SISO_ANT_B,
750 - RS_COLUMN_MIMO2,
751 -- RS_COLUMN_INVALID,
752 -- RS_COLUMN_INVALID,
753 -+ RS_COLUMN_MIMO2_SGI,
754 - },
755 - },
756 - [RS_COLUMN_LEGACY_ANT_B] = {
757 -@@ -221,10 +221,10 @@ static const struct rs_tx_column rs_tx_columns[] = {
758 - .ant = ANT_B,
759 - .next_columns = {
760 - RS_COLUMN_LEGACY_ANT_A,
761 -+ RS_COLUMN_SISO_ANT_A,
762 - RS_COLUMN_SISO_ANT_B,
763 - RS_COLUMN_MIMO2,
764 -- RS_COLUMN_INVALID,
765 -- RS_COLUMN_INVALID,
766 -+ RS_COLUMN_MIMO2_SGI,
767 - },
768 - },
769 - [RS_COLUMN_SISO_ANT_A] = {
770 -@@ -234,8 +234,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
771 - RS_COLUMN_SISO_ANT_B,
772 - RS_COLUMN_MIMO2,
773 - RS_COLUMN_SISO_ANT_A_SGI,
774 -- RS_COLUMN_INVALID,
775 -- RS_COLUMN_INVALID,
776 -+ RS_COLUMN_SISO_ANT_B_SGI,
777 -+ RS_COLUMN_MIMO2_SGI,
778 - },
779 - .checks = {
780 - rs_siso_allow,
781 -@@ -248,8 +248,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
782 - RS_COLUMN_SISO_ANT_A,
783 - RS_COLUMN_MIMO2,
784 - RS_COLUMN_SISO_ANT_B_SGI,
785 -- RS_COLUMN_INVALID,
786 -- RS_COLUMN_INVALID,
787 -+ RS_COLUMN_SISO_ANT_A_SGI,
788 -+ RS_COLUMN_MIMO2_SGI,
789 - },
790 - .checks = {
791 - rs_siso_allow,
792 -@@ -263,8 +263,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
793 - RS_COLUMN_SISO_ANT_B_SGI,
794 - RS_COLUMN_MIMO2_SGI,
795 - RS_COLUMN_SISO_ANT_A,
796 -- RS_COLUMN_INVALID,
797 -- RS_COLUMN_INVALID,
798 -+ RS_COLUMN_SISO_ANT_B,
799 -+ RS_COLUMN_MIMO2,
800 - },
801 - .checks = {
802 - rs_siso_allow,
803 -@@ -279,8 +279,8 @@ static const struct rs_tx_column rs_tx_columns[] = {
804 - RS_COLUMN_SISO_ANT_A_SGI,
805 - RS_COLUMN_MIMO2_SGI,
806 - RS_COLUMN_SISO_ANT_B,
807 -- RS_COLUMN_INVALID,
808 -- RS_COLUMN_INVALID,
809 -+ RS_COLUMN_SISO_ANT_A,
810 -+ RS_COLUMN_MIMO2,
811 - },
812 - .checks = {
813 - rs_siso_allow,
814 -@@ -292,10 +292,10 @@ static const struct rs_tx_column rs_tx_columns[] = {
815 - .ant = ANT_AB,
816 - .next_columns = {
817 - RS_COLUMN_SISO_ANT_A,
818 -+ RS_COLUMN_SISO_ANT_B,
819 -+ RS_COLUMN_SISO_ANT_A_SGI,
820 -+ RS_COLUMN_SISO_ANT_B_SGI,
821 - RS_COLUMN_MIMO2_SGI,
822 -- RS_COLUMN_INVALID,
823 -- RS_COLUMN_INVALID,
824 -- RS_COLUMN_INVALID,
825 - },
826 - .checks = {
827 - rs_mimo_allow,
828 -@@ -307,10 +307,10 @@ static const struct rs_tx_column rs_tx_columns[] = {
829 - .sgi = true,
830 - .next_columns = {
831 - RS_COLUMN_SISO_ANT_A_SGI,
832 -+ RS_COLUMN_SISO_ANT_B_SGI,
833 -+ RS_COLUMN_SISO_ANT_A,
834 -+ RS_COLUMN_SISO_ANT_B,
835 - RS_COLUMN_MIMO2,
836 -- RS_COLUMN_INVALID,
837 -- RS_COLUMN_INVALID,
838 -- RS_COLUMN_INVALID,
839 - },
840 - .checks = {
841 - rs_mimo_allow,
842 -diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
843 -index ae413a2cbee7..4bf5b334664e 100644
844 ---- a/drivers/net/xen-netback/common.h
845 -+++ b/drivers/net/xen-netback/common.h
846 -@@ -113,6 +113,11 @@ struct xenvif {
847 - domid_t domid;
848 - unsigned int handle;
849 -
850 -+ /* Is this interface disabled? True when backend discovers
851 -+ * frontend is rogue.
852 -+ */
853 -+ bool disabled;
854 -+
855 - /* Use NAPI for guest TX */
856 - struct napi_struct napi;
857 - /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
858 -diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
859 -index 301cc037fda8..2e92d52c0a6d 100644
860 ---- a/drivers/net/xen-netback/interface.c
861 -+++ b/drivers/net/xen-netback/interface.c
862 -@@ -62,6 +62,15 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
863 - struct xenvif *vif = container_of(napi, struct xenvif, napi);
864 - int work_done;
865 -
866 -+ /* This vif is rogue, we pretend we've there is nothing to do
867 -+ * for this vif to deschedule it from NAPI. But this interface
868 -+ * will be turned off in thread context later.
869 -+ */
870 -+ if (unlikely(vif->disabled)) {
871 -+ napi_complete(napi);
872 -+ return 0;
873 -+ }
874 -+
875 - work_done = xenvif_tx_action(vif, budget);
876 -
877 - if (work_done < budget) {
878 -@@ -321,6 +330,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
879 - vif->ip_csum = 1;
880 - vif->dev = dev;
881 -
882 -+ vif->disabled = false;
883 -+
884 - vif->credit_bytes = vif->remaining_credit = ~0UL;
885 - vif->credit_usec = 0UL;
886 - init_timer(&vif->credit_timeout);
887 -diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
888 -index 438d0c09b7e6..97030c193afd 100644
889 ---- a/drivers/net/xen-netback/netback.c
890 -+++ b/drivers/net/xen-netback/netback.c
891 -@@ -192,8 +192,8 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
892 - * into multiple copies tend to give large frags their
893 - * own buffers as before.
894 - */
895 -- if ((offset + size > MAX_BUFFER_OFFSET) &&
896 -- (size <= MAX_BUFFER_OFFSET) && offset && !head)
897 -+ BUG_ON(size > MAX_BUFFER_OFFSET);
898 -+ if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
899 - return true;
900 -
901 - return false;
902 -@@ -482,6 +482,8 @@ static void xenvif_rx_action(struct xenvif *vif)
903 -
904 - while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
905 - RING_IDX max_slots_needed;
906 -+ RING_IDX old_req_cons;
907 -+ RING_IDX ring_slots_used;
908 - int i;
909 -
910 - /* We need a cheap worse case estimate for the number of
911 -@@ -493,9 +495,28 @@ static void xenvif_rx_action(struct xenvif *vif)
912 - PAGE_SIZE);
913 - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
914 - unsigned int size;
915 -+ unsigned int offset;
916 -+
917 - size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
918 -- max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
919 -+ offset = skb_shinfo(skb)->frags[i].page_offset;
920 -+
921 -+ /* For a worse-case estimate we need to factor in
922 -+ * the fragment page offset as this will affect the
923 -+ * number of times xenvif_gop_frag_copy() will
924 -+ * call start_new_rx_buffer().
925 -+ */
926 -+ max_slots_needed += DIV_ROUND_UP(offset + size,
927 -+ PAGE_SIZE);
928 - }
929 -+
930 -+ /* To avoid the estimate becoming too pessimal for some
931 -+ * frontends that limit posted rx requests, cap the estimate
932 -+ * at MAX_SKB_FRAGS.
933 -+ */
934 -+ if (max_slots_needed > MAX_SKB_FRAGS)
935 -+ max_slots_needed = MAX_SKB_FRAGS;
936 -+
937 -+ /* We may need one more slot for GSO metadata */
938 - if (skb_is_gso(skb) &&
939 - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
940 - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
941 -@@ -511,8 +532,12 @@ static void xenvif_rx_action(struct xenvif *vif)
942 - vif->rx_last_skb_slots = 0;
943 -
944 - sco = (struct skb_cb_overlay *)skb->cb;
945 -+
946 -+ old_req_cons = vif->rx.req_cons;
947 - sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
948 -- BUG_ON(sco->meta_slots_used > max_slots_needed);
949 -+ ring_slots_used = vif->rx.req_cons - old_req_cons;
950 -+
951 -+ BUG_ON(ring_slots_used > max_slots_needed);
952 -
953 - __skb_queue_tail(&rxq, skb);
954 - }
955 -@@ -655,7 +680,8 @@ static void xenvif_tx_err(struct xenvif *vif,
956 - static void xenvif_fatal_tx_err(struct xenvif *vif)
957 - {
958 - netdev_err(vif->dev, "fatal error; disabling device\n");
959 -- xenvif_carrier_off(vif);
960 -+ vif->disabled = true;
961 -+ xenvif_kick_thread(vif);
962 - }
963 -
964 - static int xenvif_count_requests(struct xenvif *vif,
965 -@@ -1126,7 +1152,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
966 - vif->tx.sring->req_prod, vif->tx.req_cons,
967 - XEN_NETIF_TX_RING_SIZE);
968 - xenvif_fatal_tx_err(vif);
969 -- continue;
970 -+ break;
971 - }
972 -
973 - work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
974 -@@ -1548,7 +1574,18 @@ int xenvif_kthread(void *data)
975 - while (!kthread_should_stop()) {
976 - wait_event_interruptible(vif->wq,
977 - rx_work_todo(vif) ||
978 -+ vif->disabled ||
979 - kthread_should_stop());
980 -+
981 -+ /* This frontend is found to be rogue, disable it in
982 -+ * kthread context. Currently this is only set when
983 -+ * netback finds out frontend sends malformed packet,
984 -+ * but we cannot disable the interface in softirq
985 -+ * context so we defer it here.
986 -+ */
987 -+ if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
988 -+ xenvif_carrier_off(vif);
989 -+
990 - if (kthread_should_stop())
991 - break;
992 -
993 -diff --git a/include/linux/futex.h b/include/linux/futex.h
994 -index b0d95cac826e..6435f46d6e13 100644
995 ---- a/include/linux/futex.h
996 -+++ b/include/linux/futex.h
997 -@@ -55,7 +55,11 @@ union futex_key {
998 - #ifdef CONFIG_FUTEX
999 - extern void exit_robust_list(struct task_struct *curr);
1000 - extern void exit_pi_state_list(struct task_struct *curr);
1001 -+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
1002 -+#define futex_cmpxchg_enabled 1
1003 -+#else
1004 - extern int futex_cmpxchg_enabled;
1005 -+#endif
1006 - #else
1007 - static inline void exit_robust_list(struct task_struct *curr)
1008 - {
1009 -diff --git a/init/Kconfig b/init/Kconfig
1010 -index 009a797dd242..d56cb03c1b49 100644
1011 ---- a/init/Kconfig
1012 -+++ b/init/Kconfig
1013 -@@ -1387,6 +1387,13 @@ config FUTEX
1014 - support for "fast userspace mutexes". The resulting kernel may not
1015 - run glibc-based applications correctly.
1016 -
1017 -+config HAVE_FUTEX_CMPXCHG
1018 -+ bool
1019 -+ help
1020 -+ Architectures should select this if futex_atomic_cmpxchg_inatomic()
1021 -+ is implemented and always working. This removes a couple of runtime
1022 -+ checks.
1023 -+
1024 - config EPOLL
1025 - bool "Enable eventpoll support" if EXPERT
1026 - default y
1027 -diff --git a/kernel/futex.c b/kernel/futex.c
1028 -index 08ec814ad9d2..6801b3751a95 100644
1029 ---- a/kernel/futex.c
1030 -+++ b/kernel/futex.c
1031 -@@ -157,7 +157,9 @@
1032 - * enqueue.
1033 - */
1034 -
1035 -+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
1036 - int __read_mostly futex_cmpxchg_enabled;
1037 -+#endif
1038 -
1039 - /*
1040 - * Futex flags used to encode options to functions and preserve them across
1041 -@@ -1450,6 +1452,7 @@ retry:
1042 - hb2 = hash_futex(&key2);
1043 -
1044 - retry_private:
1045 -+ hb_waiters_inc(hb2);
1046 - double_lock_hb(hb1, hb2);
1047 -
1048 - if (likely(cmpval != NULL)) {
1049 -@@ -1459,6 +1462,7 @@ retry_private:
1050 -
1051 - if (unlikely(ret)) {
1052 - double_unlock_hb(hb1, hb2);
1053 -+ hb_waiters_dec(hb2);
1054 -
1055 - ret = get_user(curval, uaddr1);
1056 - if (ret)
1057 -@@ -1508,6 +1512,7 @@ retry_private:
1058 - break;
1059 - case -EFAULT:
1060 - double_unlock_hb(hb1, hb2);
1061 -+ hb_waiters_dec(hb2);
1062 - put_futex_key(&key2);
1063 - put_futex_key(&key1);
1064 - ret = fault_in_user_writeable(uaddr2);
1065 -@@ -1517,6 +1522,7 @@ retry_private:
1066 - case -EAGAIN:
1067 - /* The owner was exiting, try again. */
1068 - double_unlock_hb(hb1, hb2);
1069 -+ hb_waiters_dec(hb2);
1070 - put_futex_key(&key2);
1071 - put_futex_key(&key1);
1072 - cond_resched();
1073 -@@ -1592,6 +1598,7 @@ retry_private:
1074 -
1075 - out_unlock:
1076 - double_unlock_hb(hb1, hb2);
1077 -+ hb_waiters_dec(hb2);
1078 -
1079 - /*
1080 - * drop_futex_key_refs() must be called outside the spinlocks. During
1081 -@@ -2875,9 +2882,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
1082 - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
1083 - }
1084 -
1085 --static int __init futex_init(void)
1086 -+static void __init futex_detect_cmpxchg(void)
1087 - {
1088 -+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
1089 - u32 curval;
1090 -+
1091 -+ /*
1092 -+ * This will fail and we want it. Some arch implementations do
1093 -+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
1094 -+ * functionality. We want to know that before we call in any
1095 -+ * of the complex code paths. Also we want to prevent
1096 -+ * registration of robust lists in that case. NULL is
1097 -+ * guaranteed to fault and we get -EFAULT on functional
1098 -+ * implementation, the non-functional ones will return
1099 -+ * -ENOSYS.
1100 -+ */
1101 -+ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
1102 -+ futex_cmpxchg_enabled = 1;
1103 -+#endif
1104 -+}
1105 -+
1106 -+static int __init futex_init(void)
1107 -+{
1108 - unsigned int futex_shift;
1109 - unsigned long i;
1110 -
1111 -@@ -2893,18 +2919,8 @@ static int __init futex_init(void)
1112 - &futex_shift, NULL,
1113 - futex_hashsize, futex_hashsize);
1114 - futex_hashsize = 1UL << futex_shift;
1115 -- /*
1116 -- * This will fail and we want it. Some arch implementations do
1117 -- * runtime detection of the futex_atomic_cmpxchg_inatomic()
1118 -- * functionality. We want to know that before we call in any
1119 -- * of the complex code paths. Also we want to prevent
1120 -- * registration of robust lists in that case. NULL is
1121 -- * guaranteed to fault and we get -EFAULT on functional
1122 -- * implementation, the non-functional ones will return
1123 -- * -ENOSYS.
1124 -- */
1125 -- if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
1126 -- futex_cmpxchg_enabled = 1;
1127 -+
1128 -+ futex_detect_cmpxchg();
1129 -
1130 - for (i = 0; i < futex_hashsize; i++) {
1131 - atomic_set(&futex_queues[i].waiters, 0);
1132 -diff --git a/lib/nlattr.c b/lib/nlattr.c
1133 -index 18eca7809b08..fc6754720ced 100644
1134 ---- a/lib/nlattr.c
1135 -+++ b/lib/nlattr.c
1136 -@@ -303,9 +303,15 @@ int nla_memcmp(const struct nlattr *nla, const void *data,
1137 - */
1138 - int nla_strcmp(const struct nlattr *nla, const char *str)
1139 - {
1140 -- int len = strlen(str) + 1;
1141 -- int d = nla_len(nla) - len;
1142 -+ int len = strlen(str);
1143 -+ char *buf = nla_data(nla);
1144 -+ int attrlen = nla_len(nla);
1145 -+ int d;
1146 -
1147 -+ if (attrlen > 0 && buf[attrlen - 1] == '\0')
1148 -+ attrlen--;
1149 -+
1150 -+ d = attrlen - len;
1151 - if (d == 0)
1152 - d = memcmp(nla_data(nla), str, len);
1153 -
1154 -diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
1155 -index f2610e157660..7b326529e6a2 100644
1156 ---- a/net/ipv6/icmp.c
1157 -+++ b/net/ipv6/icmp.c
1158 -@@ -520,7 +520,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
1159 - np->tclass, NULL, &fl6, (struct rt6_info *)dst,
1160 - MSG_DONTWAIT, np->dontfrag);
1161 - if (err) {
1162 -- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
1163 -+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
1164 - ip6_flush_pending_frames(sk);
1165 - } else {
1166 - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
1167 -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
1168 -index 64d6073731d3..3702d179506d 100644
1169 ---- a/net/ipv6/ip6_output.c
1170 -+++ b/net/ipv6/ip6_output.c
1171 -@@ -1566,8 +1566,8 @@ int ip6_push_pending_frames(struct sock *sk)
1172 - if (proto == IPPROTO_ICMPV6) {
1173 - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1174 -
1175 -- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1176 -- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1177 -+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1178 -+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1179 - }
1180 -
1181 - err = ip6_local_out(skb);
1182 -diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
1183 -index e1e47350784b..08b367c6b9cf 100644
1184 ---- a/net/ipv6/mcast.c
1185 -+++ b/net/ipv6/mcast.c
1186 -@@ -1620,11 +1620,12 @@ static void mld_sendpack(struct sk_buff *skb)
1187 - dst_output);
1188 - out:
1189 - if (!err) {
1190 -- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
1191 -- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1192 -- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1193 -- } else
1194 -- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
1195 -+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
1196 -+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1197 -+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1198 -+ } else {
1199 -+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1200 -+ }
1201 -
1202 - rcu_read_unlock();
1203 - return;
1204 -diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
1205 -index 587bbdcb22b4..bda74291c3e0 100644
1206 ---- a/net/ipv6/ping.c
1207 -+++ b/net/ipv6/ping.c
1208 -@@ -182,8 +182,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1209 - MSG_DONTWAIT, np->dontfrag);
1210 -
1211 - if (err) {
1212 -- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
1213 -- ICMP6_MIB_OUTERRORS);
1214 -+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
1215 -+ ICMP6_MIB_OUTERRORS);
1216 - ip6_flush_pending_frames(sk);
1217 - } else {
1218 - err = icmpv6_push_pending_frames(sk, &fl6,
1219 -diff --git a/net/rds/iw.c b/net/rds/iw.c
1220 -index 7826d46baa70..589935661d66 100644
1221 ---- a/net/rds/iw.c
1222 -+++ b/net/rds/iw.c
1223 -@@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr)
1224 - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
1225 - /* due to this, we will claim to support IB devices unless we
1226 - check node_type. */
1227 -- if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
1228 -+ if (ret || !cm_id->device ||
1229 -+ cm_id->device->node_type != RDMA_NODE_RNIC)
1230 - ret = -EADDRNOTAVAIL;
1231 -
1232 - rdsdebug("addr %pI4 ret %d node type %d\n",
1233 -diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
1234 -index b332e2cc0954..e294b86c8d88 100644
1235 ---- a/security/selinux/hooks.c
1236 -+++ b/security/selinux/hooks.c
1237 -@@ -1418,15 +1418,33 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1238 - isec->sid = sbsec->sid;
1239 -
1240 - if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) {
1241 -- if (opt_dentry) {
1242 -- isec->sclass = inode_mode_to_security_class(inode->i_mode);
1243 -- rc = selinux_proc_get_sid(opt_dentry,
1244 -- isec->sclass,
1245 -- &sid);
1246 -- if (rc)
1247 -- goto out_unlock;
1248 -- isec->sid = sid;
1249 -- }
1250 -+ /* We must have a dentry to determine the label on
1251 -+ * procfs inodes */
1252 -+ if (opt_dentry)
1253 -+ /* Called from d_instantiate or
1254 -+ * d_splice_alias. */
1255 -+ dentry = dget(opt_dentry);
1256 -+ else
1257 -+ /* Called from selinux_complete_init, try to
1258 -+ * find a dentry. */
1259 -+ dentry = d_find_alias(inode);
1260 -+ /*
1261 -+ * This can be hit on boot when a file is accessed
1262 -+ * before the policy is loaded. When we load policy we
1263 -+ * may find inodes that have no dentry on the
1264 -+ * sbsec->isec_head list. No reason to complain as
1265 -+ * these will get fixed up the next time we go through
1266 -+ * inode_doinit() with a dentry, before these inodes
1267 -+ * could be used again by userspace.
1268 -+ */
1269 -+ if (!dentry)
1270 -+ goto out_unlock;
1271 -+ isec->sclass = inode_mode_to_security_class(inode->i_mode);
1272 -+ rc = selinux_proc_get_sid(dentry, isec->sclass, &sid);
1273 -+ dput(dentry);
1274 -+ if (rc)
1275 -+ goto out_unlock;
1276 -+ isec->sid = sid;
1277 - }
1278 - break;
1279 - }
1280 -diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
1281 -index e354ab1ec20f..a8dec9e9e876 100644
1282 ---- a/sound/pci/hda/hda_intel.c
1283 -+++ b/sound/pci/hda/hda_intel.c
1284 -@@ -297,9 +297,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
1285 - #define ULI_NUM_CAPTURE 5
1286 - #define ULI_NUM_PLAYBACK 6
1287 -
1288 --/* ATI HDMI may have up to 8 playbacks and 0 capture */
1289 -+/* ATI HDMI has 1 playback and 0 capture */
1290 - #define ATIHDMI_NUM_CAPTURE 0
1291 --#define ATIHDMI_NUM_PLAYBACK 8
1292 -+#define ATIHDMI_NUM_PLAYBACK 1
1293 -
1294 - /* TERA has 4 playback and 3 capture */
1295 - #define TERA_NUM_CAPTURE 3
1296
1297 Deleted: genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch
1298 ===================================================================
1299 --- genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch 2014-05-05 14:22:27 UTC (rev 2771)
1300 +++ genpatches-2.6/trunk/3.15/1001_linux-3.14.2.patch 2014-05-05 14:28:48 UTC (rev 2772)
1301 @@ -1,1201 +0,0 @@
1302 -diff --git a/Makefile b/Makefile
1303 -index 7d0b6992d9ed..b2f7de81e9a2 100644
1304 ---- a/Makefile
1305 -+++ b/Makefile
1306 -@@ -1,6 +1,6 @@
1307 - VERSION = 3
1308 - PATCHLEVEL = 14
1309 --SUBLEVEL = 1
1310 -+SUBLEVEL = 2
1311 - EXTRAVERSION =
1312 - NAME = Shuffling Zombie Juror
1313 -
1314 -diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
1315 -index 9f7ca266864a..832d05a914ba 100644
1316 ---- a/arch/x86/kernel/cpu/mshyperv.c
1317 -+++ b/arch/x86/kernel/cpu/mshyperv.c
1318 -@@ -26,6 +26,7 @@
1319 - #include <asm/irq_regs.h>
1320 - #include <asm/i8259.h>
1321 - #include <asm/apic.h>
1322 -+#include <asm/timer.h>
1323 -
1324 - struct ms_hyperv_info ms_hyperv;
1325 - EXPORT_SYMBOL_GPL(ms_hyperv);
1326 -@@ -105,6 +106,11 @@ static void __init ms_hyperv_init_platform(void)
1327 -
1328 - if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
1329 - clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
1330 -+
1331 -+#ifdef CONFIG_X86_IO_APIC
1332 -+ no_timer_check = 1;
1333 -+#endif
1334 -+
1335 - }
1336 -
1337 - const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
1338 -diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
1339 -index bc4a088f9023..6d7d5a1260a6 100644
1340 ---- a/arch/x86/kernel/early-quirks.c
1341 -+++ b/arch/x86/kernel/early-quirks.c
1342 -@@ -203,18 +203,15 @@ static void __init intel_remapping_check(int num, int slot, int func)
1343 - revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
1344 -
1345 - /*
1346 -- * Revision 13 of all triggering devices id in this quirk have
1347 -- * a problem draining interrupts when irq remapping is enabled,
1348 -- * and should be flagged as broken. Additionally revisions 0x12
1349 -- * and 0x22 of device id 0x3405 has this problem.
1350 -+ * Revision <= 13 of all triggering devices id in this quirk
1351 -+ * have a problem draining interrupts when irq remapping is
1352 -+ * enabled, and should be flagged as broken. Additionally
1353 -+ * revision 0x22 of device id 0x3405 has this problem.
1354 - */
1355 -- if (revision == 0x13)
1356 -+ if (revision <= 0x13)
1357 - set_irq_remapping_broken();
1358 -- else if ((device == 0x3405) &&
1359 -- ((revision == 0x12) ||
1360 -- (revision == 0x22)))
1361 -+ else if (device == 0x3405 && revision == 0x22)
1362 - set_irq_remapping_broken();
1363 --
1364 - }
1365 -
1366 - /*
1367 -diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
1368 -index 714e957a871a..db35594d4df7 100644
1369 ---- a/drivers/acpi/button.c
1370 -+++ b/drivers/acpi/button.c
1371 -@@ -302,6 +302,10 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
1372 - input_sync(input);
1373 -
1374 - pm_wakeup_event(&device->dev, 0);
1375 -+ acpi_bus_generate_netlink_event(
1376 -+ device->pnp.device_class,
1377 -+ dev_name(&device->dev),
1378 -+ event, ++button->pushed);
1379 - }
1380 - break;
1381 - default:
1382 -diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
1383 -index f5e4cd7617f6..61e71616689b 100644
1384 ---- a/drivers/char/ipmi/ipmi_bt_sm.c
1385 -+++ b/drivers/char/ipmi/ipmi_bt_sm.c
1386 -@@ -352,7 +352,7 @@ static inline void write_all_bytes(struct si_sm_data *bt)
1387 -
1388 - static inline int read_all_bytes(struct si_sm_data *bt)
1389 - {
1390 -- unsigned char i;
1391 -+ unsigned int i;
1392 -
1393 - /*
1394 - * length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode.
1395 -diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
1396 -index 17ce88f79d2b..f173dd09fce4 100644
1397 ---- a/drivers/pci/host/pcie-designware.c
1398 -+++ b/drivers/pci/host/pcie-designware.c
1399 -@@ -522,13 +522,13 @@ static void dw_pcie_prog_viewport_cfg1(struct pcie_port *pp, u32 busdev)
1400 - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
1401 - PCIE_ATU_VIEWPORT);
1402 - dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_CFG1, PCIE_ATU_CR1);
1403 -- dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1404 - dw_pcie_writel_rc(pp, pp->cfg1_base, PCIE_ATU_LOWER_BASE);
1405 - dw_pcie_writel_rc(pp, (pp->cfg1_base >> 32), PCIE_ATU_UPPER_BASE);
1406 - dw_pcie_writel_rc(pp, pp->cfg1_base + pp->config.cfg1_size - 1,
1407 - PCIE_ATU_LIMIT);
1408 - dw_pcie_writel_rc(pp, busdev, PCIE_ATU_LOWER_TARGET);
1409 - dw_pcie_writel_rc(pp, 0, PCIE_ATU_UPPER_TARGET);
1410 -+ dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1411 - }
1412 -
1413 - static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
1414 -@@ -537,7 +537,6 @@ static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
1415 - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0,
1416 - PCIE_ATU_VIEWPORT);
1417 - dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_MEM, PCIE_ATU_CR1);
1418 -- dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1419 - dw_pcie_writel_rc(pp, pp->mem_base, PCIE_ATU_LOWER_BASE);
1420 - dw_pcie_writel_rc(pp, (pp->mem_base >> 32), PCIE_ATU_UPPER_BASE);
1421 - dw_pcie_writel_rc(pp, pp->mem_base + pp->config.mem_size - 1,
1422 -@@ -545,6 +544,7 @@ static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
1423 - dw_pcie_writel_rc(pp, pp->config.mem_bus_addr, PCIE_ATU_LOWER_TARGET);
1424 - dw_pcie_writel_rc(pp, upper_32_bits(pp->config.mem_bus_addr),
1425 - PCIE_ATU_UPPER_TARGET);
1426 -+ dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1427 - }
1428 -
1429 - static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
1430 -@@ -553,7 +553,6 @@ static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
1431 - dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
1432 - PCIE_ATU_VIEWPORT);
1433 - dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_IO, PCIE_ATU_CR1);
1434 -- dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1435 - dw_pcie_writel_rc(pp, pp->io_base, PCIE_ATU_LOWER_BASE);
1436 - dw_pcie_writel_rc(pp, (pp->io_base >> 32), PCIE_ATU_UPPER_BASE);
1437 - dw_pcie_writel_rc(pp, pp->io_base + pp->config.io_size - 1,
1438 -@@ -561,6 +560,7 @@ static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
1439 - dw_pcie_writel_rc(pp, pp->config.io_bus_addr, PCIE_ATU_LOWER_TARGET);
1440 - dw_pcie_writel_rc(pp, upper_32_bits(pp->config.io_bus_addr),
1441 - PCIE_ATU_UPPER_TARGET);
1442 -+ dw_pcie_writel_rc(pp, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
1443 - }
1444 -
1445 - static int dw_pcie_rd_other_conf(struct pcie_port *pp, struct pci_bus *bus,
1446 -@@ -800,7 +800,7 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
1447 -
1448 - /* setup RC BARs */
1449 - dw_pcie_writel_rc(pp, 0x00000004, PCI_BASE_ADDRESS_0);
1450 -- dw_pcie_writel_rc(pp, 0x00000004, PCI_BASE_ADDRESS_1);
1451 -+ dw_pcie_writel_rc(pp, 0x00000000, PCI_BASE_ADDRESS_1);
1452 -
1453 - /* setup interrupt pins */
1454 - dw_pcie_readl_rc(pp, PCI_INTERRUPT_LINE, &val);
1455 -diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
1456 -index 470954aba728..36d1a23f14be 100644
1457 ---- a/drivers/scsi/sd.c
1458 -+++ b/drivers/scsi/sd.c
1459 -@@ -1463,8 +1463,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
1460 - sd_print_sense_hdr(sdkp, &sshdr);
1461 - /* we need to evaluate the error return */
1462 - if (scsi_sense_valid(&sshdr) &&
1463 -- /* 0x3a is medium not present */
1464 -- sshdr.asc == 0x3a)
1465 -+ (sshdr.asc == 0x3a || /* medium not present */
1466 -+ sshdr.asc == 0x20)) /* invalid command */
1467 - /* this is no error here */
1468 - return 0;
1469 -
1470 -diff --git a/drivers/staging/comedi/comedi_buf.c b/drivers/staging/comedi/comedi_buf.c
1471 -index 924fce977985..257595016161 100644
1472 ---- a/drivers/staging/comedi/comedi_buf.c
1473 -+++ b/drivers/staging/comedi/comedi_buf.c
1474 -@@ -61,6 +61,8 @@ static void __comedi_buf_free(struct comedi_device *dev,
1475 - struct comedi_subdevice *s)
1476 - {
1477 - struct comedi_async *async = s->async;
1478 -+ struct comedi_buf_map *bm;
1479 -+ unsigned long flags;
1480 -
1481 - if (async->prealloc_buf) {
1482 - vunmap(async->prealloc_buf);
1483 -@@ -68,8 +70,11 @@ static void __comedi_buf_free(struct comedi_device *dev,
1484 - async->prealloc_bufsz = 0;
1485 - }
1486 -
1487 -- comedi_buf_map_put(async->buf_map);
1488 -+ spin_lock_irqsave(&s->spin_lock, flags);
1489 -+ bm = async->buf_map;
1490 - async->buf_map = NULL;
1491 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1492 -+ comedi_buf_map_put(bm);
1493 - }
1494 -
1495 - static void __comedi_buf_alloc(struct comedi_device *dev,
1496 -@@ -80,6 +85,7 @@ static void __comedi_buf_alloc(struct comedi_device *dev,
1497 - struct page **pages = NULL;
1498 - struct comedi_buf_map *bm;
1499 - struct comedi_buf_page *buf;
1500 -+ unsigned long flags;
1501 - unsigned i;
1502 -
1503 - if (!IS_ENABLED(CONFIG_HAS_DMA) && s->async_dma_dir != DMA_NONE) {
1504 -@@ -92,8 +98,10 @@ static void __comedi_buf_alloc(struct comedi_device *dev,
1505 - if (!bm)
1506 - return;
1507 -
1508 -- async->buf_map = bm;
1509 - kref_init(&bm->refcount);
1510 -+ spin_lock_irqsave(&s->spin_lock, flags);
1511 -+ async->buf_map = bm;
1512 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1513 - bm->dma_dir = s->async_dma_dir;
1514 - if (bm->dma_dir != DMA_NONE)
1515 - /* Need ref to hardware device to free buffer later. */
1516 -@@ -127,7 +135,9 @@ static void __comedi_buf_alloc(struct comedi_device *dev,
1517 -
1518 - pages[i] = virt_to_page(buf->virt_addr);
1519 - }
1520 -+ spin_lock_irqsave(&s->spin_lock, flags);
1521 - bm->n_pages = i;
1522 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1523 -
1524 - /* vmap the prealloc_buf if all the pages were allocated */
1525 - if (i == n_pages)
1526 -@@ -150,6 +160,29 @@ int comedi_buf_map_put(struct comedi_buf_map *bm)
1527 - return 1;
1528 - }
1529 -
1530 -+/* returns s->async->buf_map and increments its kref refcount */
1531 -+struct comedi_buf_map *
1532 -+comedi_buf_map_from_subdev_get(struct comedi_subdevice *s)
1533 -+{
1534 -+ struct comedi_async *async = s->async;
1535 -+ struct comedi_buf_map *bm = NULL;
1536 -+ unsigned long flags;
1537 -+
1538 -+ if (!async)
1539 -+ return NULL;
1540 -+
1541 -+ spin_lock_irqsave(&s->spin_lock, flags);
1542 -+ bm = async->buf_map;
1543 -+ /* only want it if buffer pages allocated */
1544 -+ if (bm && bm->n_pages)
1545 -+ comedi_buf_map_get(bm);
1546 -+ else
1547 -+ bm = NULL;
1548 -+ spin_unlock_irqrestore(&s->spin_lock, flags);
1549 -+
1550 -+ return bm;
1551 -+}
1552 -+
1553 - bool comedi_buf_is_mmapped(struct comedi_async *async)
1554 - {
1555 - struct comedi_buf_map *bm = async->buf_map;
1556 -diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
1557 -index c22c617b0da1..eae3ee139330 100644
1558 ---- a/drivers/staging/comedi/comedi_fops.c
1559 -+++ b/drivers/staging/comedi/comedi_fops.c
1560 -@@ -1923,14 +1923,21 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
1561 - struct comedi_device *dev = file->private_data;
1562 - struct comedi_subdevice *s;
1563 - struct comedi_async *async;
1564 -- struct comedi_buf_map *bm;
1565 -+ struct comedi_buf_map *bm = NULL;
1566 - unsigned long start = vma->vm_start;
1567 - unsigned long size;
1568 - int n_pages;
1569 - int i;
1570 - int retval;
1571 -
1572 -- mutex_lock(&dev->mutex);
1573 -+ /*
1574 -+ * 'trylock' avoids circular dependency with current->mm->mmap_sem
1575 -+ * and down-reading &dev->attach_lock should normally succeed without
1576 -+ * contention unless the device is in the process of being attached
1577 -+ * or detached.
1578 -+ */
1579 -+ if (!down_read_trylock(&dev->attach_lock))
1580 -+ return -EAGAIN;
1581 -
1582 - if (!dev->attached) {
1583 - dev_dbg(dev->class_dev, "no driver attached\n");
1584 -@@ -1970,7 +1977,9 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
1585 - }
1586 -
1587 - n_pages = size >> PAGE_SHIFT;
1588 -- bm = async->buf_map;
1589 -+
1590 -+ /* get reference to current buf map (if any) */
1591 -+ bm = comedi_buf_map_from_subdev_get(s);
1592 - if (!bm || n_pages > bm->n_pages) {
1593 - retval = -EINVAL;
1594 - goto done;
1595 -@@ -1994,7 +2003,8 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
1596 -
1597 - retval = 0;
1598 - done:
1599 -- mutex_unlock(&dev->mutex);
1600 -+ up_read(&dev->attach_lock);
1601 -+ comedi_buf_map_put(bm); /* put reference to buf map - okay if NULL */
1602 - return retval;
1603 - }
1604 -
1605 -diff --git a/drivers/staging/comedi/comedi_internal.h b/drivers/staging/comedi/comedi_internal.h
1606 -index 9a746570f161..a492f2d2436e 100644
1607 ---- a/drivers/staging/comedi/comedi_internal.h
1608 -+++ b/drivers/staging/comedi/comedi_internal.h
1609 -@@ -19,6 +19,8 @@ void comedi_buf_reset(struct comedi_async *async);
1610 - bool comedi_buf_is_mmapped(struct comedi_async *async);
1611 - void comedi_buf_map_get(struct comedi_buf_map *bm);
1612 - int comedi_buf_map_put(struct comedi_buf_map *bm);
1613 -+struct comedi_buf_map *comedi_buf_map_from_subdev_get(
1614 -+ struct comedi_subdevice *s);
1615 - unsigned int comedi_buf_write_n_allocated(struct comedi_async *async);
1616 - void comedi_device_cancel_all(struct comedi_device *dev);
1617 -
1618 -diff --git a/drivers/staging/comedi/drivers/8255_pci.c b/drivers/staging/comedi/drivers/8255_pci.c
1619 -index 8a57c3c1ade0..1097dc6a3086 100644
1620 ---- a/drivers/staging/comedi/drivers/8255_pci.c
1621 -+++ b/drivers/staging/comedi/drivers/8255_pci.c
1622 -@@ -56,6 +56,7 @@ Configuration Options: not applicable, uses PCI auto config
1623 - #include "../comedidev.h"
1624 -
1625 - #include "8255.h"
1626 -+#include "mite.h"
1627 -
1628 - enum pci_8255_boardid {
1629 - BOARD_ADLINK_PCI7224,
1630 -@@ -79,6 +80,7 @@ struct pci_8255_boardinfo {
1631 - const char *name;
1632 - int dio_badr;
1633 - int n_8255;
1634 -+ unsigned int has_mite:1;
1635 - };
1636 -
1637 - static const struct pci_8255_boardinfo pci_8255_boards[] = {
1638 -@@ -126,36 +128,43 @@ static const struct pci_8255_boardinfo pci_8255_boards[] = {
1639 - .name = "ni_pci-dio-96",
1640 - .dio_badr = 1,
1641 - .n_8255 = 4,
1642 -+ .has_mite = 1,
1643 - },
1644 - [BOARD_NI_PCIDIO96B] = {
1645 - .name = "ni_pci-dio-96b",
1646 - .dio_badr = 1,
1647 - .n_8255 = 4,
1648 -+ .has_mite = 1,
1649 - },
1650 - [BOARD_NI_PXI6508] = {
1651 - .name = "ni_pxi-6508",
1652 - .dio_badr = 1,
1653 - .n_8255 = 4,
1654 -+ .has_mite = 1,
1655 - },
1656 - [BOARD_NI_PCI6503] = {
1657 - .name = "ni_pci-6503",
1658 - .dio_badr = 1,
1659 - .n_8255 = 1,
1660 -+ .has_mite = 1,
1661 - },
1662 - [BOARD_NI_PCI6503B] = {
1663 - .name = "ni_pci-6503b",
1664 - .dio_badr = 1,
1665 - .n_8255 = 1,
1666 -+ .has_mite = 1,
1667 - },
1668 - [BOARD_NI_PCI6503X] = {
1669 - .name = "ni_pci-6503x",
1670 - .dio_badr = 1,
1671 - .n_8255 = 1,
1672 -+ .has_mite = 1,
1673 - },
1674 - [BOARD_NI_PXI_6503] = {
1675 - .name = "ni_pxi-6503",
1676 - .dio_badr = 1,
1677 - .n_8255 = 1,
1678 -+ .has_mite = 1,
1679 - },
1680 - };
1681 -
1682 -@@ -163,6 +172,25 @@ struct pci_8255_private {
1683 - void __iomem *mmio_base;
1684 - };
1685 -
1686 -+static int pci_8255_mite_init(struct pci_dev *pcidev)
1687 -+{
1688 -+ void __iomem *mite_base;
1689 -+ u32 main_phys_addr;
1690 -+
1691 -+ /* ioremap the MITE registers (BAR 0) temporarily */
1692 -+ mite_base = pci_ioremap_bar(pcidev, 0);
1693 -+ if (!mite_base)
1694 -+ return -ENOMEM;
1695 -+
1696 -+ /* set data window to main registers (BAR 1) */
1697 -+ main_phys_addr = pci_resource_start(pcidev, 1);
1698 -+ writel(main_phys_addr | WENAB, mite_base + MITE_IODWBSR);
1699 -+
1700 -+ /* finished with MITE registers */
1701 -+ iounmap(mite_base);
1702 -+ return 0;
1703 -+}
1704 -+
1705 - static int pci_8255_mmio(int dir, int port, int data, unsigned long iobase)
1706 - {
1707 - void __iomem *mmio_base = (void __iomem *)iobase;
1708 -@@ -201,6 +229,12 @@ static int pci_8255_auto_attach(struct comedi_device *dev,
1709 - if (ret)
1710 - return ret;
1711 -
1712 -+ if (board->has_mite) {
1713 -+ ret = pci_8255_mite_init(pcidev);
1714 -+ if (ret)
1715 -+ return ret;
1716 -+ }
1717 -+
1718 - is_mmio = (pci_resource_flags(pcidev, board->dio_badr) &
1719 - IORESOURCE_MEM) != 0;
1720 - if (is_mmio) {
1721 -diff --git a/drivers/tty/ipwireless/tty.c b/drivers/tty/ipwireless/tty.c
1722 -index ebd5bff0f5c1..17ee3bf0926b 100644
1723 ---- a/drivers/tty/ipwireless/tty.c
1724 -+++ b/drivers/tty/ipwireless/tty.c
1725 -@@ -176,9 +176,6 @@ void ipwireless_tty_received(struct ipw_tty *tty, unsigned char *data,
1726 - ": %d chars not inserted to flip buffer!\n",
1727 - length - work);
1728 -
1729 -- /*
1730 -- * This may sleep if ->low_latency is set
1731 -- */
1732 - if (work)
1733 - tty_flip_buffer_push(&tty->port);
1734 - }
1735 -diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
1736 -index 765125dff20e..8ebd9f88a6f6 100644
1737 ---- a/drivers/tty/tty_buffer.c
1738 -+++ b/drivers/tty/tty_buffer.c
1739 -@@ -351,14 +351,11 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags);
1740 - * Takes any pending buffers and transfers their ownership to the
1741 - * ldisc side of the queue. It then schedules those characters for
1742 - * processing by the line discipline.
1743 -- * Note that this function can only be used when the low_latency flag
1744 -- * is unset. Otherwise the workqueue won't be flushed.
1745 - */
1746 -
1747 - void tty_schedule_flip(struct tty_port *port)
1748 - {
1749 - struct tty_bufhead *buf = &port->buf;
1750 -- WARN_ON(port->low_latency);
1751 -
1752 - buf->tail->commit = buf->tail->used;
1753 - schedule_work(&buf->work);
1754 -@@ -482,17 +479,15 @@ static void flush_to_ldisc(struct work_struct *work)
1755 - */
1756 - void tty_flush_to_ldisc(struct tty_struct *tty)
1757 - {
1758 -- if (!tty->port->low_latency)
1759 -- flush_work(&tty->port->buf.work);
1760 -+ flush_work(&tty->port->buf.work);
1761 - }
1762 -
1763 - /**
1764 - * tty_flip_buffer_push - terminal
1765 - * @port: tty port to push
1766 - *
1767 -- * Queue a push of the terminal flip buffers to the line discipline. This
1768 -- * function must not be called from IRQ context if port->low_latency is
1769 -- * set.
1770 -+ * Queue a push of the terminal flip buffers to the line discipline.
1771 -+ * Can be called from IRQ/atomic context.
1772 - *
1773 - * In the event of the queue being busy for flipping the work will be
1774 - * held off and retried later.
1775 -@@ -500,14 +495,7 @@ void tty_flush_to_ldisc(struct tty_struct *tty)
1776 -
1777 - void tty_flip_buffer_push(struct tty_port *port)
1778 - {
1779 -- struct tty_bufhead *buf = &port->buf;
1780 --
1781 -- buf->tail->commit = buf->tail->used;
1782 --
1783 -- if (port->low_latency)
1784 -- flush_to_ldisc(&buf->work);
1785 -- else
1786 -- schedule_work(&buf->work);
1787 -+ tty_schedule_flip(port);
1788 - }
1789 - EXPORT_SYMBOL(tty_flip_buffer_push);
1790 -
1791 -diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
1792 -index c74a00ad7add..d3448a90f0f9 100644
1793 ---- a/drivers/tty/tty_io.c
1794 -+++ b/drivers/tty/tty_io.c
1795 -@@ -1271,12 +1271,13 @@ static void pty_line_name(struct tty_driver *driver, int index, char *p)
1796 - *
1797 - * Locking: None
1798 - */
1799 --static void tty_line_name(struct tty_driver *driver, int index, char *p)
1800 -+static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p)
1801 - {
1802 - if (driver->flags & TTY_DRIVER_UNNUMBERED_NODE)
1803 -- strcpy(p, driver->name);
1804 -+ return sprintf(p, "%s", driver->name);
1805 - else
1806 -- sprintf(p, "%s%d", driver->name, index + driver->name_base);
1807 -+ return sprintf(p, "%s%d", driver->name,
1808 -+ index + driver->name_base);
1809 - }
1810 -
1811 - /**
1812 -@@ -3545,9 +3546,19 @@ static ssize_t show_cons_active(struct device *dev,
1813 - if (i >= ARRAY_SIZE(cs))
1814 - break;
1815 - }
1816 -- while (i--)
1817 -- count += sprintf(buf + count, "%s%d%c",
1818 -- cs[i]->name, cs[i]->index, i ? ' ':'\n');
1819 -+ while (i--) {
1820 -+ int index = cs[i]->index;
1821 -+ struct tty_driver *drv = cs[i]->device(cs[i], &index);
1822 -+
1823 -+ /* don't resolve tty0 as some programs depend on it */
1824 -+ if (drv && (cs[i]->index > 0 || drv->major != TTY_MAJOR))
1825 -+ count += tty_line_name(drv, index, buf + count);
1826 -+ else
1827 -+ count += sprintf(buf + count, "%s%d",
1828 -+ cs[i]->name, cs[i]->index);
1829 -+
1830 -+ count += sprintf(buf + count, "%c", i ? ' ':'\n');
1831 -+ }
1832 - console_unlock();
1833 -
1834 - return count;
1835 -diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c
1836 -index b369292d4b90..ad0aca812002 100644
1837 ---- a/drivers/usb/gadget/u_serial.c
1838 -+++ b/drivers/usb/gadget/u_serial.c
1839 -@@ -549,8 +549,8 @@ static void gs_rx_push(unsigned long _port)
1840 - port->read_started--;
1841 - }
1842 -
1843 -- /* Push from tty to ldisc; without low_latency set this is handled by
1844 -- * a workqueue, so we won't get callbacks and can hold port_lock
1845 -+ /* Push from tty to ldisc; this is handled by a workqueue,
1846 -+ * so we won't get callbacks and can hold port_lock
1847 - */
1848 - if (do_push)
1849 - tty_flip_buffer_push(&port->port);
1850 -diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
1851 -index 81ea55314b1f..9a527a1826df 100644
1852 ---- a/fs/btrfs/disk-io.c
1853 -+++ b/fs/btrfs/disk-io.c
1854 -@@ -3244,6 +3244,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
1855 - /* send down all the barriers */
1856 - head = &info->fs_devices->devices;
1857 - list_for_each_entry_rcu(dev, head, dev_list) {
1858 -+ if (dev->missing)
1859 -+ continue;
1860 - if (!dev->bdev) {
1861 - errors_send++;
1862 - continue;
1863 -@@ -3258,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
1864 -
1865 - /* wait for all the barriers */
1866 - list_for_each_entry_rcu(dev, head, dev_list) {
1867 -+ if (dev->missing)
1868 -+ continue;
1869 - if (!dev->bdev) {
1870 - errors_wait++;
1871 - continue;
1872 -diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
1873 -index 32312e09f0f5..3c8e68da9ef8 100644
1874 ---- a/fs/btrfs/extent-tree.c
1875 -+++ b/fs/btrfs/extent-tree.c
1876 -@@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1877 - spin_unlock(&locked_ref->lock);
1878 - spin_lock(&delayed_refs->lock);
1879 - spin_lock(&locked_ref->lock);
1880 -- if (rb_first(&locked_ref->ref_root)) {
1881 -+ if (rb_first(&locked_ref->ref_root) ||
1882 -+ locked_ref->extent_op) {
1883 - spin_unlock(&locked_ref->lock);
1884 - spin_unlock(&delayed_refs->lock);
1885 - continue;
1886 -diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
1887 -index 34cd83184c4a..b05bf58b9395 100644
1888 ---- a/fs/btrfs/transaction.c
1889 -+++ b/fs/btrfs/transaction.c
1890 -@@ -683,7 +683,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
1891 - int lock = (trans->type != TRANS_JOIN_NOLOCK);
1892 - int err = 0;
1893 -
1894 -- if (--trans->use_count) {
1895 -+ if (trans->use_count > 1) {
1896 -+ trans->use_count--;
1897 - trans->block_rsv = trans->orig_rsv;
1898 - return 0;
1899 - }
1900 -@@ -731,17 +732,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
1901 - }
1902 -
1903 - if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
1904 -- if (throttle) {
1905 -- /*
1906 -- * We may race with somebody else here so end up having
1907 -- * to call end_transaction on ourselves again, so inc
1908 -- * our use_count.
1909 -- */
1910 -- trans->use_count++;
1911 -+ if (throttle)
1912 - return btrfs_commit_transaction(trans, root);
1913 -- } else {
1914 -+ else
1915 - wake_up_process(info->transaction_kthread);
1916 -- }
1917 - }
1918 -
1919 - if (trans->type & __TRANS_FREEZABLE)
1920 -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
1921 -index 74bc2d549c58..47188916dd8d 100644
1922 ---- a/fs/ext4/extents.c
1923 -+++ b/fs/ext4/extents.c
1924 -@@ -2585,6 +2585,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1925 - ex_ee_block = le32_to_cpu(ex->ee_block);
1926 - ex_ee_len = ext4_ext_get_actual_len(ex);
1927 -
1928 -+ /*
1929 -+ * If we're starting with an extent other than the last one in the
1930 -+ * node, we need to see if it shares a cluster with the extent to
1931 -+ * the right (towards the end of the file). If its leftmost cluster
1932 -+ * is this extent's rightmost cluster and it is not cluster aligned,
1933 -+ * we'll mark it as a partial that is not to be deallocated.
1934 -+ */
1935 -+
1936 -+ if (ex != EXT_LAST_EXTENT(eh)) {
1937 -+ ext4_fsblk_t current_pblk, right_pblk;
1938 -+ long long current_cluster, right_cluster;
1939 -+
1940 -+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
1941 -+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
1942 -+ right_pblk = ext4_ext_pblock(ex + 1);
1943 -+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
1944 -+ if (current_cluster == right_cluster &&
1945 -+ EXT4_PBLK_COFF(sbi, right_pblk))
1946 -+ *partial_cluster = -right_cluster;
1947 -+ }
1948 -+
1949 - trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
1950 -
1951 - while (ex >= EXT_FIRST_EXTENT(eh) &&
1952 -@@ -2710,10 +2731,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1953 - err = ext4_ext_correct_indexes(handle, inode, path);
1954 -
1955 - /*
1956 -- * Free the partial cluster only if the current extent does not
1957 -- * reference it. Otherwise we might free used cluster.
1958 -+ * If there's a partial cluster and at least one extent remains in
1959 -+ * the leaf, free the partial cluster if it isn't shared with the
1960 -+ * current extent. If there's a partial cluster and no extents
1961 -+ * remain in the leaf, it can't be freed here. It can only be
1962 -+ * freed when it's possible to determine if it's not shared with
1963 -+ * any other extent - when the next leaf is processed or when space
1964 -+ * removal is complete.
1965 - */
1966 -- if (*partial_cluster > 0 &&
1967 -+ if (*partial_cluster > 0 && eh->eh_entries &&
1968 - (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
1969 - *partial_cluster)) {
1970 - int flags = get_default_free_blocks_flags(inode);
1971 -@@ -4128,7 +4154,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
1972 - struct ext4_extent newex, *ex, *ex2;
1973 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1974 - ext4_fsblk_t newblock = 0;
1975 -- int free_on_err = 0, err = 0, depth;
1976 -+ int free_on_err = 0, err = 0, depth, ret;
1977 - unsigned int allocated = 0, offset = 0;
1978 - unsigned int allocated_clusters = 0;
1979 - struct ext4_allocation_request ar;
1980 -@@ -4189,9 +4215,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
1981 - if (!ext4_ext_is_uninitialized(ex))
1982 - goto out;
1983 -
1984 -- allocated = ext4_ext_handle_uninitialized_extents(
1985 -+ ret = ext4_ext_handle_uninitialized_extents(
1986 - handle, inode, map, path, flags,
1987 - allocated, newblock);
1988 -+ if (ret < 0)
1989 -+ err = ret;
1990 -+ else
1991 -+ allocated = ret;
1992 - goto out3;
1993 - }
1994 - }
1995 -diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
1996 -index d754e3cf99a8..a16315957ef3 100644
1997 ---- a/fs/fs-writeback.c
1998 -+++ b/fs/fs-writeback.c
1999 -@@ -89,16 +89,29 @@ static inline struct inode *wb_inode(struct list_head *head)
2000 - #define CREATE_TRACE_POINTS
2001 - #include <trace/events/writeback.h>
2002 -
2003 -+static void bdi_wakeup_thread(struct backing_dev_info *bdi)
2004 -+{
2005 -+ spin_lock_bh(&bdi->wb_lock);
2006 -+ if (test_bit(BDI_registered, &bdi->state))
2007 -+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2008 -+ spin_unlock_bh(&bdi->wb_lock);
2009 -+}
2010 -+
2011 - static void bdi_queue_work(struct backing_dev_info *bdi,
2012 - struct wb_writeback_work *work)
2013 - {
2014 - trace_writeback_queue(bdi, work);
2015 -
2016 - spin_lock_bh(&bdi->wb_lock);
2017 -+ if (!test_bit(BDI_registered, &bdi->state)) {
2018 -+ if (work->done)
2019 -+ complete(work->done);
2020 -+ goto out_unlock;
2021 -+ }
2022 - list_add_tail(&work->list, &bdi->work_list);
2023 -- spin_unlock_bh(&bdi->wb_lock);
2024 --
2025 - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2026 -+out_unlock:
2027 -+ spin_unlock_bh(&bdi->wb_lock);
2028 - }
2029 -
2030 - static void
2031 -@@ -114,7 +127,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
2032 - work = kzalloc(sizeof(*work), GFP_ATOMIC);
2033 - if (!work) {
2034 - trace_writeback_nowork(bdi);
2035 -- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2036 -+ bdi_wakeup_thread(bdi);
2037 - return;
2038 - }
2039 -
2040 -@@ -161,7 +174,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
2041 - * writeback as soon as there is no other work to do.
2042 - */
2043 - trace_writeback_wake_background(bdi);
2044 -- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
2045 -+ bdi_wakeup_thread(bdi);
2046 - }
2047 -
2048 - /*
2049 -@@ -1017,7 +1030,7 @@ void bdi_writeback_workfn(struct work_struct *work)
2050 - current->flags |= PF_SWAPWRITE;
2051 -
2052 - if (likely(!current_is_workqueue_rescuer() ||
2053 -- list_empty(&bdi->bdi_list))) {
2054 -+ !test_bit(BDI_registered, &bdi->state))) {
2055 - /*
2056 - * The normal path. Keep writing back @bdi until its
2057 - * work_list is empty. Note that this path is also taken
2058 -@@ -1039,10 +1052,10 @@ void bdi_writeback_workfn(struct work_struct *work)
2059 - trace_writeback_pages_written(pages_written);
2060 - }
2061 -
2062 -- if (!list_empty(&bdi->work_list) ||
2063 -- (wb_has_dirty_io(wb) && dirty_writeback_interval))
2064 -- queue_delayed_work(bdi_wq, &wb->dwork,
2065 -- msecs_to_jiffies(dirty_writeback_interval * 10));
2066 -+ if (!list_empty(&bdi->work_list))
2067 -+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
2068 -+ else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
2069 -+ bdi_wakeup_thread_delayed(bdi);
2070 -
2071 - current->flags &= ~PF_SWAPWRITE;
2072 - }
2073 -diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
2074 -index 16a5047903a6..406d9cc84ba8 100644
2075 ---- a/fs/jffs2/compr_rtime.c
2076 -+++ b/fs/jffs2/compr_rtime.c
2077 -@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
2078 - unsigned char *cpage_out,
2079 - uint32_t *sourcelen, uint32_t *dstlen)
2080 - {
2081 -- short positions[256];
2082 -+ unsigned short positions[256];
2083 - int outpos = 0;
2084 - int pos=0;
2085 -
2086 -@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
2087 - unsigned char *cpage_out,
2088 - uint32_t srclen, uint32_t destlen)
2089 - {
2090 -- short positions[256];
2091 -+ unsigned short positions[256];
2092 - int outpos = 0;
2093 - int pos=0;
2094 -
2095 -diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
2096 -index e4619b00f7c5..fa35ff79ab35 100644
2097 ---- a/fs/jffs2/nodelist.h
2098 -+++ b/fs/jffs2/nodelist.h
2099 -@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
2100 - uint32_t version;
2101 - uint32_t data_crc;
2102 - uint32_t partial_crc;
2103 -- uint16_t csize;
2104 -+ uint32_t csize;
2105 - uint16_t overlapped;
2106 - };
2107 -
2108 -diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
2109 -index 03310721712f..b6bd4affd9ad 100644
2110 ---- a/fs/jffs2/nodemgmt.c
2111 -+++ b/fs/jffs2/nodemgmt.c
2112 -@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
2113 - spin_unlock(&c->erase_completion_lock);
2114 -
2115 - schedule();
2116 -+ remove_wait_queue(&c->erase_wait, &wait);
2117 - } else
2118 - spin_unlock(&c->erase_completion_lock);
2119 - } else if (ret)
2120 -@@ -211,20 +212,25 @@ out:
2121 - int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
2122 - uint32_t *len, uint32_t sumsize)
2123 - {
2124 -- int ret = -EAGAIN;
2125 -+ int ret;
2126 - minsize = PAD(minsize);
2127 -
2128 - jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
2129 -
2130 -- spin_lock(&c->erase_completion_lock);
2131 -- while(ret == -EAGAIN) {
2132 -+ while (true) {
2133 -+ spin_lock(&c->erase_completion_lock);
2134 - ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
2135 - if (ret) {
2136 - jffs2_dbg(1, "%s(): looping, ret is %d\n",
2137 - __func__, ret);
2138 - }
2139 -+ spin_unlock(&c->erase_completion_lock);
2140 -+
2141 -+ if (ret == -EAGAIN)
2142 -+ cond_resched();
2143 -+ else
2144 -+ break;
2145 - }
2146 -- spin_unlock(&c->erase_completion_lock);
2147 - if (!ret)
2148 - ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
2149 -
2150 -diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
2151 -index bd6e18be6e1a..39c0143fb3af 100644
2152 ---- a/fs/kernfs/dir.c
2153 -+++ b/fs/kernfs/dir.c
2154 -@@ -37,7 +37,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns)
2155 - hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
2156 - hash &= 0x7fffffffU;
2157 - /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
2158 -- if (hash < 1)
2159 -+ if (hash < 2)
2160 - hash += 2;
2161 - if (hash >= INT_MAX)
2162 - hash = INT_MAX - 1;
2163 -diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
2164 -index e55126f85bd2..553946c9d952 100644
2165 ---- a/fs/kernfs/inode.c
2166 -+++ b/fs/kernfs/inode.c
2167 -@@ -48,14 +48,18 @@ void __init kernfs_inode_init(void)
2168 -
2169 - static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
2170 - {
2171 -+ static DEFINE_MUTEX(iattr_mutex);
2172 -+ struct kernfs_iattrs *ret;
2173 - struct iattr *iattrs;
2174 -
2175 -+ mutex_lock(&iattr_mutex);
2176 -+
2177 - if (kn->iattr)
2178 -- return kn->iattr;
2179 -+ goto out_unlock;
2180 -
2181 - kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
2182 - if (!kn->iattr)
2183 -- return NULL;
2184 -+ goto out_unlock;
2185 - iattrs = &kn->iattr->ia_iattr;
2186 -
2187 - /* assign default attributes */
2188 -@@ -65,8 +69,10 @@ static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
2189 - iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
2190 -
2191 - simple_xattrs_init(&kn->iattr->xattrs);
2192 --
2193 -- return kn->iattr;
2194 -+out_unlock:
2195 -+ ret = kn->iattr;
2196 -+ mutex_unlock(&iattr_mutex);
2197 -+ return ret;
2198 - }
2199 -
2200 - static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
2201 -diff --git a/fs/posix_acl.c b/fs/posix_acl.c
2202 -index 11c54fd51e16..9e363e41dacc 100644
2203 ---- a/fs/posix_acl.c
2204 -+++ b/fs/posix_acl.c
2205 -@@ -723,7 +723,7 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
2206 - void *buffer, size_t size)
2207 - {
2208 - posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer;
2209 -- posix_acl_xattr_entry *ext_entry = ext_acl->a_entries;
2210 -+ posix_acl_xattr_entry *ext_entry;
2211 - int real_size, n;
2212 -
2213 - real_size = posix_acl_xattr_size(acl->a_count);
2214 -@@ -731,7 +731,8 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
2215 - return real_size;
2216 - if (real_size > size)
2217 - return -ERANGE;
2218 --
2219 -+
2220 -+ ext_entry = ext_acl->a_entries;
2221 - ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
2222 -
2223 - for (n=0; n < acl->a_count; n++, ext_entry++) {
2224 -diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
2225 -index 796272a2e129..e69d57be866b 100644
2226 ---- a/fs/xfs/xfs_da_btree.c
2227 -+++ b/fs/xfs/xfs_da_btree.c
2228 -@@ -1295,7 +1295,7 @@ xfs_da3_fixhashpath(
2229 - node = blk->bp->b_addr;
2230 - dp->d_ops->node_hdr_from_disk(&nodehdr, node);
2231 - btree = dp->d_ops->node_tree_p(node);
2232 -- if (be32_to_cpu(btree->hashval) == lasthash)
2233 -+ if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
2234 - break;
2235 - blk->hashval = lasthash;
2236 - btree[blk->index].hashval = cpu_to_be32(lasthash);
2237 -diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
2238 -index 24819001f5c8..e488e9459a93 100644
2239 ---- a/include/linux/backing-dev.h
2240 -+++ b/include/linux/backing-dev.h
2241 -@@ -95,7 +95,7 @@ struct backing_dev_info {
2242 - unsigned int max_ratio, max_prop_frac;
2243 -
2244 - struct bdi_writeback wb; /* default writeback info for this bdi */
2245 -- spinlock_t wb_lock; /* protects work_list */
2246 -+ spinlock_t wb_lock; /* protects work_list & wb.dwork scheduling */
2247 -
2248 - struct list_head work_list;
2249 -
2250 -diff --git a/include/linux/tty.h b/include/linux/tty.h
2251 -index 90b4fdc8a61f..b90b5c221ff0 100644
2252 ---- a/include/linux/tty.h
2253 -+++ b/include/linux/tty.h
2254 -@@ -208,7 +208,7 @@ struct tty_port {
2255 - wait_queue_head_t delta_msr_wait; /* Modem status change */
2256 - unsigned long flags; /* TTY flags ASY_*/
2257 - unsigned char console:1, /* port is a console */
2258 -- low_latency:1; /* direct buffer flush */
2259 -+ low_latency:1; /* optional: tune for latency */
2260 - struct mutex mutex; /* Locking */
2261 - struct mutex buf_mutex; /* Buffer alloc lock */
2262 - unsigned char *xmit_buf; /* Optional buffer */
2263 -diff --git a/kernel/exit.c b/kernel/exit.c
2264 -index 1e77fc645317..81b3d6789ee8 100644
2265 ---- a/kernel/exit.c
2266 -+++ b/kernel/exit.c
2267 -@@ -560,9 +560,6 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
2268 - struct list_head *dead)
2269 - {
2270 - list_move_tail(&p->sibling, &p->real_parent->children);
2271 --
2272 -- if (p->exit_state == EXIT_DEAD)
2273 -- return;
2274 - /*
2275 - * If this is a threaded reparent there is no need to
2276 - * notify anyone anything has happened.
2277 -@@ -570,9 +567,19 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
2278 - if (same_thread_group(p->real_parent, father))
2279 - return;
2280 -
2281 -- /* We don't want people slaying init. */
2282 -+ /*
2283 -+ * We don't want people slaying init.
2284 -+ *
2285 -+ * Note: we do this even if it is EXIT_DEAD, wait_task_zombie()
2286 -+ * can change ->exit_state to EXIT_ZOMBIE. If this is the final
2287 -+ * state, do_notify_parent() was already called and ->exit_signal
2288 -+ * doesn't matter.
2289 -+ */
2290 - p->exit_signal = SIGCHLD;
2291 -
2292 -+ if (p->exit_state == EXIT_DEAD)
2293 -+ return;
2294 -+
2295 - /* If it has exited notify the new parent about this child's death. */
2296 - if (!p->ptrace &&
2297 - p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
2298 -@@ -784,6 +791,8 @@ void do_exit(long code)
2299 - exit_shm(tsk);
2300 - exit_files(tsk);
2301 - exit_fs(tsk);
2302 -+ if (group_dead)
2303 -+ disassociate_ctty(1);
2304 - exit_task_namespaces(tsk);
2305 - exit_task_work(tsk);
2306 - check_stack_usage();
2307 -@@ -799,13 +808,9 @@ void do_exit(long code)
2308 -
2309 - cgroup_exit(tsk, 1);
2310 -
2311 -- if (group_dead)
2312 -- disassociate_ctty(1);
2313 --
2314 - module_put(task_thread_info(tsk)->exec_domain->module);
2315 -
2316 - proc_exit_connector(tsk);
2317 --
2318 - /*
2319 - * FIXME: do that only when needed, using sched_exit tracepoint
2320 - */
2321 -diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
2322 -index 06c62de9c711..db95d8eb761b 100644
2323 ---- a/kernel/pid_namespace.c
2324 -+++ b/kernel/pid_namespace.c
2325 -@@ -318,7 +318,9 @@ static void *pidns_get(struct task_struct *task)
2326 - struct pid_namespace *ns;
2327 -
2328 - rcu_read_lock();
2329 -- ns = get_pid_ns(task_active_pid_ns(task));
2330 -+ ns = task_active_pid_ns(task);
2331 -+ if (ns)
2332 -+ get_pid_ns(ns);
2333 - rcu_read_unlock();
2334 -
2335 - return ns;
2336 -diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
2337 -index dd06439b9c84..80a57afd8647 100644
2338 ---- a/kernel/user_namespace.c
2339 -+++ b/kernel/user_namespace.c
2340 -@@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
2341 -
2342 - /* Find the matching extent */
2343 - extents = map->nr_extents;
2344 -- smp_read_barrier_depends();
2345 -+ smp_rmb();
2346 - for (idx = 0; idx < extents; idx++) {
2347 - first = map->extent[idx].first;
2348 - last = first + map->extent[idx].count - 1;
2349 -@@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id)
2350 -
2351 - /* Find the matching extent */
2352 - extents = map->nr_extents;
2353 -- smp_read_barrier_depends();
2354 -+ smp_rmb();
2355 - for (idx = 0; idx < extents; idx++) {
2356 - first = map->extent[idx].first;
2357 - last = first + map->extent[idx].count - 1;
2358 -@@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id)
2359 -
2360 - /* Find the matching extent */
2361 - extents = map->nr_extents;
2362 -- smp_read_barrier_depends();
2363 -+ smp_rmb();
2364 - for (idx = 0; idx < extents; idx++) {
2365 - first = map->extent[idx].lower_first;
2366 - last = first + map->extent[idx].count - 1;
2367 -@@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf,
2368 - * were written before the count of the extents.
2369 - *
2370 - * To achieve this smp_wmb() is used on guarantee the write
2371 -- * order and smp_read_barrier_depends() is guaranteed that we
2372 -- * don't have crazy architectures returning stale data.
2373 -- *
2374 -+ * order and smp_rmb() is guaranteed that we don't have crazy
2375 -+ * architectures returning stale data.
2376 - */
2377 - mutex_lock(&id_map_mutex);
2378 -
2379 -diff --git a/mm/backing-dev.c b/mm/backing-dev.c
2380 -index ce682f7a4f29..09d9591b7708 100644
2381 ---- a/mm/backing-dev.c
2382 -+++ b/mm/backing-dev.c
2383 -@@ -288,13 +288,19 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
2384 - * Note, we wouldn't bother setting up the timer, but this function is on the
2385 - * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
2386 - * by delaying the wake-up.
2387 -+ *
2388 -+ * We have to be careful not to postpone flush work if it is scheduled for
2389 -+ * earlier. Thus we use queue_delayed_work().
2390 - */
2391 - void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
2392 - {
2393 - unsigned long timeout;
2394 -
2395 - timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
2396 -- mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
2397 -+ spin_lock_bh(&bdi->wb_lock);
2398 -+ if (test_bit(BDI_registered, &bdi->state))
2399 -+ queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
2400 -+ spin_unlock_bh(&bdi->wb_lock);
2401 - }
2402 -
2403 - /*
2404 -@@ -307,9 +313,6 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
2405 - spin_unlock_bh(&bdi_lock);
2406 -
2407 - synchronize_rcu_expedited();
2408 --
2409 -- /* bdi_list is now unused, clear it to mark @bdi dying */
2410 -- INIT_LIST_HEAD(&bdi->bdi_list);
2411 - }
2412 -
2413 - int bdi_register(struct backing_dev_info *bdi, struct device *parent,
2414 -@@ -360,6 +363,11 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
2415 - */
2416 - bdi_remove_from_list(bdi);
2417 -
2418 -+ /* Make sure nobody queues further work */
2419 -+ spin_lock_bh(&bdi->wb_lock);
2420 -+ clear_bit(BDI_registered, &bdi->state);
2421 -+ spin_unlock_bh(&bdi->wb_lock);
2422 -+
2423 - /*
2424 - * Drain work list and shutdown the delayed_work. At this point,
2425 - * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi
2426 -diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
2427 -index 5f812455a450..60828cf02eb8 100644
2428 ---- a/net/bluetooth/hci_event.c
2429 -+++ b/net/bluetooth/hci_event.c
2430 -@@ -3593,7 +3593,13 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
2431 -
2432 - hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
2433 -
2434 -- if (ltk->type & HCI_SMP_STK) {
2435 -+ /* Ref. Bluetooth Core SPEC pages 1975 and 2004. STK is a
2436 -+ * temporary key used to encrypt a connection following
2437 -+ * pairing. It is used during the Encrypted Session Setup to
2438 -+ * distribute the keys. Later, security can be re-established
2439 -+ * using a distributed LTK.
2440 -+ */
2441 -+ if (ltk->type == HCI_SMP_STK_SLAVE) {
2442 - list_del(&ltk->list);
2443 - kfree(ltk);
2444 - }
2445 -diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
2446 -index 0356e1d437ca..f79fa8be203c 100644
2447 ---- a/security/integrity/ima/ima.h
2448 -+++ b/security/integrity/ima/ima.h
2449 -@@ -27,7 +27,7 @@
2450 - #include "../integrity.h"
2451 -
2452 - enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN,
2453 -- IMA_SHOW_ASCII };
2454 -+ IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII };
2455 - enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
2456 -
2457 - /* digest size for IMA, fits SHA1 or MD5 */
2458 -diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
2459 -index db01125926bd..468a3ba3c539 100644
2460 ---- a/security/integrity/ima/ima_fs.c
2461 -+++ b/security/integrity/ima/ima_fs.c
2462 -@@ -160,6 +160,8 @@ static int ima_measurements_show(struct seq_file *m, void *v)
2463 -
2464 - if (is_ima_template && strcmp(field->field_id, "d") == 0)
2465 - show = IMA_SHOW_BINARY_NO_FIELD_LEN;
2466 -+ if (is_ima_template && strcmp(field->field_id, "n") == 0)
2467 -+ show = IMA_SHOW_BINARY_OLD_STRING_FMT;
2468 - field->field_show(m, show, &e->template_data[i]);
2469 - }
2470 - return 0;
2471 -diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
2472 -index 1683bbf289a4..e8592e7bfc21 100644
2473 ---- a/security/integrity/ima/ima_template_lib.c
2474 -+++ b/security/integrity/ima/ima_template_lib.c
2475 -@@ -109,13 +109,16 @@ static void ima_show_template_data_binary(struct seq_file *m,
2476 - enum data_formats datafmt,
2477 - struct ima_field_data *field_data)
2478 - {
2479 -+ u32 len = (show == IMA_SHOW_BINARY_OLD_STRING_FMT) ?
2480 -+ strlen(field_data->data) : field_data->len;
2481 -+
2482 - if (show != IMA_SHOW_BINARY_NO_FIELD_LEN)
2483 -- ima_putc(m, &field_data->len, sizeof(u32));
2484 -+ ima_putc(m, &len, sizeof(len));
2485 -
2486 -- if (!field_data->len)
2487 -+ if (!len)
2488 - return;
2489 -
2490 -- ima_putc(m, field_data->data, field_data->len);
2491 -+ ima_putc(m, field_data->data, len);
2492 - }
2493 -
2494 - static void ima_show_template_field_data(struct seq_file *m,
2495 -@@ -129,6 +132,7 @@ static void ima_show_template_field_data(struct seq_file *m,
2496 - break;
2497 - case IMA_SHOW_BINARY:
2498 - case IMA_SHOW_BINARY_NO_FIELD_LEN:
2499 -+ case IMA_SHOW_BINARY_OLD_STRING_FMT:
2500 - ima_show_template_data_binary(m, show, datafmt, field_data);
2501 - break;
2502 - default:
2503
2504 Deleted: genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch
2505 ===================================================================
2506 --- genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch 2014-05-05 14:22:27 UTC (rev 2771)
2507 +++ genpatches-2.6/trunk/3.15/4200_fbcondecor-0.9.6.patch 2014-05-05 14:28:48 UTC (rev 2772)
2508 @@ -1,2177 +0,0 @@
2509 -diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
2510 -index 30a7054..9b6a733 100644
2511 ---- a/Documentation/fb/00-INDEX
2512 -+++ b/Documentation/fb/00-INDEX
2513 -@@ -21,6 +21,8 @@ ep93xx-fb.txt
2514 - - info on the driver for EP93xx LCD controller.
2515 - fbcon.txt
2516 - - intro to and usage guide for the framebuffer console (fbcon).
2517 -+fbcondecor.txt
2518 -+ - info on the Framebuffer Console Decoration
2519 - framebuffer.txt
2520 - - introduction to frame buffer devices.
2521 - gxfb.txt
2522 -diff --git a/Documentation/fb/fbcondecor.txt b/Documentation/fb/fbcondecor.txt
2523 -new file mode 100644
2524 -index 0000000..15889f3
2525 ---- /dev/null
2526 -+++ b/Documentation/fb/fbcondecor.txt
2527 -@@ -0,0 +1,207 @@
2528 -+What is it?
2529 -+-----------
2530 -+
2531 -+The framebuffer decorations are a kernel feature which allows displaying a
2532 -+background picture on selected consoles.
2533 -+
2534 -+What do I need to get it to work?
2535 -+---------------------------------
2536 -+
2537 -+To get fbcondecor up-and-running you will have to:
2538 -+ 1) get a copy of splashutils [1] or a similar program
2539 -+ 2) get some fbcondecor themes
2540 -+ 3) build the kernel helper program
2541 -+ 4) build your kernel with the FB_CON_DECOR option enabled.
2542 -+
2543 -+To get fbcondecor operational right after fbcon initialization is finished, you
2544 -+will have to include a theme and the kernel helper into your initramfs image.
2545 -+Please refer to splashutils documentation for instructions on how to do that.
2546 -+
2547 -+[1] The splashutils package can be downloaded from:
2548 -+ http://dev.gentoo.org/~spock/projects/splashutils/
2549 -+
2550 -+The userspace helper
2551 -+--------------------
2552 -+
2553 -+The userspace fbcondecor helper (by default: /sbin/fbcondecor_helper) is called by the
2554 -+kernel whenever an important event occurs and the kernel needs some kind of
2555 -+job to be carried out. Important events include console switches and video
2556 -+mode switches (the kernel requests background images and configuration
2557 -+parameters for the current console). The fbcondecor helper must be accessible at
2558 -+all times. If it's not, fbcondecor will be switched off automatically.
2559 -+
2560 -+It's possible to set path to the fbcondecor helper by writing it to
2561 -+/proc/sys/kernel/fbcondecor.
2562 -+
2563 -+*****************************************************************************
2564 -+
2565 -+The information below is mostly technical stuff. There's probably no need to
2566 -+read it unless you plan to develop a userspace helper.
2567 -+
2568 -+The fbcondecor protocol
2569 -+-----------------------
2570 -+
2571 -+The fbcondecor protocol defines a communication interface between the kernel and
2572 -+the userspace fbcondecor helper.
2573 -+
2574 -+The kernel side is responsible for:
2575 -+
2576 -+ * rendering console text, using an image as a background (instead of a
2577 -+ standard solid color fbcon uses),
2578 -+ * accepting commands from the user via ioctls on the fbcondecor device,
2579 -+ * calling the userspace helper to set things up as soon as the fb subsystem
2580 -+ is initialized.
2581 -+
2582 -+The userspace helper is responsible for everything else, including parsing
2583 -+configuration files, decompressing the image files whenever the kernel needs
2584 -+it, and communicating with the kernel if necessary.
2585 -+
2586 -+The fbcondecor protocol specifies how communication is done in both ways:
2587 -+kernel->userspace and userspace->helper.
2588 -+
2589 -+Kernel -> Userspace
2590 -+-------------------
2591 -+
2592 -+The kernel communicates with the userspace helper by calling it and specifying
2593 -+the task to be done in a series of arguments.
2594 -+
2595 -+The arguments follow the pattern:
2596 -+<fbcondecor protocol version> <command> <parameters>
2597 -+
2598 -+All commands defined in fbcondecor protocol v2 have the following parameters:
2599 -+ virtual console
2600 -+ framebuffer number
2601 -+ theme
2602 -+
2603 -+Fbcondecor protocol v1 specified an additional 'fbcondecor mode' after the
2604 -+framebuffer number. Fbcondecor protocol v1 is deprecated and should not be used.
2605 -+
2606 -+Fbcondecor protocol v2 specifies the following commands:
2607 -+
2608 -+getpic
2609 -+------
2610 -+ The kernel issues this command to request image data. It's up to the
2611 -+ userspace helper to find a background image appropriate for the specified
2612 -+ theme and the current resolution. The userspace helper should respond by
2613 -+ issuing the FBIOCONDECOR_SETPIC ioctl.
2614 -+
2615 -+init
2616 -+----
2617 -+ The kernel issues this command after the fbcondecor device is created and
2618 -+ the fbcondecor interface is initialized. Upon receiving 'init', the userspace
2619 -+ helper should parse the kernel command line (/proc/cmdline) or otherwise
2620 -+ decide whether fbcondecor is to be activated.
2621 -+
2622 -+ To activate fbcondecor on the first console the helper should issue the
2623 -+ FBIOCONDECOR_SETCFG, FBIOCONDECOR_SETPIC and FBIOCONDECOR_SETSTATE commands,
2624 -+ in the above-mentioned order.
2625 -+
2626 -+ When the userspace helper is called in an early phase of the boot process
2627 -+ (right after the initialization of fbcon), no filesystems will be mounted.
2628 -+ The helper program should mount sysfs and then create the appropriate
2629 -+ framebuffer, fbcondecor and tty0 devices (if they don't already exist) to get
2630 -+ current display settings and to be able to communicate with the kernel side.
2631 -+ It should probably also mount the procfs to be able to parse the kernel
2632 -+ command line parameters.
2633 -+
2634 -+ Note that the console sem is not held when the kernel calls fbcondecor_helper
2635 -+ with the 'init' command. The fbcondecor helper should perform all ioctls with
2636 -+ origin set to FBCON_DECOR_IO_ORIG_USER.
2637 -+
2638 -+modechange
2639 -+----------
2640 -+ The kernel issues this command on a mode change. The helper's response should
2641 -+ be similar to the response to the 'init' command. Note that this time the
2642 -+ console sem is held and all ioctls must be performed with origin set to
2643 -+ FBCON_DECOR_IO_ORIG_KERNEL.
2644 -+
2645 -+
2646 -+Userspace -> Kernel
2647 -+-------------------
2648 -+
2649 -+Userspace programs can communicate with fbcondecor via ioctls on the
2650 -+fbcondecor device. These ioctls are to be used by both the userspace helper
2651 -+(called only by the kernel) and userspace configuration tools (run by the users).
2652 -+
2653 -+The fbcondecor helper should set the origin field to FBCON_DECOR_IO_ORIG_KERNEL
2654 -+when doing the appropriate ioctls. All userspace configuration tools should
2655 -+use FBCON_DECOR_IO_ORIG_USER. Failure to set the appropriate value in the origin
2656 -+field when performing ioctls from the kernel helper will most likely result
2657 -+in a console deadlock.
2658 -+
2659 -+FBCON_DECOR_IO_ORIG_KERNEL instructs fbcondecor not to try to acquire the console
2660 -+semaphore. Not surprisingly, FBCON_DECOR_IO_ORIG_USER instructs it to acquire
2661 -+the console sem.
2662 -+
2663 -+The framebuffer console decoration provides the following ioctls (all defined in
2664 -+linux/fb.h):
2665 -+
2666 -+FBIOCONDECOR_SETPIC
2667 -+description: loads a background picture for a virtual console
2668 -+argument: struct fbcon_decor_iowrapper*; data: struct fb_image*
2669 -+notes:
2670 -+If called for consoles other than the current foreground one, the picture data
2671 -+will be ignored.
2672 -+
2673 -+If the current virtual console is running in a 8-bpp mode, the cmap substruct
2674 -+of fb_image has to be filled appropriately: start should be set to 16 (first
2675 -+16 colors are reserved for fbcon), len to a value <= 240 and red, green and
2676 -+blue should point to valid cmap data. The transp field is ingored. The fields
2677 -+dx, dy, bg_color, fg_color in fb_image are ignored as well.
2678 -+
2679 -+FBIOCONDECOR_SETCFG
2680 -+description: sets the fbcondecor config for a virtual console
2681 -+argument: struct fbcon_decor_iowrapper*; data: struct vc_decor*
2682 -+notes: The structure has to be filled with valid data.
2683 -+
2684 -+FBIOCONDECOR_GETCFG
2685 -+description: gets the fbcondecor config for a virtual console
2686 -+argument: struct fbcon_decor_iowrapper*; data: struct vc_decor*
2687 -+
2688 -+FBIOCONDECOR_SETSTATE
2689 -+description: sets the fbcondecor state for a virtual console
2690 -+argument: struct fbcon_decor_iowrapper*; data: unsigned int*
2691 -+ values: 0 = disabled, 1 = enabled.
2692 -+
2693 -+FBIOCONDECOR_GETSTATE
2694 -+description: gets the fbcondecor state for a virtual console
2695 -+argument: struct fbcon_decor_iowrapper*; data: unsigned int*
2696 -+ values: as in FBIOCONDECOR_SETSTATE
2697 -+
2698 -+Info on used structures:
2699 -+
2700 -+Definition of struct vc_decor can be found in linux/console_decor.h. It's
2701 -+heavily commented. Note that the 'theme' field should point to a string
2702 -+no longer than FBCON_DECOR_THEME_LEN. When FBIOCONDECOR_GETCFG call is
2703 -+performed, the theme field should point to a char buffer of length
2704 -+FBCON_DECOR_THEME_LEN.
2705 -+
2706 -+Definition of struct fbcon_decor_iowrapper can be found in linux/fb.h.
2707 -+The fields in this struct have the following meaning:
2708 -+
2709 -+vc:
2710 -+Virtual console number.
2711 -+
2712 -+origin:
2713 -+Specifies if the ioctl is performed as a response to a kernel request. The
2714 -+fbcondecor helper should set this field to FBCON_DECOR_IO_ORIG_KERNEL, userspace
2715 -+programs should set it to FBCON_DECOR_IO_ORIG_USER. This field is necessary to
2716 -+avoid console semaphore deadlocks.
2717 -+
2718 -+data:
2719 -+Pointer to a data structure appropriate for the performed ioctl. Type of
2720 -+the data struct is specified in the ioctls description.
2721 -+
2722 -+*****************************************************************************
2723 -+
2724 -+Credit
2725 -+------
2726 -+
2727 -+Original 'bootsplash' project & implementation by:
2728 -+ Volker Poplawski <volker@×××××××××.de>, Stefan Reinauer <stepan@××××.de>,
2729 -+ Steffen Winterfeldt <snwint@××××.de>, Michael Schroeder <mls@××××.de>,
2730 -+ Ken Wimer <wimer@××××.de>.
2731 -+
2732 -+Fbcondecor, fbcondecor protocol design, current implementation & docs by:
2733 -+ Michal Januszewski <spock@g.o>
2734 -+
2735 -diff --git a/drivers/Makefile b/drivers/Makefile
2736 -index 95952c8..b55db6d 100644
2737 ---- a/drivers/Makefile
2738 -+++ b/drivers/Makefile
2739 -@@ -16,4 +16,8 @@ obj-$(CONFIG_PCI) += pci/
2740 - obj-$(CONFIG_PARISC) += parisc/
2741 - obj-$(CONFIG_RAPIDIO) += rapidio/
2742 -+# tty/ comes before char/ so that the VT console is the boot-time
2743 -+# default.
2744 -+obj-y += tty/
2745 -+obj-y += char/
2746 - obj-y += video/
2747 - obj-y += idle/
2748 -@@ -37,11 +41,6 @@ obj-$(CONFIG_XEN) += xen/
2749 - # regulators early, since some subsystems rely on them to initialize
2750 - obj-$(CONFIG_REGULATOR) += regulator/
2751 -
2752 --# tty/ comes before char/ so that the VT console is the boot-time
2753 --# default.
2754 --obj-y += tty/
2755 --obj-y += char/
2756 --
2757 - # gpu/ comes after char for AGP vs DRM startup
2758 - obj-y += gpu/
2759 -
2760 -diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
2761 -index a290be5..3a4ca32 100644
2762 ---- a/drivers/video/Kconfig
2763 -+++ b/drivers/video/Kconfig
2764 -@@ -1229,7 +1229,6 @@ config FB_MATROX
2765 - select FB_CFB_FILLRECT
2766 - select FB_CFB_COPYAREA
2767 - select FB_CFB_IMAGEBLIT
2768 -- select FB_TILEBLITTING
2769 - select FB_MACMODES if PPC_PMAC
2770 - ---help---
2771 - Say Y here if you have a Matrox Millennium, Matrox Millennium II,
2772 -diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
2773 -index c2d11fe..1be9de4 100644
2774 ---- a/drivers/video/console/Kconfig
2775 -+++ b/drivers/video/console/Kconfig
2776 -@@ -120,6 +120,19 @@ config FRAMEBUFFER_CONSOLE_ROTATION
2777 - such that other users of the framebuffer will remain normally
2778 - oriented.
2779 -
2780 -+config FB_CON_DECOR
2781 -+ bool "Support for the Framebuffer Console Decorations"
2782 -+ depends on FRAMEBUFFER_CONSOLE=y && !FB_TILEBLITTING
2783 -+ default n
2784 -+ ---help---
2785 -+ This option enables support for framebuffer console decorations which
2786 -+ makes it possible to display images in the background of the system
2787 -+ consoles. Note that userspace utilities are necessary in order to take
2788 -+ advantage of these features. Refer to Documentation/fb/fbcondecor.txt
2789 -+ for more information.
2790 -+
2791 -+ If unsure, say N.
2792 -+
2793 - config STI_CONSOLE
2794 - bool "STI text console"
2795 - depends on PARISC
2796 ---- a/drivers/video/console/Makefile 2013-08-26 14:02:39.905817618 -0400
2797 -+++ b/drivers/video/console/Makefile 2013-08-26 14:05:06.258848595 -0400
2798 -@@ -16,4 +16,5 @@ obj-$(CONFIG_FRAMEBUFFER_CONSOLE) +=
2799 - fbcon_ccw.o
2800 - endif
2801 -
2802 -+obj-$(CONFIG_FB_CON_DECOR) += fbcondecor.o cfbcondecor.o
2803 - obj-$(CONFIG_FB_STI) += sticore.o
2804 -diff --git a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c
2805 -index 28b1a83..33712c0 100644
2806 ---- a/drivers/video/console/bitblit.c
2807 -+++ b/drivers/video/console/bitblit.c
2808 -@@ -18,6 +18,7 @@
2809 - #include <linux/console.h>
2810 - #include <asm/types.h>
2811 - #include "fbcon.h"
2812 -+#include "fbcondecor.h"
2813 -
2814 - /*
2815 - * Accelerated handlers.
2816 -@@ -55,6 +56,13 @@ static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
2817 - area.height = height * vc->vc_font.height;
2818 - area.width = width * vc->vc_font.width;
2819 -
2820 -+ if (fbcon_decor_active(info, vc)) {
2821 -+ area.sx += vc->vc_decor.tx;
2822 -+ area.sy += vc->vc_decor.ty;
2823 -+ area.dx += vc->vc_decor.tx;
2824 -+ area.dy += vc->vc_decor.ty;
2825 -+ }
2826 -+
2827 - info->fbops->fb_copyarea(info, &area);
2828 - }
2829 -
2830 -@@ -380,11 +388,15 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
2831 - cursor.image.depth = 1;
2832 - cursor.rop = ROP_XOR;
2833 -
2834 -- if (info->fbops->fb_cursor)
2835 -- err = info->fbops->fb_cursor(info, &cursor);
2836 -+ if (fbcon_decor_active(info, vc)) {
2837 -+ fbcon_decor_cursor(info, &cursor);
2838 -+ } else {
2839 -+ if (info->fbops->fb_cursor)
2840 -+ err = info->fbops->fb_cursor(info, &cursor);
2841 -
2842 -- if (err)
2843 -- soft_cursor(info, &cursor);
2844 -+ if (err)
2845 -+ soft_cursor(info, &cursor);
2846 -+ }
2847 -
2848 - ops->cursor_reset = 0;
2849 - }
2850 -diff --git a/drivers/video/console/cfbcondecor.c b/drivers/video/console/cfbcondecor.c
2851 -new file mode 100644
2852 -index 0000000..09381d3
2853 ---- /dev/null
2854 -+++ b/drivers/video/console/cfbcondecor.c
2855 -@@ -0,0 +1,471 @@
2856 -+/*
2857 -+ * linux/drivers/video/cfbcon_decor.c -- Framebuffer decor render functions
2858 -+ *
2859 -+ * Copyright (C) 2004 Michal Januszewski <spock@g.o>
2860 -+ *
2861 -+ * Code based upon "Bootdecor" (C) 2001-2003
2862 -+ * Volker Poplawski <volker@×××××××××.de>,
2863 -+ * Stefan Reinauer <stepan@××××.de>,
2864 -+ * Steffen Winterfeldt <snwint@××××.de>,
2865 -+ * Michael Schroeder <mls@××××.de>,
2866 -+ * Ken Wimer <wimer@××××.de>.
2867 -+ *
2868 -+ * This file is subject to the terms and conditions of the GNU General Public
2869 -+ * License. See the file COPYING in the main directory of this archive for
2870 -+ * more details.
2871 -+ */
2872 -+#include <linux/module.h>
2873 -+#include <linux/types.h>
2874 -+#include <linux/fb.h>
2875 -+#include <linux/selection.h>
2876 -+#include <linux/slab.h>
2877 -+#include <linux/vt_kern.h>
2878 -+#include <asm/irq.h>
2879 -+
2880 -+#include "fbcon.h"
2881 -+#include "fbcondecor.h"
2882 -+
2883 -+#define parse_pixel(shift,bpp,type) \
2884 -+ do { \
2885 -+ if (d & (0x80 >> (shift))) \
2886 -+ dd2[(shift)] = fgx; \
2887 -+ else \
2888 -+ dd2[(shift)] = transparent ? *(type *)decor_src : bgx; \
2889 -+ decor_src += (bpp); \
2890 -+ } while (0) \
2891 -+
2892 -+extern int get_color(struct vc_data *vc, struct fb_info *info,
2893 -+ u16 c, int is_fg);
2894 -+
2895 -+void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc)
2896 -+{
2897 -+ int i, j, k;
2898 -+ int minlen = min(min(info->var.red.length, info->var.green.length),
2899 -+ info->var.blue.length);
2900 -+ u32 col;
2901 -+
2902 -+ for (j = i = 0; i < 16; i++) {
2903 -+ k = color_table[i];
2904 -+
2905 -+ col = ((vc->vc_palette[j++] >> (8-minlen))
2906 -+ << info->var.red.offset);
2907 -+ col |= ((vc->vc_palette[j++] >> (8-minlen))
2908 -+ << info->var.green.offset);
2909 -+ col |= ((vc->vc_palette[j++] >> (8-minlen))
2910 -+ << info->var.blue.offset);
2911 -+ ((u32 *)info->pseudo_palette)[k] = col;
2912 -+ }
2913 -+}
2914 -+
2915 -+void fbcon_decor_renderc(struct fb_info *info, int ypos, int xpos, int height,
2916 -+ int width, u8* src, u32 fgx, u32 bgx, u8 transparent)
2917 -+{
2918 -+ unsigned int x, y;
2919 -+ u32 dd;
2920 -+ int bytespp = ((info->var.bits_per_pixel + 7) >> 3);
2921 -+ unsigned int d = ypos * info->fix.line_length + xpos * bytespp;
2922 -+ unsigned int ds = (ypos * info->var.xres + xpos) * bytespp;
2923 -+ u16 dd2[4];
2924 -+
2925 -+ u8* decor_src = (u8 *)(info->bgdecor.data + ds);
2926 -+ u8* dst = (u8 *)(info->screen_base + d);
2927 -+
2928 -+ if ((ypos + height) > info->var.yres || (xpos + width) > info->var.xres)
2929 -+ return;
2930 -+
2931 -+ for (y = 0; y < height; y++) {
2932 -+ switch (info->var.bits_per_pixel) {
2933 -+
2934 -+ case 32:
2935 -+ for (x = 0; x < width; x++) {
2936 -+
2937 -+ if ((x & 7) == 0)
2938 -+ d = *src++;
2939 -+ if (d & 0x80)
2940 -+ dd = fgx;
2941 -+ else
2942 -+ dd = transparent ?
2943 -+ *(u32 *)decor_src : bgx;
2944 -+
2945 -+ d <<= 1;
2946 -+ decor_src += 4;
2947 -+ fb_writel(dd, dst);
2948 -+ dst += 4;
2949 -+ }
2950 -+ break;
2951 -+ case 24:
2952 -+ for (x = 0; x < width; x++) {
2953 -+
2954 -+ if ((x & 7) == 0)
2955 -+ d = *src++;
2956 -+ if (d & 0x80)
2957 -+ dd = fgx;
2958 -+ else
2959 -+ dd = transparent ?
2960 -+ (*(u32 *)decor_src & 0xffffff) : bgx;
2961 -+
2962 -+ d <<= 1;
2963 -+ decor_src += 3;
2964 -+#ifdef __LITTLE_ENDIAN
2965 -+ fb_writew(dd & 0xffff, dst);
2966 -+ dst += 2;
2967 -+ fb_writeb((dd >> 16), dst);
2968 -+#else
2969 -+ fb_writew(dd >> 8, dst);
2970 -+ dst += 2;
2971 -+ fb_writeb(dd & 0xff, dst);
2972 -+#endif
2973 -+ dst++;
2974 -+ }
2975 -+ break;
2976 -+ case 16:
2977 -+ for (x = 0; x < width; x += 2) {
2978 -+ if ((x & 7) == 0)
2979 -+ d = *src++;
2980 -+
2981 -+ parse_pixel(0, 2, u16);
2982 -+ parse_pixel(1, 2, u16);
2983 -+#ifdef __LITTLE_ENDIAN
2984 -+ dd = dd2[0] | (dd2[1] << 16);
2985 -+#else
2986 -+ dd = dd2[1] | (dd2[0] << 16);
2987 -+#endif
2988 -+ d <<= 2;
2989 -+ fb_writel(dd, dst);
2990 -+ dst += 4;
2991 -+ }
2992 -+ break;
2993 -+
2994 -+ case 8:
2995 -+ for (x = 0; x < width; x += 4) {
2996 -+ if ((x & 7) == 0)
2997 -+ d = *src++;
2998 -+
2999 -+ parse_pixel(0, 1, u8);
3000 -+ parse_pixel(1, 1, u8);
3001 -+ parse_pixel(2, 1, u8);
3002 -+ parse_pixel(3, 1, u8);
3003 -+
3004 -+#ifdef __LITTLE_ENDIAN
3005 -+ dd = dd2[0] | (dd2[1] << 8) | (dd2[2] << 16) | (dd2[3] << 24);
3006 -+#else
3007 -+ dd = dd2[3] | (dd2[2] << 8) | (dd2[1] << 16) | (dd2[0] << 24);
3008 -+#endif
3009 -+ d <<= 4;
3010 -+ fb_writel(dd, dst);
3011 -+ dst += 4;
3012 -+ }
3013 -+ }
3014 -+
3015 -+ dst += info->fix.line_length - width * bytespp;
3016 -+ decor_src += (info->var.xres - width) * bytespp;
3017 -+ }
3018 -+}
3019 -+
3020 -+#define cc2cx(a) \
3021 -+ ((info->fix.visual == FB_VISUAL_TRUECOLOR || \
3022 -+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) ? \
3023 -+ ((u32*)info->pseudo_palette)[a] : a)
3024 -+
3025 -+void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info,
3026 -+ const unsigned short *s, int count, int yy, int xx)
3027 -+{
3028 -+ unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
3029 -+ struct fbcon_ops *ops = info->fbcon_par;
3030 -+ int fg_color, bg_color, transparent;
3031 -+ u8 *src;
3032 -+ u32 bgx, fgx;
3033 -+ u16 c = scr_readw(s);
3034 -+
3035 -+ fg_color = get_color(vc, info, c, 1);
3036 -+ bg_color = get_color(vc, info, c, 0);
3037 -+
3038 -+ /* Don't paint the background image if console is blanked */
3039 -+ transparent = ops->blank_state ? 0 :
3040 -+ (vc->vc_decor.bg_color == bg_color);
3041 -+
3042 -+ xx = xx * vc->vc_font.width + vc->vc_decor.tx;
3043 -+ yy = yy * vc->vc_font.height + vc->vc_decor.ty;
3044 -+
3045 -+ fgx = cc2cx(fg_color);
3046 -+ bgx = cc2cx(bg_color);
3047 -+
3048 -+ while (count--) {
3049 -+ c = scr_readw(s++);
3050 -+ src = vc->vc_font.data + (c & charmask) * vc->vc_font.height *
3051 -+ ((vc->vc_font.width + 7) >> 3);
3052 -+
3053 -+ fbcon_decor_renderc(info, yy, xx, vc->vc_font.height,
3054 -+ vc->vc_font.width, src, fgx, bgx, transparent);
3055 -+ xx += vc->vc_font.width;
3056 -+ }
3057 -+}
3058 -+
3059 -+void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor)
3060 -+{
3061 -+ int i;
3062 -+ unsigned int dsize, s_pitch;
3063 -+ struct fbcon_ops *ops = info->fbcon_par;
3064 -+ struct vc_data* vc;
3065 -+ u8 *src;
3066 -+
3067 -+ /* we really don't need any cursors while the console is blanked */
3068 -+ if (info->state != FBINFO_STATE_RUNNING || ops->blank_state)
3069 -+ return;
3070 -+
3071 -+ vc = vc_cons[ops->currcon].d;
3072 -+
3073 -+ src = kmalloc(64 + sizeof(struct fb_image), GFP_ATOMIC);
3074 -+ if (!src)
3075 -+ return;
3076 -+
3077 -+ s_pitch = (cursor->image.width + 7) >> 3;
3078 -+ dsize = s_pitch * cursor->image.height;
3079 -+ if (cursor->enable) {
3080 -+ switch (cursor->rop) {
3081 -+ case ROP_XOR:
3082 -+ for (i = 0; i < dsize; i++)
3083 -+ src[i] = cursor->image.data[i] ^ cursor->mask[i];
3084 -+ break;
3085 -+ case ROP_COPY:
3086 -+ default:
3087 -+ for (i = 0; i < dsize; i++)
3088 -+ src[i] = cursor->image.data[i] & cursor->mask[i];
3089 -+ break;
3090 -+ }
3091 -+ } else
3092 -+ memcpy(src, cursor->image.data, dsize);
3093 -+
3094 -+ fbcon_decor_renderc(info,
3095 -+ cursor->image.dy + vc->vc_decor.ty,
3096 -+ cursor->image.dx + vc->vc_decor.tx,
3097 -+ cursor->image.height,
3098 -+ cursor->image.width,
3099 -+ (u8*)src,
3100 -+ cc2cx(cursor->image.fg_color),
3101 -+ cc2cx(cursor->image.bg_color),
3102 -+ cursor->image.bg_color == vc->vc_decor.bg_color);
3103 -+
3104 -+ kfree(src);
3105 -+}
3106 -+
3107 -+static void decorset(u8 *dst, int height, int width, int dstbytes,
3108 -+ u32 bgx, int bpp)
3109 -+{
3110 -+ int i;
3111 -+
3112 -+ if (bpp == 8)
3113 -+ bgx |= bgx << 8;
3114 -+ if (bpp == 16 || bpp == 8)
3115 -+ bgx |= bgx << 16;
3116 -+
3117 -+ while (height-- > 0) {
3118 -+ u8 *p = dst;
3119 -+
3120 -+ switch (bpp) {
3121 -+
3122 -+ case 32:
3123 -+ for (i=0; i < width; i++) {
3124 -+ fb_writel(bgx, p); p += 4;
3125 -+ }
3126 -+ break;
3127 -+ case 24:
3128 -+ for (i=0; i < width; i++) {
3129 -+#ifdef __LITTLE_ENDIAN
3130 -+ fb_writew((bgx & 0xffff),(u16*)p); p += 2;
3131 -+ fb_writeb((bgx >> 16),p++);
3132 -+#else
3133 -+ fb_writew((bgx >> 8),(u16*)p); p += 2;
3134 -+ fb_writeb((bgx & 0xff),p++);
3135 -+#endif
3136 -+ }
3137 -+ case 16:
3138 -+ for (i=0; i < width/4; i++) {
3139 -+ fb_writel(bgx,p); p += 4;
3140 -+ fb_writel(bgx,p); p += 4;
3141 -+ }
3142 -+ if (width & 2) {
3143 -+ fb_writel(bgx,p); p += 4;
3144 -+ }
3145 -+ if (width & 1)
3146 -+ fb_writew(bgx,(u16*)p);
3147 -+ break;
3148 -+ case 8:
3149 -+ for (i=0; i < width/4; i++) {
3150 -+ fb_writel(bgx,p); p += 4;
3151 -+ }
3152 -+
3153 -+ if (width & 2) {
3154 -+ fb_writew(bgx,p); p += 2;
3155 -+ }
3156 -+ if (width & 1)
3157 -+ fb_writeb(bgx,(u8*)p);
3158 -+ break;
3159 -+
3160 -+ }
3161 -+ dst += dstbytes;
3162 -+ }
3163 -+}
3164 -+
3165 -+void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes,
3166 -+ int srclinebytes, int bpp)
3167 -+{
3168 -+ int i;
3169 -+
3170 -+ while (height-- > 0) {
3171 -+ u32 *p = (u32 *)dst;
3172 -+ u32 *q = (u32 *)src;
3173 -+
3174 -+ switch (bpp) {
3175 -+
3176 -+ case 32:
3177 -+ for (i=0; i < width; i++)
3178 -+ fb_writel(*q++, p++);
3179 -+ break;
3180 -+ case 24:
3181 -+ for (i=0; i < (width*3/4); i++)
3182 -+ fb_writel(*q++, p++);
3183 -+ if ((width*3) % 4) {
3184 -+ if (width & 2) {
3185 -+ fb_writeb(*(u8*)q, (u8*)p);
3186 -+ } else if (width & 1) {
3187 -+ fb_writew(*(u16*)q, (u16*)p);
3188 -+ fb_writeb(*(u8*)((u16*)q+1),(u8*)((u16*)p+2));
3189 -+ }
3190 -+ }
3191 -+ break;
3192 -+ case 16:
3193 -+ for (i=0; i < width/4; i++) {
3194 -+ fb_writel(*q++, p++);
3195 -+ fb_writel(*q++, p++);
3196 -+ }
3197 -+ if (width & 2)
3198 -+ fb_writel(*q++, p++);
3199 -+ if (width & 1)
3200 -+ fb_writew(*(u16*)q, (u16*)p);
3201 -+ break;
3202 -+ case 8:
3203 -+ for (i=0; i < width/4; i++)
3204 -+ fb_writel(*q++, p++);
3205 -+
3206 -+ if (width & 2) {
3207 -+ fb_writew(*(u16*)q, (u16*)p);
3208 -+ q = (u32*) ((u16*)q + 1);
3209 -+ p = (u32*) ((u16*)p + 1);
3210 -+ }
3211 -+ if (width & 1)
3212 -+ fb_writeb(*(u8*)q, (u8*)p);
3213 -+ break;
3214 -+ }
3215 -+
3216 -+ dst += linebytes;
3217 -+ src += srclinebytes;
3218 -+ }
3219 -+}
3220 -+
3221 -+static void decorfill(struct fb_info *info, int sy, int sx, int height,
3222 -+ int width)
3223 -+{
3224 -+ int bytespp = ((info->var.bits_per_pixel + 7) >> 3);
3225 -+ int d = sy * info->fix.line_length + sx * bytespp;
3226 -+ int ds = (sy * info->var.xres + sx) * bytespp;
3227 -+
3228 -+ fbcon_decor_copy((u8 *)(info->screen_base + d), (u8 *)(info->bgdecor.data + ds),
3229 -+ height, width, info->fix.line_length, info->var.xres * bytespp,
3230 -+ info->var.bits_per_pixel);
3231 -+}
3232 -+
3233 -+void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx,
3234 -+ int height, int width)
3235 -+{
3236 -+ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
3237 -+ struct fbcon_ops *ops = info->fbcon_par;
3238 -+ u8 *dst;
3239 -+ int transparent, bg_color = attr_bgcol_ec(bgshift, vc, info);
3240 -+
3241 -+ transparent = (vc->vc_decor.bg_color == bg_color);
3242 -+ sy = sy * vc->vc_font.height + vc->vc_decor.ty;
3243 -+ sx = sx * vc->vc_font.width + vc->vc_decor.tx;
3244 -+ height *= vc->vc_font.height;
3245 -+ width *= vc->vc_font.width;
3246 -+
3247 -+ /* Don't paint the background image if console is blanked */
3248 -+ if (transparent && !ops->blank_state) {
3249 -+ decorfill(info, sy, sx, height, width);
3250 -+ } else {
3251 -+ dst = (u8 *)(info->screen_base + sy * info->fix.line_length +
3252 -+ sx * ((info->var.bits_per_pixel + 7) >> 3));
3253 -+ decorset(dst, height, width, info->fix.line_length, cc2cx(bg_color),
3254 -+ info->var.bits_per_pixel);
3255 -+ }
3256 -+}
3257 -+
3258 -+void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info,
3259 -+ int bottom_only)
3260 -+{
3261 -+ unsigned int tw = vc->vc_cols*vc->vc_font.width;
3262 -+ unsigned int th = vc->vc_rows*vc->vc_font.height;
3263 -+
3264 -+ if (!bottom_only) {
3265 -+ /* top margin */
3266 -+ decorfill(info, 0, 0, vc->vc_decor.ty, info->var.xres);
3267 -+ /* left margin */
3268 -+ decorfill(info, vc->vc_decor.ty, 0, th, vc->vc_decor.tx);
3269 -+ /* right margin */
3270 -+ decorfill(info, vc->vc_decor.ty, vc->vc_decor.tx + tw, th,
3271 -+ info->var.xres - vc->vc_decor.tx - tw);
3272 -+ }
3273 -+ decorfill(info, vc->vc_decor.ty + th, 0,
3274 -+ info->var.yres - vc->vc_decor.ty - th, info->var.xres);
3275 -+}
3276 -+
3277 -+void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y,
3278 -+ int sx, int dx, int width)
3279 -+{
3280 -+ u16 *d = (u16 *) (vc->vc_origin + vc->vc_size_row * y + dx * 2);
3281 -+ u16 *s = d + (dx - sx);
3282 -+ u16 *start = d;
3283 -+ u16 *ls = d;
3284 -+ u16 *le = d + width;
3285 -+ u16 c;
3286 -+ int x = dx;
3287 -+ u16 attr = 1;
3288 -+
3289 -+ do {
3290 -+ c = scr_readw(d);
3291 -+ if (attr != (c & 0xff00)) {
3292 -+ attr = c & 0xff00;
3293 -+ if (d > start) {
3294 -+ fbcon_decor_putcs(vc, info, start, d - start, y, x);
3295 -+ x += d - start;
3296 -+ start = d;
3297 -+ }
3298 -+ }
3299 -+ if (s >= ls && s < le && c == scr_readw(s)) {
3300 -+ if (d > start) {
3301 -+ fbcon_decor_putcs(vc, info, start, d - start, y, x);
3302 -+ x += d - start + 1;
3303 -+ start = d + 1;
3304 -+ } else {
3305 -+ x++;
3306 -+ start++;
3307 -+ }
3308 -+ }
3309 -+ s++;
3310 -+ d++;
3311 -+ } while (d < le);
3312 -+ if (d > start)
3313 -+ fbcon_decor_putcs(vc, info, start, d - start, y, x);
3314 -+}
3315 -+
3316 -+void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank)
3317 -+{
3318 -+ if (blank) {
3319 -+ decorset((u8 *)info->screen_base, info->var.yres, info->var.xres,
3320 -+ info->fix.line_length, 0, info->var.bits_per_pixel);
3321 -+ } else {
3322 -+ update_screen(vc);
3323 -+ fbcon_decor_clear_margins(vc, info, 0);
3324 -+ }
3325 -+}
3326 -+
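For reference, the rendering loop in fbcon_decor_renderc above expands a monochrome glyph bitmap bit by bit: each source byte supplies eight pixels, a set bit selects the foreground pixel value, and a cleared bit falls back to either the background-decoration pixel or the solid background colour. A minimal user-space sketch of the 32 bpp case follows; the function and buffer names are illustrative only and are not part of the removed patch.

    #include <stdint.h>

    /* Expand one row of a 1-bpp glyph into 32-bpp pixels (sketch).
     * bits:  packed glyph row, most significant bit first
     * decor: matching row of the background picture, or NULL
     * fgx, bgx: foreground/background pixel values (already resolved
     *           through the pseudo palette, as cc2cx() does above)   */
    static void expand_row_32(const uint8_t *bits, unsigned int width,
                              const uint32_t *decor, uint32_t fgx, uint32_t bgx,
                              uint32_t *out)
    {
            unsigned int x;
            uint8_t d = 0;

            for (x = 0; x < width; x++) {
                    if ((x & 7) == 0)       /* refill the shift register every 8 pixels */
                            d = *bits++;
                    out[x] = (d & 0x80) ? fgx : (decor ? decor[x] : bgx);
                    d <<= 1;                /* next glyph bit moves into the MSB */
            }
    }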
3327 -From ea6ca92753106f1e0773acd1f18c71ae79a6f9b0 Mon Sep 17 00:00:00 2001
3328 -From: Mike Pagano <mpagano@g.o>
3329 -Date: Tue, 27 Aug 2013 07:58:05 -0400
3330 -Subject: [PATCH] fbcondecor port
3331 -
3332 ----
3333 - drivers/video/console/fbcon.c | 167 ++++++++++++++++++++++++++++++++++++------
3334 - 1 file changed, 143 insertions(+), 24 deletions(-)
3335 -
3336 -diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
3337 -index cd8a802..666556c 100644
3338 ---- a/drivers/video/console/fbcon.c
3339 -+++ b/drivers/video/console/fbcon.c
3340 -@@ -79,6 +79,7 @@
3341 - #include <asm/irq.h>
3342 -
3343 - #include "fbcon.h"
3344 -+#include "fbcondecor.h"
3345 -
3346 - #ifdef FBCONDEBUG
3347 - # define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __func__ , ## args)
3348 -@@ -94,7 +95,7 @@ enum {
3349 -
3350 - static struct display fb_display[MAX_NR_CONSOLES];
3351 -
3352 --static signed char con2fb_map[MAX_NR_CONSOLES];
3353 -+signed char con2fb_map[MAX_NR_CONSOLES];
3354 - static signed char con2fb_map_boot[MAX_NR_CONSOLES];
3355 -
3356 - static int logo_lines;
3357 -@@ -286,7 +287,7 @@ static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info)
3358 - !vt_force_oops_output(vc);
3359 - }
3360 -
3361 --static int get_color(struct vc_data *vc, struct fb_info *info,
3362 -+int get_color(struct vc_data *vc, struct fb_info *info,
3363 - u16 c, int is_fg)
3364 - {
3365 - int depth = fb_get_color_depth(&info->var, &info->fix);
3366 -@@ -551,6 +552,9 @@ static int do_fbcon_takeover(int show_logo)
3367 - info_idx = -1;
3368 - } else {
3369 - fbcon_has_console_bind = 1;
3370 -+#ifdef CONFIG_FB_CON_DECOR
3371 -+ fbcon_decor_init();
3372 -+#endif
3373 - }
3374 -
3375 - return err;
3376 -@@ -1007,6 +1011,12 @@ static const char *fbcon_startup(void)
3377 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3378 - cols /= vc->vc_font.width;
3379 - rows /= vc->vc_font.height;
3380 -+
3381 -+ if (fbcon_decor_active(info, vc)) {
3382 -+ cols = vc->vc_decor.twidth / vc->vc_font.width;
3383 -+ rows = vc->vc_decor.theight / vc->vc_font.height;
3384 -+ }
3385 -+
3386 - vc_resize(vc, cols, rows);
3387 -
3388 - DPRINTK("mode: %s\n", info->fix.id);
3389 -@@ -1036,7 +1046,7 @@ static void fbcon_init(struct vc_data *vc, int init)
3390 - cap = info->flags;
3391 -
3392 - if (vc != svc || logo_shown == FBCON_LOGO_DONTSHOW ||
3393 -- (info->fix.type == FB_TYPE_TEXT))
3394 -+ (info->fix.type == FB_TYPE_TEXT) || fbcon_decor_active(info, vc))
3395 - logo = 0;
3396 -
3397 - if (var_to_display(p, &info->var, info))
3398 -@@ -1260,6 +1270,11 @@ static void fbcon_clear(struct vc_data *vc, int sy, int sx, int height,
3399 - fbcon_clear_margins(vc, 0);
3400 - }
3401 -
3402 -+ if (fbcon_decor_active(info, vc)) {
3403 -+ fbcon_decor_clear(vc, info, sy, sx, height, width);
3404 -+ return;
3405 -+ }
3406 -+
3407 - /* Split blits that cross physical y_wrap boundary */
3408 -
3409 - y_break = p->vrows - p->yscroll;
3410 -@@ -1279,10 +1294,15 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s,
3411 - struct display *p = &fb_display[vc->vc_num];
3412 - struct fbcon_ops *ops = info->fbcon_par;
3413 -
3414 -- if (!fbcon_is_inactive(vc, info))
3415 -- ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
3416 -- get_color(vc, info, scr_readw(s), 1),
3417 -- get_color(vc, info, scr_readw(s), 0));
3418 -+ if (!fbcon_is_inactive(vc, info)) {
3419 -+
3420 -+ if (fbcon_decor_active(info, vc))
3421 -+ fbcon_decor_putcs(vc, info, s, count, ypos, xpos);
3422 -+ else
3423 -+ ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
3424 -+ get_color(vc, info, scr_readw(s), 1),
3425 -+ get_color(vc, info, scr_readw(s), 0));
3426 -+ }
3427 - }
3428 -
3429 - static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos)
3430 -@@ -1297,9 +1317,6 @@ static void fbcon_clear_margins(struct vc_data *vc, int bottom_only)
3431 - {
3432 - struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
3433 - struct fbcon_ops *ops = info->fbcon_par;
3434 --
3435 -- if (!fbcon_is_inactive(vc, info))
3436 -- ops->clear_margins(vc, info, bottom_only);
3437 - }
3438 -
3439 - static void fbcon_cursor(struct vc_data *vc, int mode)
3440 -@@ -1819,7 +1836,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
3441 - count = vc->vc_rows;
3442 - if (softback_top)
3443 - fbcon_softback_note(vc, t, count);
3444 -- if (logo_shown >= 0)
3445 -+ if (logo_shown >= 0 || fbcon_decor_active(info, vc))
3446 - goto redraw_up;
3447 - switch (p->scrollmode) {
3448 - case SCROLL_MOVE:
3449 -@@ -1912,6 +1929,8 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
3450 - count = vc->vc_rows;
3451 - if (logo_shown >= 0)
3452 - goto redraw_down;
3453 -+ if (fbcon_decor_active(info, vc))
3454 -+ goto redraw_down;
3455 - switch (p->scrollmode) {
3456 - case SCROLL_MOVE:
3457 - fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
3458 -@@ -2060,6 +2079,13 @@ static void fbcon_bmove_rec(struct vc_data *vc, struct display *p, int sy, int s
3459 - }
3460 - return;
3461 - }
3462 -+
3463 -+ if (fbcon_decor_active(info, vc) && sy == dy && height == 1) {
3464 -+ /* must use slower redraw bmove to keep background pic intact */
3465 -+ fbcon_decor_bmove_redraw(vc, info, sy, sx, dx, width);
3466 -+ return;
3467 -+ }
3468 -+
3469 - ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
3470 - height, width);
3471 - }
3472 -@@ -2130,8 +2156,8 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
3473 - var.yres = virt_h * virt_fh;
3474 - x_diff = info->var.xres - var.xres;
3475 - y_diff = info->var.yres - var.yres;
3476 -- if (x_diff < 0 || x_diff > virt_fw ||
3477 -- y_diff < 0 || y_diff > virt_fh) {
3478 -+ if ((x_diff < 0 || x_diff > virt_fw ||
3479 -+ y_diff < 0 || y_diff > virt_fh) && !vc->vc_decor.state) {
3480 - const struct fb_videomode *mode;
3481 -
3482 - DPRINTK("attempting resize %ix%i\n", var.xres, var.yres);
3483 -@@ -2168,6 +2194,22 @@ static int fbcon_switch(struct vc_data *vc)
3484 - info = registered_fb[con2fb_map[vc->vc_num]];
3485 - ops = info->fbcon_par;
3486 -
3487 -+ prev_console = ops->currcon;
3488 -+ if (prev_console != -1)
3489 -+ old_info = registered_fb[con2fb_map[prev_console]];
3490 -+
3491 -+#ifdef CONFIG_FB_CON_DECOR
3492 -+ if (!fbcon_decor_active_vc(vc) && info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
3493 -+ struct vc_data *vc_curr = vc_cons[prev_console].d;
3494 -+ if (vc_curr && fbcon_decor_active_vc(vc_curr)) {
3495 -+ /* Clear the screen to avoid displaying funky colors during
3496 -+ * palette updates. */
3497 -+ memset((u8*)info->screen_base + info->fix.line_length * info->var.yoffset,
3498 -+ 0, info->var.yres * info->fix.line_length);
3499 -+ }
3500 -+ }
3501 -+#endif
3502 -+
3503 - if (softback_top) {
3504 - if (softback_lines)
3505 - fbcon_set_origin(vc);
3506 -@@ -2185,9 +2227,6 @@ static int fbcon_switch(struct vc_data *vc)
3507 - logo_shown = FBCON_LOGO_CANSHOW;
3508 - }
3509 -
3510 -- prev_console = ops->currcon;
3511 -- if (prev_console != -1)
3512 -- old_info = registered_fb[con2fb_map[prev_console]];
3513 - /*
3514 - * FIXME: If we have multiple fbdev's loaded, we need to
3515 - * update all info->currcon. Perhaps, we can place this
3516 -@@ -2231,6 +2270,18 @@ static int fbcon_switch(struct vc_data *vc)
3517 - fbcon_del_cursor_timer(old_info);
3518 - }
3519 -
3520 -+ if (fbcon_decor_active_vc(vc)) {
3521 -+ struct vc_data *vc_curr = vc_cons[prev_console].d;
3522 -+
3523 -+ if (!vc_curr->vc_decor.theme ||
3524 -+ strcmp(vc->vc_decor.theme, vc_curr->vc_decor.theme) ||
3525 -+ (fbcon_decor_active_nores(info, vc_curr) &&
3526 -+ !fbcon_decor_active(info, vc_curr))) {
3527 -+ fbcon_decor_disable(vc, 0);
3528 -+ fbcon_decor_call_helper("modechange", vc->vc_num);
3529 -+ }
3530 -+ }
3531 -+
3532 - if (fbcon_is_inactive(vc, info) ||
3533 - ops->blank_state != FB_BLANK_UNBLANK)
3534 - fbcon_del_cursor_timer(info);
3535 -@@ -2344,10 +2395,14 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
3536 - ops->blank_state = blank;
3537 - fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW);
3538 - ops->cursor_flash = (!blank);
3539 --
3540 -- if (!(info->flags & FBINFO_MISC_USEREVENT))
3541 -- if (fb_blank(info, blank))
3542 -- fbcon_generic_blank(vc, info, blank);
3543 -+ if (!(info->flags & FBINFO_MISC_USEREVENT)) {
3544 -+ if (fb_blank(info, blank)) {
3545 -+ if (fbcon_decor_active(info, vc))
3546 -+ fbcon_decor_blank(vc, info, blank);
3547 -+ else
3548 -+ fbcon_generic_blank(vc, info, blank);
3549 -+ }
3550 -+ }
3551 - }
3552 -
3553 - if (!blank)
3554 -@@ -2522,10 +2577,18 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
3555 - }
3556 -
3557 - if (resize) {
3558 -+ /* reset wrap/pan */
3559 - int cols, rows;
3560 -
3561 - cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres);
3562 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3563 -+
3564 -+ if (fbcon_decor_active(info, vc)) {
3565 -+ info->var.xoffset = info->var.yoffset = p->yscroll = 0;
3566 -+ cols = vc->vc_decor.twidth;
3567 -+ rows = vc->vc_decor.theight;
3568 -+ }
3569 -+
3570 - cols /= w;
3571 - rows /= h;
3572 - vc_resize(vc, cols, rows);
3573 -@@ -2657,7 +2720,11 @@ static int fbcon_set_palette(struct vc_data *vc, unsigned char *table)
3574 - int i, j, k, depth;
3575 - u8 val;
3576 -
3577 -- if (fbcon_is_inactive(vc, info))
3578 -+ if (fbcon_is_inactive(vc, info)
3579 -+#ifdef CONFIG_FB_CON_DECOR
3580 -+ || vc->vc_num != fg_console
3581 -+#endif
3582 -+ )
3583 - return -EINVAL;
3584 -
3585 - if (!CON_IS_VISIBLE(vc))
3586 -@@ -2683,7 +2750,49 @@ static int fbcon_set_palette(struct vc_data *vc, unsigned char *table)
3587 - } else
3588 - fb_copy_cmap(fb_default_cmap(1 << depth), &palette_cmap);
3589 -
3590 -- return fb_set_cmap(&palette_cmap, info);
3591 -+ if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
3592 -+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
3593 -+
3594 -+ u16 *red, *green, *blue;
3595 -+ int minlen = min(min(info->var.red.length, info->var.green.length),
3596 -+ info->var.blue.length);
3597 -+ int h;
3598 -+
3599 -+ struct fb_cmap cmap = {
3600 -+ .start = 0,
3601 -+ .len = (1 << minlen),
3602 -+ .red = NULL,
3603 -+ .green = NULL,
3604 -+ .blue = NULL,
3605 -+ .transp = NULL
3606 -+ };
3607 -+
3608 -+ red = kmalloc(256 * sizeof(u16) * 3, GFP_KERNEL);
3609 -+
3610 -+ if (!red)
3611 -+ goto out;
3612 -+
3613 -+ green = red + 256;
3614 -+ blue = green + 256;
3615 -+ cmap.red = red;
3616 -+ cmap.green = green;
3617 -+ cmap.blue = blue;
3618 -+
3619 -+ for (i = 0; i < cmap.len; i++) {
3620 -+ red[i] = green[i] = blue[i] = (0xffff * i)/(cmap.len-1);
3621 -+ }
3622 -+
3623 -+ h = fb_set_cmap(&cmap, info);
3624 -+ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d);
3625 -+ kfree(red);
3626 -+
3627 -+ return h;
3628 -+
3629 -+ } else if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
3630 -+ info->var.bits_per_pixel == 8 && info->bgdecor.cmap.red != NULL)
3631 -+ fb_set_cmap(&info->bgdecor.cmap, info);
3632 -+
3633 -+out: return fb_set_cmap(&palette_cmap, info);
3634 - }
3635 -
3636 - static u16 *fbcon_screen_pos(struct vc_data *vc, int offset)
3637 -@@ -2909,7 +3018,13 @@ static void fbcon_modechanged(struct fb_info *info)
3638 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3639 - cols /= vc->vc_font.width;
3640 - rows /= vc->vc_font.height;
3641 -- vc_resize(vc, cols, rows);
3642 -+ if (!fbcon_decor_active_nores(info, vc)) {
3643 -+ vc_resize(vc, cols, rows);
3644 -+ } else {
3645 -+ fbcon_decor_disable(vc, 0);
3646 -+ fbcon_decor_call_helper("modechange", vc->vc_num);
3647 -+ }
3648 -+
3649 - updatescrollmode(p, info, vc);
3650 - scrollback_max = 0;
3651 - scrollback_current = 0;
3652 -@@ -2954,7 +3069,10 @@ static void fbcon_set_all_vcs(struct fb_info *info)
3653 - rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
3654 - cols /= vc->vc_font.width;
3655 - rows /= vc->vc_font.height;
3656 -- vc_resize(vc, cols, rows);
3657 -+ if (!fbcon_decor_active_nores(info, vc)) {
3658 -+ vc_resize(vc, cols, rows);
3659 -+ }
3660 -+
3661 - }
3662 -
3663 - if (fg != -1)
3664 -@@ -3570,6 +3688,7 @@ static void fbcon_exit(void)
3665 - }
3666 - }
3667 -
3668 -+ fbcon_decor_exit();
3669 - fbcon_has_exited = 1;
3670 - }
3671 -
3672 ---
3673 -1.8.1.5
3674 -
3675 -diff --git a/drivers/video/console/fbcondecor.c b/drivers/video/console/fbcondecor.c
3676 -new file mode 100644
3677 -index 0000000..7189ce6
3678 ---- /dev/null
3679 -+++ b/drivers/video/console/fbcondecor.c
3680 -@@ -0,0 +1,555 @@
3681 -+/*
3682 -+ * linux/drivers/video/console/fbcondecor.c -- Framebuffer console decorations
3683 -+ *
3684 -+ * Copyright (C) 2004-2009 Michal Januszewski <spock@g.o>
3685 -+ *
3686 -+ * Code based upon "Bootsplash" (C) 2001-2003
3687 -+ * Volker Poplawski <volker@×××××××××.de>,
3688 -+ * Stefan Reinauer <stepan@××××.de>,
3689 -+ * Steffen Winterfeldt <snwint@××××.de>,
3690 -+ * Michael Schroeder <mls@××××.de>,
3691 -+ * Ken Wimer <wimer@××××.de>.
3692 -+ *
3693 -+ * Compat ioctl support by Thorsten Klein <TK@××××××××××××××.de>.
3694 -+ *
3695 -+ * This file is subject to the terms and conditions of the GNU General Public
3696 -+ * License. See the file COPYING in the main directory of this archive for
3697 -+ * more details.
3698 -+ *
3699 -+ */
3700 -+#include <linux/module.h>
3701 -+#include <linux/kernel.h>
3702 -+#include <linux/string.h>
3703 -+#include <linux/types.h>
3704 -+#include <linux/fb.h>
3705 -+#include <linux/vt_kern.h>
3706 -+#include <linux/vmalloc.h>
3707 -+#include <linux/unistd.h>
3708 -+#include <linux/syscalls.h>
3709 -+#include <linux/init.h>
3710 -+#include <linux/proc_fs.h>
3711 -+#include <linux/workqueue.h>
3712 -+#include <linux/kmod.h>
3713 -+#include <linux/miscdevice.h>
3714 -+#include <linux/device.h>
3715 -+#include <linux/fs.h>
3716 -+#include <linux/compat.h>
3717 -+#include <linux/console.h>
3718 -+
3719 -+#include <asm/uaccess.h>
3720 -+#include <asm/irq.h>
3721 -+
3722 -+#include "fbcon.h"
3723 -+#include "fbcondecor.h"
3724 -+
3725 -+extern signed char con2fb_map[];
3726 -+static int fbcon_decor_enable(struct vc_data *vc);
3727 -+char fbcon_decor_path[KMOD_PATH_LEN] = "/sbin/fbcondecor_helper";
3728 -+static int initialized = 0;
3729 -+
3730 -+int fbcon_decor_call_helper(char* cmd, unsigned short vc)
3731 -+{
3732 -+ char *envp[] = {
3733 -+ "HOME=/",
3734 -+ "PATH=/sbin:/bin",
3735 -+ NULL
3736 -+ };
3737 -+
3738 -+ char tfb[5];
3739 -+ char tcons[5];
3740 -+ unsigned char fb = (int) con2fb_map[vc];
3741 -+
3742 -+ char *argv[] = {
3743 -+ fbcon_decor_path,
3744 -+ "2",
3745 -+ cmd,
3746 -+ tcons,
3747 -+ tfb,
3748 -+ vc_cons[vc].d->vc_decor.theme,
3749 -+ NULL
3750 -+ };
3751 -+
3752 -+ snprintf(tfb,5,"%d",fb);
3753 -+ snprintf(tcons,5,"%d",vc);
3754 -+
3755 -+ return call_usermodehelper(fbcon_decor_path, argv, envp, UMH_WAIT_EXEC);
3756 -+}
3757 -+
3758 -+/* Disables fbcondecor on a virtual console; called with console sem held. */
3759 -+int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw)
3760 -+{
3761 -+ struct fb_info* info;
3762 -+
3763 -+ if (!vc->vc_decor.state)
3764 -+ return -EINVAL;
3765 -+
3766 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3767 -+
3768 -+ if (info == NULL)
3769 -+ return -EINVAL;
3770 -+
3771 -+ vc->vc_decor.state = 0;
3772 -+ vc_resize(vc, info->var.xres / vc->vc_font.width,
3773 -+ info->var.yres / vc->vc_font.height);
3774 -+
3775 -+ if (fg_console == vc->vc_num && redraw) {
3776 -+ redraw_screen(vc, 0);
3777 -+ update_region(vc, vc->vc_origin +
3778 -+ vc->vc_size_row * vc->vc_top,
3779 -+ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
3780 -+ }
3781 -+
3782 -+ printk(KERN_INFO "fbcondecor: switched decor state to 'off' on console %d\n",
3783 -+ vc->vc_num);
3784 -+
3785 -+ return 0;
3786 -+}
3787 -+
3788 -+/* Enables fbcondecor on a virtual console; called with console sem held. */
3789 -+static int fbcon_decor_enable(struct vc_data *vc)
3790 -+{
3791 -+ struct fb_info* info;
3792 -+
3793 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3794 -+
3795 -+ if (vc->vc_decor.twidth == 0 || vc->vc_decor.theight == 0 ||
3796 -+ info == NULL || vc->vc_decor.state || (!info->bgdecor.data &&
3797 -+ vc->vc_num == fg_console))
3798 -+ return -EINVAL;
3799 -+
3800 -+ vc->vc_decor.state = 1;
3801 -+ vc_resize(vc, vc->vc_decor.twidth / vc->vc_font.width,
3802 -+ vc->vc_decor.theight / vc->vc_font.height);
3803 -+
3804 -+ if (fg_console == vc->vc_num) {
3805 -+ redraw_screen(vc, 0);
3806 -+ update_region(vc, vc->vc_origin +
3807 -+ vc->vc_size_row * vc->vc_top,
3808 -+ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
3809 -+ fbcon_decor_clear_margins(vc, info, 0);
3810 -+ }
3811 -+
3812 -+ printk(KERN_INFO "fbcondecor: switched decor state to 'on' on console %d\n",
3813 -+ vc->vc_num);
3814 -+
3815 -+ return 0;
3816 -+}
3817 -+
3818 -+static inline int fbcon_decor_ioctl_dosetstate(struct vc_data *vc, unsigned int state, unsigned char origin)
3819 -+{
3820 -+ int ret;
3821 -+
3822 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3823 -+ console_lock();
3824 -+ if (!state)
3825 -+ ret = fbcon_decor_disable(vc, 1);
3826 -+ else
3827 -+ ret = fbcon_decor_enable(vc);
3828 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3829 -+ console_unlock();
3830 -+
3831 -+ return ret;
3832 -+}
3833 -+
3834 -+static inline void fbcon_decor_ioctl_dogetstate(struct vc_data *vc, unsigned int *state)
3835 -+{
3836 -+ *state = vc->vc_decor.state;
3837 -+}
3838 -+
3839 -+static int fbcon_decor_ioctl_dosetcfg(struct vc_data *vc, struct vc_decor *cfg, unsigned char origin)
3840 -+{
3841 -+ struct fb_info *info;
3842 -+ int len;
3843 -+ char *tmp;
3844 -+
3845 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3846 -+
3847 -+ if (info == NULL || !cfg->twidth || !cfg->theight ||
3848 -+ cfg->tx + cfg->twidth > info->var.xres ||
3849 -+ cfg->ty + cfg->theight > info->var.yres)
3850 -+ return -EINVAL;
3851 -+
3852 -+ len = strlen_user(cfg->theme);
3853 -+ if (!len || len > FBCON_DECOR_THEME_LEN)
3854 -+ return -EINVAL;
3855 -+ tmp = kmalloc(len, GFP_KERNEL);
3856 -+ if (!tmp)
3857 -+ return -ENOMEM;
3858 -+ if (copy_from_user(tmp, (void __user *)cfg->theme, len))
3859 -+ return -EFAULT;
3860 -+ cfg->theme = tmp;
3861 -+ cfg->state = 0;
3862 -+
3863 -+ /* If this ioctl is a response to a request from kernel, the console sem
3864 -+ * is already held; we also don't need to disable decor because either the
3865 -+ * new config and background picture will be successfully loaded, and the
3866 -+ * decor will stay on, or in case of a failure it'll be turned off in fbcon. */
3867 -+// if (origin == FBCON_DECOR_IO_ORIG_USER) {
3868 -+ console_lock();
3869 -+ if (vc->vc_decor.state)
3870 -+ fbcon_decor_disable(vc, 1);
3871 -+// }
3872 -+
3873 -+ if (vc->vc_decor.theme)
3874 -+ kfree(vc->vc_decor.theme);
3875 -+
3876 -+ vc->vc_decor = *cfg;
3877 -+
3878 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3879 -+ console_unlock();
3880 -+
3881 -+ printk(KERN_INFO "fbcondecor: console %d using theme '%s'\n",
3882 -+ vc->vc_num, vc->vc_decor.theme);
3883 -+ return 0;
3884 -+}
3885 -+
3886 -+static int fbcon_decor_ioctl_dogetcfg(struct vc_data *vc, struct vc_decor *decor)
3887 -+{
3888 -+ char __user *tmp;
3889 -+
3890 -+ tmp = decor->theme;
3891 -+ *decor = vc->vc_decor;
3892 -+ decor->theme = tmp;
3893 -+
3894 -+ if (vc->vc_decor.theme) {
3895 -+ if (copy_to_user(tmp, vc->vc_decor.theme, strlen(vc->vc_decor.theme) + 1))
3896 -+ return -EFAULT;
3897 -+ } else
3898 -+ if (put_user(0, tmp))
3899 -+ return -EFAULT;
3900 -+
3901 -+ return 0;
3902 -+}
3903 -+
3904 -+static int fbcon_decor_ioctl_dosetpic(struct vc_data *vc, struct fb_image *img, unsigned char origin)
3905 -+{
3906 -+ struct fb_info *info;
3907 -+ int len;
3908 -+ u8 *tmp;
3909 -+
3910 -+ if (vc->vc_num != fg_console)
3911 -+ return -EINVAL;
3912 -+
3913 -+ info = registered_fb[(int) con2fb_map[vc->vc_num]];
3914 -+
3915 -+ if (info == NULL)
3916 -+ return -EINVAL;
3917 -+
3918 -+ if (img->width != info->var.xres || img->height != info->var.yres) {
3919 -+ printk(KERN_ERR "fbcondecor: picture dimensions mismatch\n");
3920 -+ printk(KERN_ERR "%dx%d vs %dx%d\n", img->width, img->height, info->var.xres, info->var.yres);
3921 -+ return -EINVAL;
3922 -+ }
3923 -+
3924 -+ if (img->depth != info->var.bits_per_pixel) {
3925 -+ printk(KERN_ERR "fbcondecor: picture depth mismatch\n");
3926 -+ return -EINVAL;
3927 -+ }
3928 -+
3929 -+ if (img->depth == 8) {
3930 -+ if (!img->cmap.len || !img->cmap.red || !img->cmap.green ||
3931 -+ !img->cmap.blue)
3932 -+ return -EINVAL;
3933 -+
3934 -+ tmp = vmalloc(img->cmap.len * 3 * 2);
3935 -+ if (!tmp)
3936 -+ return -ENOMEM;
3937 -+
3938 -+ if (copy_from_user(tmp,
3939 -+ (void __user*)img->cmap.red, (img->cmap.len << 1)) ||
3940 -+ copy_from_user(tmp + (img->cmap.len << 1),
3941 -+ (void __user*)img->cmap.green, (img->cmap.len << 1)) ||
3942 -+ copy_from_user(tmp + (img->cmap.len << 2),
3943 -+ (void __user*)img->cmap.blue, (img->cmap.len << 1))) {
3944 -+ vfree(tmp);
3945 -+ return -EFAULT;
3946 -+ }
3947 -+
3948 -+ img->cmap.transp = NULL;
3949 -+ img->cmap.red = (u16*)tmp;
3950 -+ img->cmap.green = img->cmap.red + img->cmap.len;
3951 -+ img->cmap.blue = img->cmap.green + img->cmap.len;
3952 -+ } else {
3953 -+ img->cmap.red = NULL;
3954 -+ }
3955 -+
3956 -+ len = ((img->depth + 7) >> 3) * img->width * img->height;
3957 -+
3958 -+ /*
3959 -+ * Allocate an additional byte so that we never go outside of the
3960 -+ * buffer boundaries in the rendering functions in a 24 bpp mode.
3961 -+ */
3962 -+ tmp = vmalloc(len + 1);
3963 -+
3964 -+ if (!tmp)
3965 -+ goto out;
3966 -+
3967 -+ if (copy_from_user(tmp, (void __user*)img->data, len))
3968 -+ goto out;
3969 -+
3970 -+ img->data = tmp;
3971 -+
3972 -+ /* If this ioctl is a response to a request from kernel, the console sem
3973 -+ * is already held. */
3974 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3975 -+ console_lock();
3976 -+
3977 -+ if (info->bgdecor.data)
3978 -+ vfree((u8*)info->bgdecor.data);
3979 -+ if (info->bgdecor.cmap.red)
3980 -+ vfree(info->bgdecor.cmap.red);
3981 -+
3982 -+ info->bgdecor = *img;
3983 -+
3984 -+ if (fbcon_decor_active_vc(vc) && fg_console == vc->vc_num) {
3985 -+ redraw_screen(vc, 0);
3986 -+ update_region(vc, vc->vc_origin +
3987 -+ vc->vc_size_row * vc->vc_top,
3988 -+ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
3989 -+ fbcon_decor_clear_margins(vc, info, 0);
3990 -+ }
3991 -+
3992 -+// if (origin == FBCON_DECOR_IO_ORIG_USER)
3993 -+ console_unlock();
3994 -+
3995 -+ return 0;
3996 -+
3997 -+out: if (img->cmap.red)
3998 -+ vfree(img->cmap.red);
3999 -+
4000 -+ if (tmp)
4001 -+ vfree(tmp);
4002 -+ return -ENOMEM;
4003 -+}
4004 -+
4005 -+static long fbcon_decor_ioctl(struct file *filp, u_int cmd, u_long arg)
4006 -+{
4007 -+ struct fbcon_decor_iowrapper __user *wrapper = (void __user*) arg;
4008 -+ struct vc_data *vc = NULL;
4009 -+ unsigned short vc_num = 0;
4010 -+ unsigned char origin = 0;
4011 -+ void __user *data = NULL;
4012 -+
4013 -+ if (!access_ok(VERIFY_READ, wrapper,
4014 -+ sizeof(struct fbcon_decor_iowrapper)))
4015 -+ return -EFAULT;
4016 -+
4017 -+ __get_user(vc_num, &wrapper->vc);
4018 -+ __get_user(origin, &wrapper->origin);
4019 -+ __get_user(data, &wrapper->data);
4020 -+
4021 -+ if (!vc_cons_allocated(vc_num))
4022 -+ return -EINVAL;
4023 -+
4024 -+ vc = vc_cons[vc_num].d;
4025 -+
4026 -+ switch (cmd) {
4027 -+ case FBIOCONDECOR_SETPIC:
4028 -+ {
4029 -+ struct fb_image img;
4030 -+ if (copy_from_user(&img, (struct fb_image __user *)data, sizeof(struct fb_image)))
4031 -+ return -EFAULT;
4032 -+
4033 -+ return fbcon_decor_ioctl_dosetpic(vc, &img, origin);
4034 -+ }
4035 -+ case FBIOCONDECOR_SETCFG:
4036 -+ {
4037 -+ struct vc_decor cfg;
4038 -+ if (copy_from_user(&cfg, (struct vc_decor __user *)data, sizeof(struct vc_decor)))
4039 -+ return -EFAULT;
4040 -+
4041 -+ return fbcon_decor_ioctl_dosetcfg(vc, &cfg, origin);
4042 -+ }
4043 -+ case FBIOCONDECOR_GETCFG:
4044 -+ {
4045 -+ int rval;
4046 -+ struct vc_decor cfg;
4047 -+
4048 -+ if (copy_from_user(&cfg, (struct vc_decor __user *)data, sizeof(struct vc_decor)))
4049 -+ return -EFAULT;
4050 -+
4051 -+ rval = fbcon_decor_ioctl_dogetcfg(vc, &cfg);
4052 -+
4053 -+ if (copy_to_user(data, &cfg, sizeof(struct vc_decor)))
4054 -+ return -EFAULT;
4055 -+ return rval;
4056 -+ }
4057 -+ case FBIOCONDECOR_SETSTATE:
4058 -+ {
4059 -+ unsigned int state = 0;
4060 -+ if (get_user(state, (unsigned int __user *)data))
4061 -+ return -EFAULT;
4062 -+ return fbcon_decor_ioctl_dosetstate(vc, state, origin);
4063 -+ }
4064 -+ case FBIOCONDECOR_GETSTATE:
4065 -+ {
4066 -+ unsigned int state = 0;
4067 -+ fbcon_decor_ioctl_dogetstate(vc, &state);
4068 -+ return put_user(state, (unsigned int __user *)data);
4069 -+ }
4070 -+
4071 -+ default:
4072 -+ return -ENOIOCTLCMD;
4073 -+ }
4074 -+}
4075 -+
4076 -+#ifdef CONFIG_COMPAT
4077 -+
4078 -+static long fbcon_decor_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {
4079 -+
4080 -+ struct fbcon_decor_iowrapper32 __user *wrapper = (void __user *)arg;
4081 -+ struct vc_data *vc = NULL;
4082 -+ unsigned short vc_num = 0;
4083 -+ unsigned char origin = 0;
4084 -+ compat_uptr_t data_compat = 0;
4085 -+ void __user *data = NULL;
4086 -+
4087 -+ if (!access_ok(VERIFY_READ, wrapper,
4088 -+ sizeof(struct fbcon_decor_iowrapper32)))
4089 -+ return -EFAULT;
4090 -+
4091 -+ __get_user(vc_num, &wrapper->vc);
4092 -+ __get_user(origin, &wrapper->origin);
4093 -+ __get_user(data_compat, &wrapper->data);
4094 -+ data = compat_ptr(data_compat);
4095 -+
4096 -+ if (!vc_cons_allocated(vc_num))
4097 -+ return -EINVAL;
4098 -+
4099 -+ vc = vc_cons[vc_num].d;
4100 -+
4101 -+ switch (cmd) {
4102 -+ case FBIOCONDECOR_SETPIC32:
4103 -+ {
4104 -+ struct fb_image32 img_compat;
4105 -+ struct fb_image img;
4106 -+
4107 -+ if (copy_from_user(&img_compat, (struct fb_image32 __user *)data, sizeof(struct fb_image32)))
4108 -+ return -EFAULT;
4109 -+
4110 -+ fb_image_from_compat(img, img_compat);
4111 -+
4112 -+ return fbcon_decor_ioctl_dosetpic(vc, &img, origin);
4113 -+ }
4114 -+
4115 -+ case FBIOCONDECOR_SETCFG32:
4116 -+ {
4117 -+ struct vc_decor32 cfg_compat;
4118 -+ struct vc_decor cfg;
4119 -+
4120 -+ if (copy_from_user(&cfg_compat, (struct vc_decor32 __user *)data, sizeof(struct vc_decor32)))
4121 -+ return -EFAULT;
4122 -+
4123 -+ vc_decor_from_compat(cfg, cfg_compat);
4124 -+
4125 -+ return fbcon_decor_ioctl_dosetcfg(vc, &cfg, origin);
4126 -+ }
4127 -+
4128 -+ case FBIOCONDECOR_GETCFG32:
4129 -+ {
4130 -+ int rval;
4131 -+ struct vc_decor32 cfg_compat;
4132 -+ struct vc_decor cfg;
4133 -+
4134 -+ if (copy_from_user(&cfg_compat, (struct vc_decor32 __user *)data, sizeof(struct vc_decor32)))
4135 -+ return -EFAULT;
4136 -+ cfg.theme = compat_ptr(cfg_compat.theme);
4137 -+
4138 -+ rval = fbcon_decor_ioctl_dogetcfg(vc, &cfg);
4139 -+
4140 -+ vc_decor_to_compat(cfg_compat, cfg);
4141 -+
4142 -+ if (copy_to_user((struct vc_decor32 __user *)data, &cfg_compat, sizeof(struct vc_decor32)))
4143 -+ return -EFAULT;
4144 -+ return rval;
4145 -+ }
4146 -+
4147 -+ case FBIOCONDECOR_SETSTATE32:
4148 -+ {
4149 -+ compat_uint_t state_compat = 0;
4150 -+ unsigned int state = 0;
4151 -+
4152 -+ if (get_user(state_compat, (compat_uint_t __user *)data))
4153 -+ return -EFAULT;
4154 -+
4155 -+ state = (unsigned int)state_compat;
4156 -+
4157 -+ return fbcon_decor_ioctl_dosetstate(vc, state, origin);
4158 -+ }
4159 -+
4160 -+ case FBIOCONDECOR_GETSTATE32:
4161 -+ {
4162 -+ compat_uint_t state_compat = 0;
4163 -+ unsigned int state = 0;
4164 -+
4165 -+ fbcon_decor_ioctl_dogetstate(vc, &state);
4166 -+ state_compat = (compat_uint_t)state;
4167 -+
4168 -+ return put_user(state_compat, (compat_uint_t __user *)data);
4169 -+ }
4170 -+
4171 -+ default:
4172 -+ return -ENOIOCTLCMD;
4173 -+ }
4174 -+}
4175 -+#else
4176 -+ #define fbcon_decor_compat_ioctl NULL
4177 -+#endif
4178 -+
4179 -+static struct file_operations fbcon_decor_ops = {
4180 -+ .owner = THIS_MODULE,
4181 -+ .unlocked_ioctl = fbcon_decor_ioctl,
4182 -+ .compat_ioctl = fbcon_decor_compat_ioctl
4183 -+};
4184 -+
4185 -+static struct miscdevice fbcon_decor_dev = {
4186 -+ .minor = MISC_DYNAMIC_MINOR,
4187 -+ .name = "fbcondecor",
4188 -+ .fops = &fbcon_decor_ops
4189 -+};
4190 -+
4191 -+void fbcon_decor_reset()
4192 -+{
4193 -+ int i;
4194 -+
4195 -+ for (i = 0; i < num_registered_fb; i++) {
4196 -+ registered_fb[i]->bgdecor.data = NULL;
4197 -+ registered_fb[i]->bgdecor.cmap.red = NULL;
4198 -+ }
4199 -+
4200 -+ for (i = 0; i < MAX_NR_CONSOLES && vc_cons[i].d; i++) {
4201 -+ vc_cons[i].d->vc_decor.state = vc_cons[i].d->vc_decor.twidth =
4202 -+ vc_cons[i].d->vc_decor.theight = 0;
4203 -+ vc_cons[i].d->vc_decor.theme = NULL;
4204 -+ }
4205 -+
4206 -+ return;
4207 -+}
4208 -+
4209 -+int fbcon_decor_init()
4210 -+{
4211 -+ int i;
4212 -+
4213 -+ fbcon_decor_reset();
4214 -+
4215 -+ if (initialized)
4216 -+ return 0;
4217 -+
4218 -+ i = misc_register(&fbcon_decor_dev);
4219 -+ if (i) {
4220 -+ printk(KERN_ERR "fbcondecor: failed to register device\n");
4221 -+ return i;
4222 -+ }
4223 -+
4224 -+ fbcon_decor_call_helper("init", 0);
4225 -+ initialized = 1;
4226 -+ return 0;
4227 -+}
4228 -+
4229 -+int fbcon_decor_exit(void)
4230 -+{
4231 -+ fbcon_decor_reset();
4232 -+ return 0;
4233 -+}
4234 -+
4235 -+EXPORT_SYMBOL(fbcon_decor_path);
4236 ---- a/drivers/video/console/fbcondecor.h 2014-01-21 09:50:42.229900176 -0500
4237 -+++ b/drivers/video/console/fbcondecor.h 2014-01-21 09:50:21.489900340 -0500
4238 -@@ -0,0 +1,79 @@
4239 -+/*
4240 -+ * linux/drivers/video/console/fbcondecor.h -- Framebuffer Console Decoration headers
4241 -+ *
4242 -+ * Copyright (C) 2004 Michal Januszewski <spock@g.o>
4243 -+ *
4244 -+ */
4245 -+
4246 -+#ifndef __FBCON_DECOR_H
4247 -+#define __FBCON_DECOR_H
4248 -+
4249 -+#ifndef _LINUX_FB_H
4250 -+#include <linux/fb.h>
4251 -+#endif
4252 -+
4253 -+/* This is needed for vc_cons in fbcmap.c */
4254 -+#include <linux/vt_kern.h>
4255 -+
4256 -+struct fb_cursor;
4257 -+struct fb_info;
4258 -+struct vc_data;
4259 -+
4260 -+#ifdef CONFIG_FB_CON_DECOR
4261 -+/* fbcondecor.c */
4262 -+int fbcon_decor_init(void);
4263 -+int fbcon_decor_exit(void);
4264 -+int fbcon_decor_call_helper(char* cmd, unsigned short cons);
4265 -+int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw);
4266 -+void fbcon_decor_reset(void);
4267 -+
4268 -+/* cfbcondecor.c */
4269 -+void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx);
4270 -+void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor);
4271 -+void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width);
4272 -+void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only);
4273 -+void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank);
4274 -+void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width);
4275 -+void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes, int srclinesbytes, int bpp);
4276 -+void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc);
4277 -+
4278 -+/* vt.c */
4279 -+void acquire_console_sem(void);
4280 -+void release_console_sem(void);
4281 -+void do_unblank_screen(int entering_gfx);
4282 -+
4283 -+/* struct vc_data *y */
4284 -+#define fbcon_decor_active_vc(y) (y->vc_decor.state && y->vc_decor.theme)
4285 -+
4286 -+/* struct fb_info *x, struct vc_data *y */
4287 -+#define fbcon_decor_active_nores(x,y) (x->bgdecor.data && fbcon_decor_active_vc(y))
4288 -+
4289 -+/* struct fb_info *x, struct vc_data *y */
4290 -+#define fbcon_decor_active(x,y) (fbcon_decor_active_nores(x,y) && \
4291 -+ x->bgdecor.width == x->var.xres && \
4292 -+ x->bgdecor.height == x->var.yres && \
4293 -+ x->bgdecor.depth == x->var.bits_per_pixel)
4294 -+
4295 -+
4296 -+#else /* CONFIG_FB_CON_DECOR */
4297 -+
4298 -+static inline void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx) {}
4299 -+static inline void fbcon_decor_putc(struct vc_data *vc, struct fb_info *info, int c, int ypos, int xpos) {}
4300 -+static inline void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor) {}
4301 -+static inline void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) {}
4302 -+static inline void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only) {}
4303 -+static inline void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank) {}
4304 -+static inline void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width) {}
4305 -+static inline void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc) {}
4306 -+static inline int fbcon_decor_call_helper(char* cmd, unsigned short cons) { return 0; }
4307 -+static inline int fbcon_decor_init(void) { return 0; }
4308 -+static inline int fbcon_decor_exit(void) { return 0; }
4309 -+static inline int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw) { return 0; }
4310 -+
4311 -+#define fbcon_decor_active_vc(y) (0)
4312 -+#define fbcon_decor_active_nores(x,y) (0)
4313 -+#define fbcon_decor_active(x,y) (0)
4314 -+
4315 -+#endif /* CONFIG_FB_CON_DECOR */
4316 -+
4317 -+#endif /* __FBCON_DECOR_H */
4318 -diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c
4319 -index 5c3960d..162b5f4 100644
4320 ---- a/drivers/video/fbcmap.c
4321 -+++ b/drivers/video/fbcmap.c
4322 -@@ -17,6 +17,8 @@
4323 - #include <linux/slab.h>
4324 - #include <linux/uaccess.h>
4325 -
4326 -+#include "console/fbcondecor.h"
4327 -+
4328 - static u16 red2[] __read_mostly = {
4329 - 0x0000, 0xaaaa
4330 - };
4331 -@@ -249,14 +251,17 @@ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info)
4332 - if (transp)
4333 - htransp = *transp++;
4334 - if (info->fbops->fb_setcolreg(start++,
4335 -- hred, hgreen, hblue,
4336 -+ hred, hgreen, hblue,
4337 - htransp, info))
4338 - break;
4339 - }
4340 - }
4341 -- if (rc == 0)
4342 -+ if (rc == 0) {
4343 - fb_copy_cmap(cmap, &info->cmap);
4344 --
4345 -+ if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
4346 -+ info->fix.visual == FB_VISUAL_DIRECTCOLOR)
4347 -+ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d);
4348 -+ }
4349 - return rc;
4350 - }
4351 -
4352 -diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
4353 -index c6ce416..7ce6640 100644
4354 ---- a/drivers/video/fbmem.c
4355 -+++ b/drivers/video/fbmem.c
4356 -@@ -1231,15 +1231,6 @@ struct fb_fix_screeninfo32 {
4357 - u16 reserved[3];
4358 - };
4359 -
4360 --struct fb_cmap32 {
4361 -- u32 start;
4362 -- u32 len;
4363 -- compat_caddr_t red;
4364 -- compat_caddr_t green;
4365 -- compat_caddr_t blue;
4366 -- compat_caddr_t transp;
4367 --};
4368 --
4369 - static int fb_getput_cmap(struct fb_info *info, unsigned int cmd,
4370 - unsigned long arg)
4371 - {
4372 -diff --git a/include/linux/console_decor.h b/include/linux/console_decor.h
4373 -new file mode 100644
4374 -index 0000000..04b8d80
4375 ---- /dev/null
4376 -+++ b/include/linux/console_decor.h
4377 -@@ -0,0 +1,46 @@
4378 -+#ifndef _LINUX_CONSOLE_DECOR_H_
4379 -+#define _LINUX_CONSOLE_DECOR_H_ 1
4380 -+
4381 -+/* A structure used by the framebuffer console decorations (drivers/video/console/fbcondecor.c) */
4382 -+struct vc_decor {
4383 -+ __u8 bg_color; /* The color that is to be treated as transparent */
4384 -+ __u8 state; /* Current decor state: 0 = off, 1 = on */
4385 -+ __u16 tx, ty; /* Top left corner coordinates of the text field */
4386 -+ __u16 twidth, theight; /* Width and height of the text field */
4387 -+ char* theme;
4388 -+};
4389 -+
4390 -+#ifdef __KERNEL__
4391 -+#ifdef CONFIG_COMPAT
4392 -+#include <linux/compat.h>
4393 -+
4394 -+struct vc_decor32 {
4395 -+ __u8 bg_color; /* The color that is to be treated as transparent */
4396 -+ __u8 state; /* Current decor state: 0 = off, 1 = on */
4397 -+ __u16 tx, ty; /* Top left corner coordinates of the text field */
4398 -+ __u16 twidth, theight; /* Width and height of the text field */
4399 -+ compat_uptr_t theme;
4400 -+};
4401 -+
4402 -+#define vc_decor_from_compat(to, from) \
4403 -+ (to).bg_color = (from).bg_color; \
4404 -+ (to).state = (from).state; \
4405 -+ (to).tx = (from).tx; \
4406 -+ (to).ty = (from).ty; \
4407 -+ (to).twidth = (from).twidth; \
4408 -+ (to).theight = (from).theight; \
4409 -+ (to).theme = compat_ptr((from).theme)
4410 -+
4411 -+#define vc_decor_to_compat(to, from) \
4412 -+ (to).bg_color = (from).bg_color; \
4413 -+ (to).state = (from).state; \
4414 -+ (to).tx = (from).tx; \
4415 -+ (to).ty = (from).ty; \
4416 -+ (to).twidth = (from).twidth; \
4417 -+ (to).theight = (from).theight; \
4418 -+ (to).theme = ptr_to_compat((from).theme)
4419 -+
4420 -+#endif /* CONFIG_COMPAT */
4421 -+#endif /* __KERNEL__ */
4422 -+
4423 -+#endif
4424 -diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
4425 -index 7f0c329..98f5d60 100644
4426 ---- a/include/linux/console_struct.h
4427 -+++ b/include/linux/console_struct.h
4428 -@@ -19,6 +19,7 @@
4429 - struct vt_struct;
4430 -
4431 - #define NPAR 16
4432 -+#include <linux/console_decor.h>
4433 -
4434 - struct vc_data {
4435 - struct tty_port port; /* Upper level data */
4436 -@@ -107,6 +108,8 @@ struct vc_data {
4437 - unsigned long vc_uni_pagedir;
4438 - unsigned long *vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
4439 - bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
4440 -+
4441 -+ struct vc_decor vc_decor;
4442 - /* additional information is in vt_kern.h */
4443 - };
4444 -
4445 -diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h
4446 -index d31cb68..ad161bb 100644
4447 ---- a/include/uapi/linux/fb.h
4448 -+++ b/include/uapi/linux/fb.h
4449 -@@ -8,6 +8,25 @@
4450 -
4451 - #define FB_MAX 32 /* sufficient for now */
4452 -
4453 -+struct fbcon_decor_iowrapper
4454 -+{
4455 -+ unsigned short vc; /* Virtual console */
4456 -+ unsigned char origin; /* Point of origin of the request */
4457 -+ void *data;
4458 -+};
4459 -+
4460 -+#ifdef __KERNEL__
4461 -+#ifdef CONFIG_COMPAT
4462 -+#include <linux/compat.h>
4463 -+struct fbcon_decor_iowrapper32
4464 -+{
4465 -+ unsigned short vc; /* Virtual console */
4466 -+ unsigned char origin; /* Point of origin of the request */
4467 -+ compat_uptr_t data;
4468 -+};
4469 -+#endif /* CONFIG_COMPAT */
4470 -+#endif /* __KERNEL__ */
4471 -+
4472 - /* ioctls
4473 - 0x46 is 'F' */
4474 - #define FBIOGET_VSCREENINFO 0x4600
4475 -@@ -34,6 +53,24 @@
4476 - #define FBIOPUT_MODEINFO 0x4617
4477 - #define FBIOGET_DISPINFO 0x4618
4478 - #define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32)
4479 -+#define FBIOCONDECOR_SETCFG _IOWR('F', 0x19, struct fbcon_decor_iowrapper)
4480 -+#define FBIOCONDECOR_GETCFG _IOR('F', 0x1A, struct fbcon_decor_iowrapper)
4481 -+#define FBIOCONDECOR_SETSTATE _IOWR('F', 0x1B, struct fbcon_decor_iowrapper)
4482 -+#define FBIOCONDECOR_GETSTATE _IOR('F', 0x1C, struct fbcon_decor_iowrapper)
4483 -+#define FBIOCONDECOR_SETPIC _IOWR('F', 0x1D, struct fbcon_decor_iowrapper)
4484 -+#ifdef __KERNEL__
4485 -+#ifdef CONFIG_COMPAT
4486 -+#define FBIOCONDECOR_SETCFG32 _IOWR('F', 0x19, struct fbcon_decor_iowrapper32)
4487 -+#define FBIOCONDECOR_GETCFG32 _IOR('F', 0x1A, struct fbcon_decor_iowrapper32)
4488 -+#define FBIOCONDECOR_SETSTATE32 _IOWR('F', 0x1B, struct fbcon_decor_iowrapper32)
4489 -+#define FBIOCONDECOR_GETSTATE32 _IOR('F', 0x1C, struct fbcon_decor_iowrapper32)
4490 -+#define FBIOCONDECOR_SETPIC32 _IOWR('F', 0x1D, struct fbcon_decor_iowrapper32)
4491 -+#endif /* CONFIG_COMPAT */
4492 -+#endif /* __KERNEL__ */
4493 -+
4494 -+#define FBCON_DECOR_THEME_LEN 128 /* Maximum length of a theme name */
4495 -+#define FBCON_DECOR_IO_ORIG_KERNEL 0 /* Kernel ioctl origin */
4496 -+#define FBCON_DECOR_IO_ORIG_USER 1 /* User ioctl origin */
4497 -
4498 - #define FB_TYPE_PACKED_PIXELS 0 /* Packed Pixels */
4499 - #define FB_TYPE_PLANES 1 /* Non interleaved planes */
4500 -@@ -286,6 +323,28 @@ struct fb_cmap {
4501 - __u16 *transp; /* transparency, can be NULL */
4502 - };
4503 -
4504 -+#ifdef __KERNEL__
4505 -+#ifdef CONFIG_COMPAT
4506 -+struct fb_cmap32 {
4507 -+ __u32 start;
4508 -+ __u32 len; /* Number of entries */
4509 -+ compat_uptr_t red; /* Red values */
4510 -+ compat_uptr_t green;
4511 -+ compat_uptr_t blue;
4512 -+ compat_uptr_t transp; /* transparency, can be NULL */
4513 -+};
4514 -+
4515 -+#define fb_cmap_from_compat(to, from) \
4516 -+ (to).start = (from).start; \
4517 -+ (to).len = (from).len; \
4518 -+ (to).red = compat_ptr((from).red); \
4519 -+ (to).green = compat_ptr((from).green); \
4520 -+ (to).blue = compat_ptr((from).blue); \
4521 -+ (to).transp = compat_ptr((from).transp)
4522 -+
4523 -+#endif /* CONFIG_COMPAT */
4524 -+#endif /* __KERNEL__ */
4525 -+
4526 - struct fb_con2fbmap {
4527 - __u32 console;
4528 - __u32 framebuffer;
4529 -@@ -367,6 +426,34 @@ struct fb_image {
4530 - struct fb_cmap cmap; /* color map info */
4531 - };
4532 -
4533 -+#ifdef __KERNEL__
4534 -+#ifdef CONFIG_COMPAT
4535 -+struct fb_image32 {
4536 -+ __u32 dx; /* Where to place image */
4537 -+ __u32 dy;
4538 -+ __u32 width; /* Size of image */
4539 -+ __u32 height;
4540 -+ __u32 fg_color; /* Only used when a mono bitmap */
4541 -+ __u32 bg_color;
4542 -+ __u8 depth; /* Depth of the image */
4543 -+ const compat_uptr_t data; /* Pointer to image data */
4544 -+ struct fb_cmap32 cmap; /* color map info */
4545 -+};
4546 -+
4547 -+#define fb_image_from_compat(to, from) \
4548 -+ (to).dx = (from).dx; \
4549 -+ (to).dy = (from).dy; \
4550 -+ (to).width = (from).width; \
4551 -+ (to).height = (from).height; \
4552 -+ (to).fg_color = (from).fg_color; \
4553 -+ (to).bg_color = (from).bg_color; \
4554 -+ (to).depth = (from).depth; \
4555 -+ (to).data = compat_ptr((from).data); \
4556 -+ fb_cmap_from_compat((to).cmap, (from).cmap)
4557 -+
4558 -+#endif /* CONFIG_COMPAT */
4559 -+#endif /* __KERNEL__ */
4560 -+
4561 - /*
4562 - * hardware cursor control
4563 - */
4564 -
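All of the FBIOCONDECOR_* ioctls above take a struct fbcon_decor_iowrapper: the caller names the virtual console, marks the origin of the request, and points data at the command-specific payload. A hypothetical user-space query of the decor state, assuming the misc device is exposed as /dev/fbcondecor and the patched <linux/fb.h> header is installed:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/fb.h>   /* FBIOCONDECOR_GETSTATE, struct fbcon_decor_iowrapper */

    int main(void)
    {
            unsigned int state = 0;
            struct fbcon_decor_iowrapper w = {
                    .vc = 0,                        /* virtual console to query */
                    .origin = FBCON_DECOR_IO_ORIG_USER,
                    .data = &state,
            };
            int fd = open("/dev/fbcondecor", O_RDWR);

            if (fd < 0 || ioctl(fd, FBIOCONDECOR_GETSTATE, &w) < 0) {
                    perror("fbcondecor");
                    return 1;
            }
            printf("decor on console %u: %s\n", (unsigned int)w.vc,
                   state ? "on" : "off");
            close(fd);
            return 0;
    }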
4565 -diff --git a/include/linux/fb.h b/include/linux/fb.h
4566 -index d31cb68..ad161bb 100644
4567 ---- a/include/linux/fb.h
4568 -+++ b/include/linux/fb.h
4569 -@@ -488,5 +488,8 @@ #define FBINFO_STATE_SUSPENDED 1
4570 - u32 state; /* Hardware state i.e suspend */
4571 - void *fbcon_par; /* fbcon use-only private area */
4572 -+
4573 -+ struct fb_image bgdecor;
4574 -+
4575 - /* From here on everything is device dependent */
4576 - void *par;
4577 - /* we need the PCI or similar aperture base/size not
4578 -
4579 -diff --git a/kernel/sysctl.c b/kernel/sysctl.c
4580 -index 4ab1187..6561627 100644
4581 ---- a/kernel/sysctl.c
4582 -+++ b/kernel/sysctl.c
4583 -@@ -145,6 +145,10 @@ static int min_percpu_pagelist_fract = 8;
4584 - static int ngroups_max = NGROUPS_MAX;
4585 - static const int cap_last_cap = CAP_LAST_CAP;
4586 -
4587 -+#ifdef CONFIG_FB_CON_DECOR
4588 -+extern char fbcon_decor_path[];
4589 -+#endif
4590 -+
4591 - #ifdef CONFIG_INOTIFY_USER
4592 - #include <linux/inotify.h>
4593 - #endif
4594 -@@ -248,6 +252,15 @@ static struct ctl_table sysctl_base_table[] = {
4595 - .mode = 0555,
4596 - .child = dev_table,
4597 - },
4598 -+#ifdef CONFIG_FB_CON_DECOR
4599 -+ {
4600 -+ .procname = "fbcondecor",
4601 -+ .data = &fbcon_decor_path,
4602 -+ .maxlen = KMOD_PATH_LEN,
4603 -+ .mode = 0644,
4604 -+ .proc_handler = &proc_dostring,
4605 -+ },
4606 -+#endif
4607 - { }
4608 - };
4609 -
4610 -@@ -1091,7 +1104,7 @@ static struct ctl_table vm_table[] = {
4611 - .proc_handler = proc_dointvec,
4612 - },
4613 - {
4614 -- .procname = "page-cluster",
4615 -+ .procname = "page-cluster",
4616 - .data = &page_cluster,
4617 - .maxlen = sizeof(int),
4618 - .mode = 0644,
4619 -@@ -1535,7 +1548,7 @@ static struct ctl_table fs_table[] = {
4620 - .mode = 0555,
4621 - .child = inotify_table,
4622 - },
4623 --#endif
4624 -+#endif
4625 - #ifdef CONFIG_EPOLL
4626 - {
4627 - .procname = "epoll",
4628 -@@ -1873,12 +1886,12 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
4629 - unsigned long page = 0;
4630 - size_t left;
4631 - char *kbuf;
4632 --
4633 -+
4634 - if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
4635 - *lenp = 0;
4636 - return 0;
4637 - }
4638 --
4639 -+
4640 - i = (int *) tbl_data;
4641 - vleft = table->maxlen / sizeof(*i);
4642 - left = *lenp;
4643 -@@ -1967,7 +1980,7 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
4644 - * @ppos: file position
4645 - *
4646 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4647 -- * values from/to the user buffer, treated as an ASCII string.
4648 -+ * values from/to the user buffer, treated as an ASCII string.
4649 - *
4650 - * Returns 0 on success.
4651 - */
4652 -@@ -2326,7 +2339,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
4653 - * @ppos: file position
4654 - *
4655 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4656 -- * values from/to the user buffer, treated as an ASCII string.
4657 -+ * values from/to the user buffer, treated as an ASCII string.
4658 - * The values read are assumed to be in seconds, and are converted into
4659 - * jiffies.
4660 - *
4661 -@@ -2348,8 +2361,8 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write,
4662 - * @ppos: pointer to the file position
4663 - *
4664 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4665 -- * values from/to the user buffer, treated as an ASCII string.
4666 -- * The values read are assumed to be in 1/USER_HZ seconds, and
4667 -+ * values from/to the user buffer, treated as an ASCII string.
4668 -+ * The values read are assumed to be in 1/USER_HZ seconds, and
4669 - * are converted into jiffies.
4670 - *
4671 - * Returns 0 on success.
4672 -@@ -2371,8 +2384,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
4673 - * @ppos: the current position in the file
4674 - *
4675 - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
4676 -- * values from/to the user buffer, treated as an ASCII string.
4677 -- * The values read are assumed to be in 1/1000 seconds, and
4678 -+ * values from/to the user buffer, treated as an ASCII string.
4679 -+ * The values read are assumed to be in 1/1000 seconds, and
4680 - * are converted into jiffies.
4681 - *
4682 - * Returns 0 on success.
4683 ---
4684 -1.7.10
4685 -
4686
4687 Deleted: genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch
4688 ===================================================================
4689 --- genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch 2014-05-05 14:22:27 UTC (rev 2771)
4690 +++ genpatches-2.6/trunk/3.15/5000_enable-additional-cpu-optimizations-for-gcc.patch 2014-05-05 14:28:48 UTC (rev 2772)
4691 @@ -1,325 +0,0 @@
4692 -This patch has been tested on and known to work with kernel versions from 3.2
4693 -up to the latest git version (pulled on 12/14/2013).
4694 -
4695 -This patch expands the number of selectable microarchitectures to cover newer
4696 -processors, including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
4697 -14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
4698 -Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core
4699 -i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th
4700 -Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag.
4701 -
4702 -Small but real speed increases are measurable using a make endpoint comparing
4703 -a generic kernel to one built with one of the respective microarchs.
4704 -
4705 -See the following experimental evidence supporting this statement:
4706 -https://github.com/graysky2/kernel_gcc_patch
4707 -
4708 ----
4709 -diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
4710 ---- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500
4711 -+++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500
4712 -@@ -15,6 +15,16 @@
4713 - #define MODULE_PROC_FAMILY "586MMX "
4714 - #elif defined CONFIG_MCORE2
4715 - #define MODULE_PROC_FAMILY "CORE2 "
4716 -+#elif defined CONFIG_MNATIVE
4717 -+#define MODULE_PROC_FAMILY "NATIVE "
4718 -+#elif defined CONFIG_MCOREI7
4719 -+#define MODULE_PROC_FAMILY "COREI7 "
4720 -+#elif defined CONFIG_MCOREI7AVX
4721 -+#define MODULE_PROC_FAMILY "COREI7AVX "
4722 -+#elif defined CONFIG_MCOREAVXI
4723 -+#define MODULE_PROC_FAMILY "COREAVXI "
4724 -+#elif defined CONFIG_MCOREAVX2
4725 -+#define MODULE_PROC_FAMILY "COREAVX2 "
4726 - #elif defined CONFIG_MATOM
4727 - #define MODULE_PROC_FAMILY "ATOM "
4728 - #elif defined CONFIG_M686
4729 -@@ -33,6 +43,18 @@
4730 - #define MODULE_PROC_FAMILY "K7 "
4731 - #elif defined CONFIG_MK8
4732 - #define MODULE_PROC_FAMILY "K8 "
4733 -+#elif defined CONFIG_MK10
4734 -+#define MODULE_PROC_FAMILY "K10 "
4735 -+#elif defined CONFIG_MBARCELONA
4736 -+#define MODULE_PROC_FAMILY "BARCELONA "
4737 -+#elif defined CONFIG_MBOBCAT
4738 -+#define MODULE_PROC_FAMILY "BOBCAT "
4739 -+#elif defined CONFIG_MBULLDOZER
4740 -+#define MODULE_PROC_FAMILY "BULLDOZER "
4741 -+#elif defined CONFIG_MPILEDRIVER
4742 -+#define MODULE_PROC_FAMILY "PILEDRIVER "
4743 -+#elif defined CONFIG_MJAGUAR
4744 -+#define MODULE_PROC_FAMILY "JAGUAR "
4745 - #elif defined CONFIG_MELAN
4746 - #define MODULE_PROC_FAMILY "ELAN "
4747 - #elif defined CONFIG_MCRUSOE
4748 -diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
4749 ---- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500
4750 -+++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500
4751 -@@ -139,7 +139,7 @@ config MPENTIUM4
4752 -
4753 -
4754 - config MK6
4755 -- bool "K6/K6-II/K6-III"
4756 -+ bool "AMD K6/K6-II/K6-III"
4757 - depends on X86_32
4758 - ---help---
4759 - Select this for an AMD K6-family processor. Enables use of
4760 -@@ -147,7 +147,7 @@ config MK6
4761 - flags to GCC.
4762 -
4763 - config MK7
4764 -- bool "Athlon/Duron/K7"
4765 -+ bool "AMD Athlon/Duron/K7"
4766 - depends on X86_32
4767 - ---help---
4768 - Select this for an AMD Athlon K7-family processor. Enables use of
4769 -@@ -155,12 +155,55 @@ config MK7
4770 - flags to GCC.
4771 -
4772 - config MK8
4773 -- bool "Opteron/Athlon64/Hammer/K8"
4774 -+ bool "AMD Opteron/Athlon64/Hammer/K8"
4775 - ---help---
4776 - Select this for an AMD Opteron or Athlon64 Hammer-family processor.
4777 - Enables use of some extended instructions, and passes appropriate
4778 - optimization flags to GCC.
4779 -
4780 -+config MK10
4781 -+ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
4782 -+ ---help---
4783 -+ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
4784 -+ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
4785 -+ Enables use of some extended instructions, and passes appropriate
4786 -+ optimization flags to GCC.
4787 -+
4788 -+config MBARCELONA
4789 -+ bool "AMD Barcelona"
4790 -+ ---help---
4791 -+ Select this for AMD Barcelona and newer processors.
4792 -+
4793 -+ Enables -march=barcelona
4794 -+
4795 -+config MBOBCAT
4796 -+ bool "AMD Bobcat"
4797 -+ ---help---
4798 -+ Select this for AMD Bobcat processors.
4799 -+
4800 -+ Enables -march=btver1
4801 -+
4802 -+config MBULLDOZER
4803 -+ bool "AMD Bulldozer"
4804 -+ ---help---
4805 -+ Select this for AMD Bulldozer processors.
4806 -+
4807 -+ Enables -march=bdver1
4808 -+
4809 -+config MPILEDRIVER
4810 -+ bool "AMD Piledriver"
4811 -+ ---help---
4812 -+ Select this for AMD Piledriver processors.
4813 -+
4814 -+ Enables -march=bdver2
4815 -+
4816 -+config MJAGUAR
4817 -+ bool "AMD Jaguar"
4818 -+ ---help---
4819 -+ Select this for AMD Jaguar processors.
4820 -+
4821 -+ Enables -march=btver2
4822 -+
4823 - config MCRUSOE
4824 - bool "Crusoe"
4825 - depends on X86_32
4826 -@@ -251,8 +294,17 @@ config MPSC
4827 - using the cpu family field
4828 - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
4829 -
4830 -+config MATOM
4831 -+ bool "Intel Atom"
4832 -+ ---help---
4833 -+
4834 -+ Select this for the Intel Atom platform. Intel Atom CPUs have an
4835 -+ in-order pipelining architecture and thus can benefit from
4836 -+ accordingly optimized code. Use a recent GCC with specific Atom
4837 -+ support in order to fully benefit from selecting this option.
4838 -+
4839 - config MCORE2
4840 -- bool "Core 2/newer Xeon"
4841 -+ bool "Intel Core 2"
4842 - ---help---
4843 -
4844 - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
4845 -@@ -260,14 +312,40 @@ config MCORE2
4846 - family in /proc/cpuinfo. Newer ones have 6 and older ones 15
4847 - (not a typo)
4848 -
4849 --config MATOM
4850 -- bool "Intel Atom"
4851 -+ Enables -march=core2
4852 -+
4853 -+config MCOREI7
4854 -+ bool "Intel Core i7"
4855 - ---help---
4856 -
4857 -- Select this for the Intel Atom platform. Intel Atom CPUs have an
4858 -- in-order pipelining architecture and thus can benefit from
4859 -- accordingly optimized code. Use a recent GCC with specific Atom
4860 -- support in order to fully benefit from selecting this option.
4861 -+ Select this for the Intel Nehalem platform. Intel Nehalem processors
4862 -+ include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
4863 -+
4864 -+ Enables -march=corei7
4865 -+
4866 -+config MCOREI7AVX
4867 -+ bool "Intel Core 2nd Gen AVX"
4868 -+ ---help---
4869 -+
4870 -+ Select this for 2nd Gen Core processors including Sandy Bridge.
4871 -+
4872 -+ Enables -march=corei7-avx
4873 -+
4874 -+config MCOREAVXI
4875 -+ bool "Intel Core 3rd Gen AVX"
4876 -+ ---help---
4877 -+
4878 -+ Select this for 3rd Gen Core processors including Ivy Bridge.
4879 -+
4880 -+ Enables -march=core-avx-i
4881 -+
4882 -+config MCOREAVX2
4883 -+ bool "Intel Core AVX2"
4884 -+ ---help---
4885 -+
4886 -+ Select this for AVX2 enabled processors including Haswell.
4887 -+
4888 -+ Enables -march=core-avx2
4889 -
4890 - config GENERIC_CPU
4891 - bool "Generic-x86-64"
4892 -@@ -276,6 +354,19 @@ config GENERIC_CPU
4893 - Generic x86-64 CPU.
4894 - Run equally well on all x86-64 CPUs.
4895 -
4896 -+config MNATIVE
4897 -+ bool "Native optimizations autodetected by GCC"
4898 -+ ---help---
4899 -+
4900 -+ GCC 4.2 and above support -march=native, which automatically detects
4901 -+ the optimum settings to use based on your processor. -march=native
4902 -+ also detects and applies additional settings beyond -march specific
4903 -+ to your CPU (e.g. -msse4). Unless you have a specific reason not to
4904 -+ (e.g. distcc cross-compiling), you should probably be using
4905 -+ -march=native rather than anything listed below.
4906 -+
4907 -+ Enables -march=native
4908 -+
4909 - endchoice
4910 -
4911 - config X86_GENERIC
4912 -@@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT
4913 - config X86_L1_CACHE_SHIFT
4914 - int
4915 - default "7" if MPENTIUM4 || MPSC
4916 -- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
4917 -+ default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU
4918 - default "4" if MELAN || M486 || MGEODEGX1
4919 - default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
4920 -
4921 -@@ -331,11 +422,11 @@ config X86_ALIGNMENT_16
4922 -
4923 - config X86_INTEL_USERCOPY
4924 - def_bool y
4925 -- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
4926 -+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2
4927 -
4928 - config X86_USE_PPRO_CHECKSUM
4929 - def_bool y
4930 -- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
4931 -+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE
4932 -
4933 - config X86_USE_3DNOW
4934 - def_bool y
4935 -@@ -363,17 +454,17 @@ config X86_P6_NOP
4936 -
4937 - config X86_TSC
4938 - def_bool y
4939 -- depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
4940 -+ depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MATOM) && !X86_NUMAQ) || X86_64 || MNATIVE
4941 -
4942 - config X86_CMPXCHG64
4943 - def_bool y
4944 -- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
4945 -+ depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
4946 -
4947 - # this should be set for all -march=.. options where the compiler
4948 - # generates cmov.
4949 - config X86_CMOV
4950 - def_bool y
4951 -- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
4952 -+ depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
4953 -
4954 - config X86_MINIMUM_CPU_FAMILY
4955 - int
4956 -diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile
4957 ---- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500
4958 -+++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500
4959 -@@ -61,11 +61,26 @@ else
4960 - KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
4961 -
4962 - # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
4963 -+ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
4964 - cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
4965 -+ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
4966 -+ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
4967 -+ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
4968 -+ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
4969 -+ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
4970 -+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
4971 - cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
4972 -
4973 - cflags-$(CONFIG_MCORE2) += \
4974 -- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
4975 -+ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
4976 -+ cflags-$(CONFIG_MCOREI7) += \
4977 -+ $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7))
4978 -+ cflags-$(CONFIG_MCOREI7AVX) += \
4979 -+ $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx))
4980 -+ cflags-$(CONFIG_MCOREAVXI) += \
4981 -+ $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i))
4982 -+ cflags-$(CONFIG_MCOREAVX2) += \
4983 -+ $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2))
4984 - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
4985 - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
4986 - cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
4987 -diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
4988 ---- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500
4989 -+++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500
4990 -@@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6
4991 -# Please note that patches that add -march=athlon-xp and friends are pointless.
4992 -# They make zero difference whatsoever to performance at this time.
4993 - cflags-$(CONFIG_MK7) += -march=athlon
4994 -+cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
4995 - cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
4996 -+cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
4997 -+cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
4998 -+cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
4999 -+cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
5000 -+cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
5001 -+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
5002 - cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
5003 - cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
5004 - cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
5005 -@@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
5006 - cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
5007 - cflags-$(CONFIG_MVIAC7) += -march=i686
5008 - cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
5009 -+cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7)
5010 -+cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx)
5011 -+cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i)
5012 -+cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2)
5013 - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
5014 - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
5015 -
5016 -
5017
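The gcc-optimization patch removed above works by pairing each new CONFIG_M* choice with a MODULE_PROC_FAMILY string and a -march flag passed through cc-option. As an illustrative sketch only (not part of the removed patch), the following standalone C program mirrors a slice of that #elif chain; CONFIG_MCOREI7 is defined by hand here purely for demonstration, whereas in a real build these symbols come from Kconfig:

    /*
     * Sketch of the MODULE_PROC_FAMILY selection added by the removed patch.
     * The hand-defined CONFIG_MCOREI7 below stands in for a Kconfig symbol.
     */
    #include <stdio.h>

    #define CONFIG_MCOREI7 1  /* pretend "Intel Core i7" was selected */

    #if defined(CONFIG_MNATIVE)
    # define MODULE_PROC_FAMILY "NATIVE "
    #elif defined(CONFIG_MCOREI7)
    # define MODULE_PROC_FAMILY "COREI7 "
    #elif defined(CONFIG_MK10)
    # define MODULE_PROC_FAMILY "K10 "
    #else
    # define MODULE_PROC_FAMILY "GENERIC "
    #endif

    int main(void)
    {
        printf("MODULE_PROC_FAMILY = \"%s\"\n", MODULE_PROC_FAMILY);
        return 0;
    }

On the compiler side, the removed Makefile hunk would have turned the same selection into -march=corei7 via cc-option, falling back to -mtune=corei7 when the compiler does not support the -march value.
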
5018 Deleted: genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch
5019 ===================================================================
5020 --- genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch 2014-05-05 14:22:27 UTC (rev 2771)
5021 +++ genpatches-2.6/trunk/3.15/5001_BFQ-1-block-cgroups-kconfig-build-bits-for-BFQ-v7r2-3.14.patch 2014-05-05 14:28:48 UTC (rev 2772)
5022 @@ -1,104 +0,0 @@
5023 -From c3280db98437c9520f04ecacfdf1a868d7a4b7b3 Mon Sep 17 00:00:00 2001
5024 -From: Paolo Valente <paolo.valente@×××××××.it>
5025 -Date: Tue, 3 Sep 2013 16:50:42 +0200
5026 -Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v7r2-3.14
5027 -
5028 -Update Kconfig.iosched and do the related Makefile changes to include
5029 -kernel configuration options for BFQ. Also add the bfqio controller
5030 -to the cgroups subsystem.
5031 -
5032 -Signed-off-by: Paolo Valente <paolo.valente@×××××××.it>
5033 -Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
5034 ----
5035 - block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++
5036 - block/Makefile | 1 +
5037 - include/linux/cgroup_subsys.h | 4 ++++
5038 - 3 files changed, 37 insertions(+)
5039 -
5040 -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
5041 -index 421bef9..0ee5f0f 100644
5042 ---- a/block/Kconfig.iosched
5043 -+++ b/block/Kconfig.iosched
5044 -@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED
5045 - ---help---
5046 - Enable group IO scheduling in CFQ.
5047 -
5048 -+config IOSCHED_BFQ
5049 -+ tristate "BFQ I/O scheduler"
5050 -+ default n
5051 -+ ---help---
5052 -+ The BFQ I/O scheduler tries to distribute bandwidth among
5053 -+ all processes according to their weights.
5054 -+ It aims at distributing the bandwidth as desired, independently of
5055 -+ the disk parameters and with any workload. It also tries to
5056 -+ guarantee low latency to interactive and soft real-time
5057 -+ applications. If compiled built-in (saying Y here), BFQ can
5058 -+ be configured to support hierarchical scheduling.
5059 -+
5060 -+config CGROUP_BFQIO
5061 -+ bool "BFQ hierarchical scheduling support"
5062 -+ depends on CGROUPS && IOSCHED_BFQ=y
5063 -+ default n
5064 -+ ---help---
5065 -+ Enable hierarchical scheduling in BFQ, using the cgroups
5066 -+ filesystem interface. The name of the subsystem will be
5067 -+ bfqio.
5068 -+
5069 - choice
5070 - prompt "Default I/O scheduler"
5071 - default DEFAULT_CFQ
5072 -@@ -52,6 +73,16 @@ choice
5073 - config DEFAULT_CFQ
5074 - bool "CFQ" if IOSCHED_CFQ=y
5075 -
5076 -+ config DEFAULT_BFQ
5077 -+ bool "BFQ" if IOSCHED_BFQ=y
5078 -+ help
5079 -+ Selects BFQ as the default I/O scheduler which will be
5080 -+ used by default for all block devices.
5081 -+ The BFQ I/O scheduler aims at distributing the bandwidth
5082 -+ as desired, independently of the disk parameters and with
5083 -+ any workload. It also tries to guarantee low latency to
5084 -+ interactive and soft real-time applications.
5085 -+
5086 - config DEFAULT_NOOP
5087 - bool "No-op"
5088 -
5089 -@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED
5090 - string
5091 - default "deadline" if DEFAULT_DEADLINE
5092 - default "cfq" if DEFAULT_CFQ
5093 -+ default "bfq" if DEFAULT_BFQ
5094 - default "noop" if DEFAULT_NOOP
5095 -
5096 - endmenu
5097 -diff --git a/block/Makefile b/block/Makefile
5098 -index 20645e8..cbd83fb 100644
5099 ---- a/block/Makefile
5100 -+++ b/block/Makefile
5101 -@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
5102 - obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
5103 - obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
5104 - obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
5105 -+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o
5106 -
5107 - obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
5108 - obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
5109 -diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
5110 -index 7b99d71..4e8c0ff 100644
5111 ---- a/include/linux/cgroup_subsys.h
5112 -+++ b/include/linux/cgroup_subsys.h
5113 -@@ -39,6 +39,10 @@ SUBSYS(net_cls)
5114 - SUBSYS(blkio)
5115 - #endif
5116 -
5117 -+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_BFQIO)
5118 -+SUBSYS(bfqio)
5119 -+#endif
5120 -+
5121 - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
5122 - SUBSYS(perf)
5123 - #endif
5124 ---
5125 -1.9.0
5126 -
5127
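The patch removed above only wires BFQ into Kconfig.iosched, the block Makefile and the cgroup subsystem list; it does not by itself change the running scheduler. On a kernel actually built with the removed CONFIG_IOSCHED_BFQ option, the elevator can also be switched per device through sysfs. A minimal sketch, assuming the target disk is sda and using the standard /sys/block/<dev>/queue/scheduler interface:

    /*
     * Sketch: select the "bfq" elevator for one block device at runtime.
     * Assumes a kernel built with the (removed) CONFIG_IOSCHED_BFQ option;
     * the device name "sda" is an example and should be adjusted.
     */
    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/block/sda/queue/scheduler";
        FILE *f = fopen(path, "w");

        if (f == NULL) {
            perror(path);
            return 1;
        }
        if (fputs("bfq\n", f) == EOF) {
            perror(path);
            fclose(f);
            return 1;
        }
        fclose(f);
        printf("I/O scheduler for sda set to bfq\n");
        return 0;
    }

Making BFQ the boot-time default instead goes through the DEFAULT_BFQ / DEFAULT_IOSCHED choice added by the same removed hunk.
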
5128 Deleted: genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1
5129 ===================================================================
5130 --- genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1 2014-05-05 14:22:27 UTC (rev 2771)
5131 +++ genpatches-2.6/trunk/3.15/5002_BFQ-2-block-introduce-the-BFQ-v7r2-I-O-sched-for-3.14.patch1 2014-05-05 14:28:48 UTC (rev 2772)
5132 @@ -1,6065 +0,0 @@
5133 -From 5055277df59d9280da6b60cf90bed8e5e57dc44d Mon Sep 17 00:00:00 2001
5134 -From: Paolo Valente <paolo.valente@×××××××.it>
5135 -Date: Thu, 9 May 2013 19:10:02 +0200
5136 -Subject: [PATCH 2/3] block: introduce the BFQ-v7r2 I/O sched for 3.14
5137 -
5138 -Add the BFQ-v7r2 I/O scheduler to 3.14.
5139 -The general structure is borrowed from CFQ, as is much of the code for
5140 -handling I/O contexts. Over time, several useful features have been
5141 -ported from CFQ as well (details in the changelog in README.BFQ). A
5142 -(bfq_)queue is associated to each task doing I/O on a device, and each
5143 -time a scheduling decision has to be made a queue is selected and served
5144 -until it expires.
5145 -
5146 - - Slices are given in the service domain: tasks are assigned
5147 - budgets, measured in number of sectors. Once granted the disk, a task
5148 - must however consume its assigned budget within a configurable
5149 - maximum time (by default, the maximum possible value of the
5150 - budgets is automatically computed to comply with this timeout).
5151 - This allows the desired latency vs "throughput boosting" tradeoff
5152 - to be set.
5153 -
5154 - - Budgets are scheduled according to a variant of WF2Q+, implemented
5155 - using an augmented rb-tree to take eligibility into account while
5156 - preserving an O(log N) overall complexity.
5157 -
5158 - - A low-latency tunable is provided; if enabled, both interactive
5159 - and soft real-time applications are guaranteed a very low latency.
5160 -
5161 - - Latency guarantees are preserved also in the presence of NCQ.
5162 -
5163 - - Also with flash-based devices, a high throughput is achieved
5164 - while still preserving latency guarantees.
5165 -
5166 - - BFQ features Early Queue Merge (EQM), a sort of fusion of the
5167 - cooperating-queue-merging and the preemption mechanisms present
5168 - in CFQ. EQM is in fact a unified mechanism that tries to get a
5169 - sequential read pattern, and hence a high throughput, with any
5170 - set of processes performing interleaved I/O over a contiguous
5171 - sequence of sectors.
5172 -
5173 - - BFQ supports full hierarchical scheduling, exporting a cgroups
5174 - interface. Since each node has a full scheduler, each group can
5175 - be assigned its own weight.
5176 -
5177 - - If the cgroups interface is not used, only I/O priorities can be
5178 - assigned to processes, with ioprio values mapped to weights
5179 - with the relation weight = IOPRIO_BE_NR - ioprio.
5180 -
5181 - - ioprio classes are served in strict priority order, i.e., lower
5182 - priority queues are not served as long as there are higher
5183 - priority queues. Among queues in the same class the bandwidth is
5184 - distributed in proportion to the weight of each queue. A very
5185 - thin extra bandwidth is however guaranteed to the Idle class, to
5186 - prevent it from starving.
5187 -
5188 -Signed-off-by: Paolo Valente <paolo.valente@×××××××.it>
5189 -Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
5190 ----
5191 - block/bfq-cgroup.c | 926 +++++++++++++++
5192 - block/bfq-ioc.c | 36 +
5193 - block/bfq-iosched.c | 3300 +++++++++++++++++++++++++++++++++++++++++++++++++++
5194 - block/bfq-sched.c | 1078 +++++++++++++++++
5195 - block/bfq.h | 622 ++++++++++
5196 - 5 files changed, 5962 insertions(+)
5197 - create mode 100644 block/bfq-cgroup.c
5198 - create mode 100644 block/bfq-ioc.c
5199 - create mode 100644 block/bfq-iosched.c
5200 - create mode 100644 block/bfq-sched.c
5201 - create mode 100644 block/bfq.h
5202 -
5203 -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
5204 -new file mode 100644
5205 -index 0000000..bcecdb4
5206 ---- /dev/null
5207 -+++ b/block/bfq-cgroup.c
5208 -@@ -0,0 +1,926 @@
5209 -+/*
5210 -+ * BFQ: CGROUPS support.
5211 -+ *
5212 -+ * Based on ideas and code from CFQ:
5213 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
5214 -+ *
5215 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
5216 -+ * Paolo Valente <paolo.valente@×××××××.it>
5217 -+ *
5218 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
5219 -+ *
5220 -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file.
5221 -+ */
5222 -+
5223 -+#ifdef CONFIG_CGROUP_BFQIO
5224 -+
5225 -+static DEFINE_MUTEX(bfqio_mutex);
5226 -+
5227 -+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp)
5228 -+{
5229 -+ return bgrp ? !bgrp->online : false;
5230 -+}
5231 -+
5232 -+static struct bfqio_cgroup bfqio_root_cgroup = {
5233 -+ .weight = BFQ_DEFAULT_GRP_WEIGHT,
5234 -+ .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
5235 -+ .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
5236 -+};
5237 -+
5238 -+static inline void bfq_init_entity(struct bfq_entity *entity,
5239 -+ struct bfq_group *bfqg)
5240 -+{
5241 -+ entity->weight = entity->new_weight;
5242 -+ entity->orig_weight = entity->new_weight;
5243 -+ entity->ioprio = entity->new_ioprio;
5244 -+ entity->ioprio_class = entity->new_ioprio_class;
5245 -+ entity->parent = bfqg->my_entity;
5246 -+ entity->sched_data = &bfqg->sched_data;
5247 -+}
5248 -+
5249 -+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css)
5250 -+{
5251 -+ return css ? container_of(css, struct bfqio_cgroup, css) : NULL;
5252 -+}
5253 -+
5254 -+/*
5255 -+ * Search the bfq_group for bfqd into the hash table (by now only a list)
5256 -+ * of bgrp. Must be called under rcu_read_lock().
5257 -+ */
5258 -+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
5259 -+ struct bfq_data *bfqd)
5260 -+{
5261 -+ struct bfq_group *bfqg;
5262 -+ void *key;
5263 -+
5264 -+ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
5265 -+ key = rcu_dereference(bfqg->bfqd);
5266 -+ if (key == bfqd)
5267 -+ return bfqg;
5268 -+ }
5269 -+
5270 -+ return NULL;
5271 -+}
5272 -+
5273 -+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
5274 -+ struct bfq_group *bfqg)
5275 -+{
5276 -+ struct bfq_entity *entity = &bfqg->entity;
5277 -+
5278 -+ /*
5279 -+ * If the weight of the entity has never been set via the sysfs
5280 -+ * interface, then bgrp->weight == 0. In this case we initialize
5281 -+ * the weight from the current ioprio value. Otherwise, the group
5282 -+ * weight, if set, has priority over the ioprio value.
5283 -+ */
5284 -+ if (bgrp->weight == 0) {
5285 -+ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
5286 -+ entity->new_ioprio = bgrp->ioprio;
5287 -+ } else {
5288 -+ entity->new_weight = bgrp->weight;
5289 -+ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
5290 -+ }
5291 -+ entity->orig_weight = entity->weight = entity->new_weight;
5292 -+ entity->ioprio = entity->new_ioprio;
5293 -+ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
5294 -+ entity->my_sched_data = &bfqg->sched_data;
5295 -+}
5296 -+
5297 -+static inline void bfq_group_set_parent(struct bfq_group *bfqg,
5298 -+ struct bfq_group *parent)
5299 -+{
5300 -+ struct bfq_entity *entity;
5301 -+
5302 -+ BUG_ON(parent == NULL);
5303 -+ BUG_ON(bfqg == NULL);
5304 -+
5305 -+ entity = &bfqg->entity;
5306 -+ entity->parent = parent->my_entity;
5307 -+ entity->sched_data = &parent->sched_data;
5308 -+}
5309 -+
5310 -+/**
5311 -+ * bfq_group_chain_alloc - allocate a chain of groups.
5312 -+ * @bfqd: queue descriptor.
5313 -+ * @css: the leaf cgroup_subsys_state this chain starts from.
5314 -+ *
5315 -+ * Allocate a chain of groups starting from the one belonging to
5316 -+ * @cgroup up to the root cgroup. Stop if a cgroup on the chain
5317 -+ * to the root has already an allocated group on @bfqd.
5318 -+ */
5319 -+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
5320 -+ struct cgroup_subsys_state *css)
5321 -+{
5322 -+ struct bfqio_cgroup *bgrp;
5323 -+ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
5324 -+
5325 -+ for (; css != NULL; css = css->parent) {
5326 -+ bgrp = css_to_bfqio(css);
5327 -+
5328 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5329 -+ if (bfqg != NULL) {
5330 -+ /*
5331 -+ * All the cgroups in the path from there to the
5332 -+ * root must have a bfq_group for bfqd, so we don't
5333 -+ * need any more allocations.
5334 -+ */
5335 -+ break;
5336 -+ }
5337 -+
5338 -+ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
5339 -+ if (bfqg == NULL)
5340 -+ goto cleanup;
5341 -+
5342 -+ bfq_group_init_entity(bgrp, bfqg);
5343 -+ bfqg->my_entity = &bfqg->entity;
5344 -+
5345 -+ if (leaf == NULL) {
5346 -+ leaf = bfqg;
5347 -+ prev = leaf;
5348 -+ } else {
5349 -+ bfq_group_set_parent(prev, bfqg);
5350 -+ /*
5351 -+ * Build a list of allocated nodes using the bfqd
5352 -+ * field, which is still unused and will be initialized
5353 -+ * only after the node is connected.
5354 -+ */
5355 -+ prev->bfqd = bfqg;
5356 -+ prev = bfqg;
5357 -+ }
5358 -+ }
5359 -+
5360 -+ return leaf;
5361 -+
5362 -+cleanup:
5363 -+ while (leaf != NULL) {
5364 -+ prev = leaf;
5365 -+ leaf = leaf->bfqd;
5366 -+ kfree(prev);
5367 -+ }
5368 -+
5369 -+ return NULL;
5370 -+}
5371 -+
5372 -+/**
5373 -+ * bfq_group_chain_link - link an allocated group chain to a cgroup hierarchy.
5374 -+ * @bfqd: the queue descriptor.
5375 -+ * @css: the leaf cgroup_subsys_state to start from.
5376 -+ * @leaf: the leaf group (to be associated to @cgroup).
5377 -+ *
5378 -+ * Try to link a chain of groups to a cgroup hierarchy, connecting the
5379 -+ * nodes bottom-up, so we can be sure that when we find a cgroup in the
5380 -+ * hierarchy that already has a group associated to @bfqd all the nodes
5381 -+ * in the path to the root cgroup have one too.
5382 -+ *
5383 -+ * On locking: the queue lock protects the hierarchy (there is a hierarchy
5384 -+ * per device) while the bfqio_cgroup lock protects the list of groups
5385 -+ * belonging to the same cgroup.
5386 -+ */
5387 -+static void bfq_group_chain_link(struct bfq_data *bfqd,
5388 -+ struct cgroup_subsys_state *css,
5389 -+ struct bfq_group *leaf)
5390 -+{
5391 -+ struct bfqio_cgroup *bgrp;
5392 -+ struct bfq_group *bfqg, *next, *prev = NULL;
5393 -+ unsigned long flags;
5394 -+
5395 -+ assert_spin_locked(bfqd->queue->queue_lock);
5396 -+
5397 -+ for (; css != NULL && leaf != NULL; css = css->parent) {
5398 -+ bgrp = css_to_bfqio(css);
5399 -+ next = leaf->bfqd;
5400 -+
5401 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5402 -+ BUG_ON(bfqg != NULL);
5403 -+
5404 -+ spin_lock_irqsave(&bgrp->lock, flags);
5405 -+
5406 -+ rcu_assign_pointer(leaf->bfqd, bfqd);
5407 -+ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
5408 -+ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
5409 -+
5410 -+ spin_unlock_irqrestore(&bgrp->lock, flags);
5411 -+
5412 -+ prev = leaf;
5413 -+ leaf = next;
5414 -+ }
5415 -+
5416 -+ BUG_ON(css == NULL && leaf != NULL);
5417 -+ if (css != NULL && prev != NULL) {
5418 -+ bgrp = css_to_bfqio(css);
5419 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5420 -+ bfq_group_set_parent(prev, bfqg);
5421 -+ }
5422 -+}
5423 -+
5424 -+/**
5425 -+ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
5426 -+ * @bfqd: queue descriptor.
5427 -+ * @cgroup: cgroup being searched for.
5428 -+ *
5429 -+ * Return a group associated to @bfqd in @cgroup, allocating one if
5430 -+ * necessary. When a group is returned all the cgroups in the path
5431 -+ * to the root have a group associated to @bfqd.
5432 -+ *
5433 -+ * If the allocation fails, return the root group: this breaks guarantees
5434 -+ * but is a safe fallback. If this loss becomes a problem it can be
5435 -+ * mitigated using the equivalent weight (given by the product of the
5436 -+ * weights of the groups in the path from @group to the root) in the
5437 -+ * root scheduler.
5438 -+ *
5439 -+ * We allocate all the missing nodes in the path from the leaf cgroup
5440 -+ * to the root and we connect the nodes only after all the allocations
5441 -+ * have been successful.
5442 -+ */
5443 -+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
5444 -+ struct cgroup_subsys_state *css)
5445 -+{
5446 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
5447 -+ struct bfq_group *bfqg;
5448 -+
5449 -+ bfqg = bfqio_lookup_group(bgrp, bfqd);
5450 -+ if (bfqg != NULL)
5451 -+ return bfqg;
5452 -+
5453 -+ bfqg = bfq_group_chain_alloc(bfqd, css);
5454 -+ if (bfqg != NULL)
5455 -+ bfq_group_chain_link(bfqd, css, bfqg);
5456 -+ else
5457 -+ bfqg = bfqd->root_group;
5458 -+
5459 -+ return bfqg;
5460 -+}
5461 -+
5462 -+/**
5463 -+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
5464 -+ * @bfqd: queue descriptor.
5465 -+ * @bfqq: the queue to move.
5466 -+ * @entity: @bfqq's entity.
5467 -+ * @bfqg: the group to move to.
5468 -+ *
5469 -+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
5470 -+ * it on the new one. Avoid putting the entity on the old group idle tree.
5471 -+ *
5472 -+ * Must be called under the queue lock; the cgroup owning @bfqg must
5473 -+ * not disappear (by now this just means that we are called under
5474 -+ * rcu_read_lock()).
5475 -+ */
5476 -+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
5477 -+ struct bfq_entity *entity, struct bfq_group *bfqg)
5478 -+{
5479 -+ int busy, resume;
5480 -+
5481 -+ busy = bfq_bfqq_busy(bfqq);
5482 -+ resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
5483 -+
5484 -+ BUG_ON(resume && !entity->on_st);
5485 -+ BUG_ON(busy && !resume && entity->on_st &&
5486 -+ bfqq != bfqd->in_service_queue);
5487 -+
5488 -+ if (busy) {
5489 -+ BUG_ON(atomic_read(&bfqq->ref) < 2);
5490 -+
5491 -+ if (!resume)
5492 -+ bfq_del_bfqq_busy(bfqd, bfqq, 0);
5493 -+ else
5494 -+ bfq_deactivate_bfqq(bfqd, bfqq, 0);
5495 -+ } else if (entity->on_st)
5496 -+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
5497 -+
5498 -+ /*
5499 -+ * Here we use a reference to bfqg. We don't need a refcounter
5500 -+ * as the cgroup reference will not be dropped, so that its
5501 -+ * destroy() callback will not be invoked.
5502 -+ */
5503 -+ entity->parent = bfqg->my_entity;
5504 -+ entity->sched_data = &bfqg->sched_data;
5505 -+
5506 -+ if (busy && resume)
5507 -+ bfq_activate_bfqq(bfqd, bfqq);
5508 -+
5509 -+ if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
5510 -+ bfq_schedule_dispatch(bfqd);
5511 -+}
5512 -+
5513 -+/**
5514 -+ * __bfq_bic_change_cgroup - move @bic to @cgroup.
5515 -+ * @bfqd: the queue descriptor.
5516 -+ * @bic: the bic to move.
5517 -+ * @cgroup: the cgroup to move to.
5518 -+ *
5519 -+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
5520 -+ * has to make sure that the reference to cgroup is valid across the call.
5521 -+ *
5522 -+ * NOTE: an alternative approach might have been to store the current
5523 -+ * cgroup in bfqq and getting a reference to it, reducing the lookup
5524 -+ * time here, at the price of slightly more complex code.
5525 -+ */
5526 -+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
5527 -+ struct bfq_io_cq *bic,
5528 -+ struct cgroup_subsys_state *css)
5529 -+{
5530 -+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
5531 -+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
5532 -+ struct bfq_entity *entity;
5533 -+ struct bfq_group *bfqg;
5534 -+ struct bfqio_cgroup *bgrp;
5535 -+
5536 -+ bgrp = css_to_bfqio(css);
5537 -+
5538 -+ bfqg = bfq_find_alloc_group(bfqd, css);
5539 -+ if (async_bfqq != NULL) {
5540 -+ entity = &async_bfqq->entity;
5541 -+
5542 -+ if (entity->sched_data != &bfqg->sched_data) {
5543 -+ bic_set_bfqq(bic, NULL, 0);
5544 -+ bfq_log_bfqq(bfqd, async_bfqq,
5545 -+ "bic_change_group: %p %d",
5546 -+ async_bfqq, atomic_read(&async_bfqq->ref));
5547 -+ bfq_put_queue(async_bfqq);
5548 -+ }
5549 -+ }
5550 -+
5551 -+ if (sync_bfqq != NULL) {
5552 -+ entity = &sync_bfqq->entity;
5553 -+ if (entity->sched_data != &bfqg->sched_data)
5554 -+ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
5555 -+ }
5556 -+
5557 -+ return bfqg;
5558 -+}
5559 -+
5560 -+/**
5561 -+ * bfq_bic_change_cgroup - move @bic to @cgroup.
5562 -+ * @bic: the bic being migrated.
5563 -+ * @cgroup: the destination cgroup.
5564 -+ *
5565 -+ * When the task owning @bic is moved to @cgroup, @bic is immediately
5566 -+ * moved into its new parent group.
5567 -+ */
5568 -+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
5569 -+ struct cgroup_subsys_state *css)
5570 -+{
5571 -+ struct bfq_data *bfqd;
5572 -+ unsigned long uninitialized_var(flags);
5573 -+
5574 -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
5575 -+ &flags);
5576 -+ if (bfqd != NULL) {
5577 -+ __bfq_bic_change_cgroup(bfqd, bic, css);
5578 -+ bfq_put_bfqd_unlock(bfqd, &flags);
5579 -+ }
5580 -+}
5581 -+
5582 -+/**
5583 -+ * bfq_bic_update_cgroup - update the cgroup of @bic.
5584 -+ * @bic: the @bic to update.
5585 -+ *
5586 -+ * Make sure that @bic is enqueued in the cgroup of the current task.
5587 -+ * We need this in addition to moving bics during the cgroup attach
5588 -+ * phase because the task owning @bic could be at its first disk
5589 -+ * access or we may end up in the root cgroup as the result of a
5590 -+ * memory allocation failure and here we try to move to the right
5591 -+ * group.
5592 -+ *
5593 -+ * Must be called under the queue lock. It is safe to use the returned
5594 -+ * value even after the rcu_read_unlock() as the migration/destruction
5595 -+ * paths act under the queue lock too. IOW it is impossible to race with
5596 -+ * group migration/destruction and end up with an invalid group as:
5597 -+ * a) here cgroup has not yet been destroyed, nor its destroy callback
5598 -+ * has started execution, as current holds a reference to it,
5599 -+ * b) if it is destroyed after rcu_read_unlock() [after current is
5600 -+ * migrated to a different cgroup] its attach() callback will have
5601 -+ * taken care of remove all the references to the old cgroup data.
5602 -+ */
5603 -+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
5604 -+{
5605 -+ struct bfq_data *bfqd = bic_to_bfqd(bic);
5606 -+ struct bfq_group *bfqg;
5607 -+ struct cgroup_subsys_state *css;
5608 -+
5609 -+ BUG_ON(bfqd == NULL);
5610 -+
5611 -+ rcu_read_lock();
5612 -+ css = task_css(current, bfqio_subsys_id);
5613 -+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, css);
5614 -+ rcu_read_unlock();
5615 -+
5616 -+ return bfqg;
5617 -+}
5618 -+
5619 -+/**
5620 -+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
5621 -+ * @st: the service tree being flushed.
5622 -+ */
5623 -+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
5624 -+{
5625 -+ struct bfq_entity *entity = st->first_idle;
5626 -+
5627 -+ for (; entity != NULL; entity = st->first_idle)
5628 -+ __bfq_deactivate_entity(entity, 0);
5629 -+}
5630 -+
5631 -+/**
5632 -+ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
5633 -+ * @bfqd: the device data structure with the root group.
5634 -+ * @entity: the entity to move.
5635 -+ */
5636 -+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
5637 -+ struct bfq_entity *entity)
5638 -+{
5639 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
5640 -+
5641 -+ BUG_ON(bfqq == NULL);
5642 -+ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
5643 -+ return;
5644 -+}
5645 -+
5646 -+/**
5647 -+ * bfq_reparent_active_entities - move to the root group all active entities.
5648 -+ * @bfqd: the device data structure with the root group.
5649 -+ * @bfqg: the group to move from.
5650 -+ * @st: the service tree with the entities.
5651 -+ *
5652 -+ * Needs queue_lock to be taken and reference to be valid over the call.
5653 -+ */
5654 -+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
5655 -+ struct bfq_group *bfqg,
5656 -+ struct bfq_service_tree *st)
5657 -+{
5658 -+ struct rb_root *active = &st->active;
5659 -+ struct bfq_entity *entity = NULL;
5660 -+
5661 -+ if (!RB_EMPTY_ROOT(&st->active))
5662 -+ entity = bfq_entity_of(rb_first(active));
5663 -+
5664 -+ for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
5665 -+ bfq_reparent_leaf_entity(bfqd, entity);
5666 -+
5667 -+ if (bfqg->sched_data.in_service_entity != NULL)
5668 -+ bfq_reparent_leaf_entity(bfqd,
5669 -+ bfqg->sched_data.in_service_entity);
5670 -+
5671 -+ return;
5672 -+}
5673 -+
5674 -+/**
5675 -+ * bfq_destroy_group - destroy @bfqg.
5676 -+ * @bgrp: the bfqio_cgroup containing @bfqg.
5677 -+ * @bfqg: the group being destroyed.
5678 -+ *
5679 -+ * Destroy @bfqg, making sure that it is not referenced from its parent.
5680 -+ */
5681 -+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
5682 -+{
5683 -+ struct bfq_data *bfqd;
5684 -+ struct bfq_service_tree *st;
5685 -+ struct bfq_entity *entity = bfqg->my_entity;
5686 -+ unsigned long uninitialized_var(flags);
5687 -+ int i;
5688 -+
5689 -+ hlist_del(&bfqg->group_node);
5690 -+
5691 -+ /*
5692 -+ * Empty all service_trees belonging to this group before deactivating
5693 -+ * the group itself.
5694 -+ */
5695 -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
5696 -+ st = bfqg->sched_data.service_tree + i;
5697 -+
5698 -+ /*
5699 -+ * The idle tree may still contain bfq_queues belonging
5700 -+ * to exited tasks because they never migrated to a different
5701 -+ * cgroup from the one being destroyed now. No one else
5702 -+ * can access them so it's safe to act without any lock.
5703 -+ */
5704 -+ bfq_flush_idle_tree(st);
5705 -+
5706 -+ /*
5707 -+ * It may happen that some queues are still active
5708 -+ * (busy) upon group destruction (if the corresponding
5709 -+ * processes have been forced to terminate). We move
5710 -+ * all the leaf entities corresponding to these queues
5711 -+ * to the root_group.
5712 -+ * Also, it may happen that the group has an entity
5713 -+ * under service, which is disconnected from the active
5714 -+ * tree: it must be moved, too.
5715 -+ * There is no need to put the sync queues, as the
5716 -+ * scheduler has taken no reference.
5717 -+ */
5718 -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
5719 -+ if (bfqd != NULL) {
5720 -+ bfq_reparent_active_entities(bfqd, bfqg, st);
5721 -+ bfq_put_bfqd_unlock(bfqd, &flags);
5722 -+ }
5723 -+ BUG_ON(!RB_EMPTY_ROOT(&st->active));
5724 -+ BUG_ON(!RB_EMPTY_ROOT(&st->idle));
5725 -+ }
5726 -+ BUG_ON(bfqg->sched_data.next_in_service != NULL);
5727 -+ BUG_ON(bfqg->sched_data.in_service_entity != NULL);
5728 -+
5729 -+ /*
5730 -+ * We may race with device destruction, take extra care when
5731 -+ * dereferencing bfqg->bfqd.
5732 -+ */
5733 -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
5734 -+ if (bfqd != NULL) {
5735 -+ hlist_del(&bfqg->bfqd_node);
5736 -+ __bfq_deactivate_entity(entity, 0);
5737 -+ bfq_put_async_queues(bfqd, bfqg);
5738 -+ bfq_put_bfqd_unlock(bfqd, &flags);
5739 -+ }
5740 -+ BUG_ON(entity->tree != NULL);
5741 -+
5742 -+ /*
5743 -+ * No need to defer the kfree() to the end of the RCU grace
5744 -+ * period: we are called from the destroy() callback of our
5745 -+ * cgroup, so we can be sure that no one is a) still using
5746 -+ * this cgroup or b) doing lookups in it.
5747 -+ */
5748 -+ kfree(bfqg);
5749 -+}
5750 -+
5751 -+static void bfq_end_raising_async(struct bfq_data *bfqd)
5752 -+{
5753 -+ struct hlist_node *tmp;
5754 -+ struct bfq_group *bfqg;
5755 -+
5756 -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node)
5757 -+ bfq_end_raising_async_queues(bfqd, bfqg);
5758 -+ bfq_end_raising_async_queues(bfqd, bfqd->root_group);
5759 -+}
5760 -+
5761 -+/**
5762 -+ * bfq_disconnect_groups - disconnect @bfqd from all its groups.
5763 -+ * @bfqd: the device descriptor being exited.
5764 -+ *
5765 -+ * When the device exits we just make sure that no lookup can return
5766 -+ * the now unused group structures. They will be deallocated on cgroup
5767 -+ * destruction.
5768 -+ */
5769 -+static void bfq_disconnect_groups(struct bfq_data *bfqd)
5770 -+{
5771 -+ struct hlist_node *tmp;
5772 -+ struct bfq_group *bfqg;
5773 -+
5774 -+ bfq_log(bfqd, "disconnect_groups beginning");
5775 -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) {
5776 -+ hlist_del(&bfqg->bfqd_node);
5777 -+
5778 -+ __bfq_deactivate_entity(bfqg->my_entity, 0);
5779 -+
5780 -+ /*
5781 -+ * Don't remove from the group hash, just set an
5782 -+ * invalid key. No lookups can race with the
5783 -+ * assignment as bfqd is being destroyed; this
5784 -+ * implies also that new elements cannot be added
5785 -+ * to the list.
5786 -+ */
5787 -+ rcu_assign_pointer(bfqg->bfqd, NULL);
5788 -+
5789 -+ bfq_log(bfqd, "disconnect_groups: put async for group %p",
5790 -+ bfqg);
5791 -+ bfq_put_async_queues(bfqd, bfqg);
5792 -+ }
5793 -+}
5794 -+
5795 -+static inline void bfq_free_root_group(struct bfq_data *bfqd)
5796 -+{
5797 -+ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
5798 -+ struct bfq_group *bfqg = bfqd->root_group;
5799 -+
5800 -+ bfq_put_async_queues(bfqd, bfqg);
5801 -+
5802 -+ spin_lock_irq(&bgrp->lock);
5803 -+ hlist_del_rcu(&bfqg->group_node);
5804 -+ spin_unlock_irq(&bgrp->lock);
5805 -+
5806 -+ /*
5807 -+ * No need to synchronize_rcu() here: since the device is gone
5808 -+ * there cannot be any read-side access to its root_group.
5809 -+ */
5810 -+ kfree(bfqg);
5811 -+}
5812 -+
5813 -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
5814 -+{
5815 -+ struct bfq_group *bfqg;
5816 -+ struct bfqio_cgroup *bgrp;
5817 -+ int i;
5818 -+
5819 -+ bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
5820 -+ if (bfqg == NULL)
5821 -+ return NULL;
5822 -+
5823 -+ bfqg->entity.parent = NULL;
5824 -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
5825 -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
5826 -+
5827 -+ bgrp = &bfqio_root_cgroup;
5828 -+ spin_lock_irq(&bgrp->lock);
5829 -+ rcu_assign_pointer(bfqg->bfqd, bfqd);
5830 -+ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
5831 -+ spin_unlock_irq(&bgrp->lock);
5832 -+
5833 -+ return bfqg;
5834 -+}
5835 -+
5836 -+#define SHOW_FUNCTION(__VAR) \
5837 -+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \
5838 -+ struct cftype *cftype) \
5839 -+{ \
5840 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
5841 -+ u64 ret = -ENODEV; \
5842 -+ \
5843 -+ mutex_lock(&bfqio_mutex); \
5844 -+ if (bfqio_is_removed(bgrp)) \
5845 -+ goto out_unlock; \
5846 -+ \
5847 -+ spin_lock_irq(&bgrp->lock); \
5848 -+ ret = bgrp->__VAR; \
5849 -+ spin_unlock_irq(&bgrp->lock); \
5850 -+ \
5851 -+out_unlock: \
5852 -+ mutex_unlock(&bfqio_mutex); \
5853 -+ return ret; \
5854 -+}
5855 -+
5856 -+SHOW_FUNCTION(weight);
5857 -+SHOW_FUNCTION(ioprio);
5858 -+SHOW_FUNCTION(ioprio_class);
5859 -+#undef SHOW_FUNCTION
5860 -+
5861 -+#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
5862 -+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\
5863 -+ struct cftype *cftype, \
5864 -+ u64 val) \
5865 -+{ \
5866 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
5867 -+ struct bfq_group *bfqg; \
5868 -+ int ret = -EINVAL; \
5869 -+ \
5870 -+ if (val < (__MIN) || val > (__MAX)) \
5871 -+ return ret; \
5872 -+ \
5873 -+ ret = -ENODEV; \
5874 -+ mutex_lock(&bfqio_mutex); \
5875 -+ if (bfqio_is_removed(bgrp)) \
5876 -+ goto out_unlock; \
5877 -+ ret = 0; \
5878 -+ \
5879 -+ spin_lock_irq(&bgrp->lock); \
5880 -+ bgrp->__VAR = (unsigned short)val; \
5881 -+ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \
5882 -+ /* \
5883 -+ * Setting the ioprio_changed flag of the entity \
5884 -+ * to 1 with new_##__VAR == ##__VAR would re-set \
5885 -+ * the value of the weight to its ioprio mapping. \
5886 -+ * Set the flag only if necessary. \
5887 -+ */ \
5888 -+ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
5889 -+ bfqg->entity.new_##__VAR = (unsigned short)val; \
5890 -+ /* \
5891 -+ * Make sure that the above new value has been \
5892 -+ * stored in bfqg->entity.new_##__VAR before \
5893 -+ * setting the ioprio_changed flag. In fact, \
5894 -+ * this flag may be read asynchronously (in \
5895 -+ * critical sections protected by a different \
5896 -+ * lock than that held here), and finding this \
5897 -+ * flag set may cause the execution of the code \
5898 -+ * for updating parameters whose value may \
5899 -+ * depend also on bfqg->entity.new_##__VAR (in \
5900 -+ * __bfq_entity_update_weight_prio). \
5901 -+ * This barrier makes sure that the new value \
5902 -+ * of bfqg->entity.new_##__VAR is correctly \
5903 -+ * seen in that code. \
5904 -+ */ \
5905 -+ smp_wmb(); \
5906 -+ bfqg->entity.ioprio_changed = 1; \
5907 -+ } \
5908 -+ } \
5909 -+ spin_unlock_irq(&bgrp->lock); \
5910 -+ \
5911 -+out_unlock: \
5912 -+ mutex_unlock(&bfqio_mutex); \
5913 -+ return ret; \
5914 -+}
5915 -+
5916 -+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
5917 -+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
5918 -+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
5919 -+#undef STORE_FUNCTION
5920 -+
5921 -+static struct cftype bfqio_files[] = {
5922 -+ {
5923 -+ .name = "weight",
5924 -+ .read_u64 = bfqio_cgroup_weight_read,
5925 -+ .write_u64 = bfqio_cgroup_weight_write,
5926 -+ },
5927 -+ {
5928 -+ .name = "ioprio",
5929 -+ .read_u64 = bfqio_cgroup_ioprio_read,
5930 -+ .write_u64 = bfqio_cgroup_ioprio_write,
5931 -+ },
5932 -+ {
5933 -+ .name = "ioprio_class",
5934 -+ .read_u64 = bfqio_cgroup_ioprio_class_read,
5935 -+ .write_u64 = bfqio_cgroup_ioprio_class_write,
5936 -+ },
5937 -+ { }, /* terminate */
5938 -+};
5939 -+
5940 -+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state
5941 -+ *parent_css)
5942 -+{
5943 -+ struct bfqio_cgroup *bgrp;
5944 -+
5945 -+ if (parent_css != NULL) {
5946 -+ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
5947 -+ if (bgrp == NULL)
5948 -+ return ERR_PTR(-ENOMEM);
5949 -+ } else
5950 -+ bgrp = &bfqio_root_cgroup;
5951 -+
5952 -+ spin_lock_init(&bgrp->lock);
5953 -+ INIT_HLIST_HEAD(&bgrp->group_data);
5954 -+ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
5955 -+ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
5956 -+
5957 -+ return &bgrp->css;
5958 -+}
5959 -+
5960 -+/*
5961 -+ * We cannot support shared io contexts, as we have no means to support
5962 -+ * two tasks with the same ioc in two different groups without major rework
5963 -+ * of the main bic/bfqq data structures. By now we allow a task to change
5964 -+ * its cgroup only if it's the only owner of its ioc; the drawback of this
5965 -+ * behavior is that a group containing a task that forked using CLONE_IO
5966 -+ * will not be destroyed until the tasks sharing the ioc die.
5967 -+ */
5968 -+static int bfqio_can_attach(struct cgroup_subsys_state *css,
5969 -+ struct cgroup_taskset *tset)
5970 -+{
5971 -+ struct task_struct *task;
5972 -+ struct io_context *ioc;
5973 -+ int ret = 0;
5974 -+
5975 -+ cgroup_taskset_for_each(task, css, tset) {
5976 -+ /*
5977 -+ * task_lock() is needed to avoid races with
5978 -+ * exit_io_context()
5979 -+ */
5980 -+ task_lock(task);
5981 -+ ioc = task->io_context;
5982 -+ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
5983 -+ /*
5984 -+ * ioc == NULL means that the task is either too young
5985 -+ * or exiting: if it still has no ioc, the ioc can't be
5986 -+ * shared; if the task is exiting, the attach will fail
5987 -+ * anyway, no matter what we return here.
5988 -+ */
5989 -+ ret = -EINVAL;
5990 -+ task_unlock(task);
5991 -+ if (ret)
5992 -+ break;
5993 -+ }
5994 -+
5995 -+ return ret;
5996 -+}
5997 -+
5998 -+static void bfqio_attach(struct cgroup_subsys_state *css,
5999 -+ struct cgroup_taskset *tset)
6000 -+{
6001 -+ struct task_struct *task;
6002 -+ struct io_context *ioc;
6003 -+ struct io_cq *icq;
6004 -+
6005 -+ /*
6006 -+ * IMPORTANT NOTE: The move of more than one process at a time to a
6007 -+ * new group has not yet been tested.
6008 -+ */
6009 -+ cgroup_taskset_for_each(task, css, tset) {
6010 -+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
6011 -+ if (ioc) {
6012 -+ /*
6013 -+ * Handle cgroup change here.
6014 -+ */
6015 -+ rcu_read_lock();
6016 -+ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
6017 -+ if (!strncmp(
6018 -+ icq->q->elevator->type->elevator_name,
6019 -+ "bfq", ELV_NAME_MAX))
6020 -+ bfq_bic_change_cgroup(icq_to_bic(icq),
6021 -+ css);
6022 -+ rcu_read_unlock();
6023 -+ put_io_context(ioc);
6024 -+ }
6025 -+ }
6026 -+}
6027 -+
6028 -+static void bfqio_destroy(struct cgroup_subsys_state *css)
6029 -+{
6030 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
6031 -+ struct hlist_node *tmp;
6032 -+ struct bfq_group *bfqg;
6033 -+
6034 -+ /*
6035 -+ * Since we are destroying the cgroup, there are no more tasks
6036 -+ * referencing it, and all the RCU grace periods that may have
6037 -+ * referenced it are ended (as the destruction of the parent
6038 -+ * cgroup is RCU-safe); bgrp->group_data will not be accessed by
6039 -+ * anything else and we don't need any synchronization.
6040 -+ */
6041 -+ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
6042 -+ bfq_destroy_group(bgrp, bfqg);
6043 -+
6044 -+ BUG_ON(!hlist_empty(&bgrp->group_data));
6045 -+
6046 -+ kfree(bgrp);
6047 -+}
6048 -+
6049 -+static int bfqio_css_online(struct cgroup_subsys_state *css)
6050 -+{
6051 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
6052 -+
6053 -+ mutex_lock(&bfqio_mutex);
6054 -+ bgrp->online = true;
6055 -+ mutex_unlock(&bfqio_mutex);
6056 -+
6057 -+ return 0;
6058 -+}
6059 -+
6060 -+static void bfqio_css_offline(struct cgroup_subsys_state *css)
6061 -+{
6062 -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
6063 -+
6064 -+ mutex_lock(&bfqio_mutex);
6065 -+ bgrp->online = false;
6066 -+ mutex_unlock(&bfqio_mutex);
6067 -+}
6068 -+
6069 -+struct cgroup_subsys bfqio_subsys = {
6070 -+ .name = "bfqio",
6071 -+ .css_alloc = bfqio_create,
6072 -+ .css_online = bfqio_css_online,
6073 -+ .css_offline = bfqio_css_offline,
6074 -+ .can_attach = bfqio_can_attach,
6075 -+ .attach = bfqio_attach,
6076 -+ .css_free = bfqio_destroy,
6077 -+ .subsys_id = bfqio_subsys_id,
6078 -+ .base_cftypes = bfqio_files,
6079 -+};
6080 -+#else
6081 -+static inline void bfq_init_entity(struct bfq_entity *entity,
6082 -+ struct bfq_group *bfqg)
6083 -+{
6084 -+ entity->weight = entity->new_weight;
6085 -+ entity->orig_weight = entity->new_weight;
6086 -+ entity->ioprio = entity->new_ioprio;
6087 -+ entity->ioprio_class = entity->new_ioprio_class;
6088 -+ entity->sched_data = &bfqg->sched_data;
6089 -+}
6090 -+
6091 -+static inline struct bfq_group *
6092 -+bfq_bic_update_cgroup(struct bfq_io_cq *bic)
6093 -+{
6094 -+ struct bfq_data *bfqd = bic_to_bfqd(bic);
6095 -+ return bfqd->root_group;
6096 -+}
6097 -+
6098 -+static inline void bfq_bfqq_move(struct bfq_data *bfqd,
6099 -+ struct bfq_queue *bfqq,
6100 -+ struct bfq_entity *entity,
6101 -+ struct bfq_group *bfqg)
6102 -+{
6103 -+}
6104 -+
6105 -+static void bfq_end_raising_async(struct bfq_data *bfqd)
6106 -+{
6107 -+ bfq_end_raising_async_queues(bfqd, bfqd->root_group);
6108 -+}
6109 -+
6110 -+static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
6111 -+{
6112 -+ bfq_put_async_queues(bfqd, bfqd->root_group);
6113 -+}
6114 -+
6115 -+static inline void bfq_free_root_group(struct bfq_data *bfqd)
6116 -+{
6117 -+ kfree(bfqd->root_group);
6118 -+}
6119 -+
6120 -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
6121 -+{
6122 -+ struct bfq_group *bfqg;
6123 -+ int i;
6124 -+
6125 -+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
6126 -+ if (bfqg == NULL)
6127 -+ return NULL;
6128 -+
6129 -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
6130 -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
6131 -+
6132 -+ return bfqg;
6133 -+}
6134 -+#endif
6135 -diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
6136 -new file mode 100644
6137 -index 0000000..7f6b000
6138 ---- /dev/null
6139 -+++ b/block/bfq-ioc.c
6140 -@@ -0,0 +1,36 @@
6141 -+/*
6142 -+ * BFQ: I/O context handling.
6143 -+ *
6144 -+ * Based on ideas and code from CFQ:
6145 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
6146 -+ *
6147 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
6148 -+ * Paolo Valente <paolo.valente@×××××××.it>
6149 -+ *
6150 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
6151 -+ */
6152 -+
6153 -+/**
6154 -+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
6155 -+ * @icq: the iocontext queue.
6156 -+ */
6157 -+static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
6158 -+{
6159 -+ /* bic->icq is the first member, %NULL will convert to %NULL */
6160 -+ return container_of(icq, struct bfq_io_cq, icq);
6161 -+}
6162 -+
6163 -+/**
6164 -+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
6165 -+ * @bfqd: the lookup key.
6166 -+ * @ioc: the io_context of the process doing I/O.
6167 -+ *
6168 -+ * Queue lock must be held.
6169 -+ */
6170 -+static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
6171 -+ struct io_context *ioc)
6172 -+{
6173 -+ if (ioc)
6174 -+ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
6175 -+ return NULL;
6176 -+}
6177 -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
6178 -new file mode 100644
6179 -index 0000000..f5f71e4
6180 ---- /dev/null
6181 -+++ b/block/bfq-iosched.c
6182 -@@ -0,0 +1,3300 @@
6183 -+/*
6184 -+ * Budget Fair Queueing (BFQ) disk scheduler.
6185 -+ *
6186 -+ * Based on ideas and code from CFQ:
6187 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
6188 -+ *
6189 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
6190 -+ * Paolo Valente <paolo.valente@×××××××.it>
6191 -+ *
6192 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
6193 -+ *
6194 -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file.
6195 -+ *
6196 -+ * BFQ is a proportional share disk scheduling algorithm based on the
6197 -+ * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, measured in
6198 -+ * number of sectors, to tasks instead of time slices. The disk is not granted
6199 -+ * to the in-service task for a given time slice, but until it has exhausted
6200 -+ * its assigned budget. This change from the time to the service domain allows
6201 -+ * BFQ to distribute the disk bandwidth among tasks as desired, without any
6202 -+ * distortion due to ZBR, workload fluctuations or other factors. BFQ uses an
6203 -+ * ad hoc internal scheduler, called B-WF2Q+, to schedule tasks according to
6204 -+ * their budgets (more precisely BFQ schedules queues associated to tasks).
6205 -+ * Thanks to this accurate scheduler, BFQ can afford to assign high budgets to
6206 -+ * disk-bound non-seeky tasks (to boost the throughput), and yet guarantee low
6207 -+ * latencies to interactive and soft real-time applications.
6208 -+ *
6209 -+ * BFQ is described in [1], which also contains a reference to the initial,
6210 -+ * more theoretical paper on BFQ. The interested reader can find in the latter
6211 -+ * paper full details on the main algorithm, formulas of the guarantees, and
6212 -+ * formal proofs of all the properties. With respect to
6213 -+ * the version of BFQ presented in these papers, this implementation adds a
6214 -+ * few more heuristics, such as the one that guarantees a low latency to soft
6215 -+ * real-time applications, and a hierarchical extension based on H-WF2Q+.
6216 -+ *
6217 -+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
6218 -+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
6219 -+ * complexity derives from the one introduced with EEVDF in [3].
6220 -+ *
6221 -+ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
6222 -+ * with the BFQ Disk I/O Scheduler'',
6223 -+ * Proceedings of the 5th Annual International Systems and Storage
6224 -+ * Conference (SYSTOR '12), June 2012.
6225 -+ *
6226 -+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
6227 -+ *
6228 -+ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
6229 -+ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
6230 -+ * Oct 1997.
6231 -+ *
6232 -+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
6233 -+ *
6234 -+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
6235 -+ * First: A Flexible and Accurate Mechanism for Proportional Share
6236 -+ * Resource Allocation,'' technical report.
6237 -+ *
6238 -+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
6239 -+ */
6240 -+#include <linux/module.h>
6241 -+#include <linux/slab.h>
6242 -+#include <linux/blkdev.h>
6243 -+#include <linux/cgroup.h>
6244 -+#include <linux/elevator.h>
6245 -+#include <linux/jiffies.h>
6246 -+#include <linux/rbtree.h>
6247 -+#include <linux/ioprio.h>
6248 -+#include "bfq.h"
6249 -+#include "blk.h"
6250 -+
6251 -+/* Max number of dispatches in one round of service. */
6252 -+static const int bfq_quantum = 4;
6253 -+
6254 -+/* Expiration time of sync (0) and async (1) requests, in jiffies. */
6255 -+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
6256 -+
6257 -+/* Maximum backwards seek, in KiB. */
6258 -+static const int bfq_back_max = 16 * 1024;
6259 -+
6260 -+/* Penalty of a backwards seek, in number of sectors. */
6261 -+static const int bfq_back_penalty = 2;
6262 -+
6263 -+/* Idling period duration, in jiffies. */
6264 -+static int bfq_slice_idle = HZ / 125;
6265 -+
6266 -+/* Default maximum budget values, in sectors and number of requests. */
6267 -+static const int bfq_default_max_budget = 16 * 1024;
6268 -+static const int bfq_max_budget_async_rq = 4;
6269 -+
6270 -+/*
6271 -+ * Async to sync throughput distribution is controlled as follows:
6272 -+ * when an async request is served, the entity is charged the number
6273 -+ * of sectors of the request, multiplied by the factor below
6274 -+ */
6275 -+static const int bfq_async_charge_factor = 10;
6276 -+
6277 -+/* Default timeout values, in jiffies, approximating CFQ defaults. */
6278 -+static const int bfq_timeout_sync = HZ / 8;
6279 -+static int bfq_timeout_async = HZ / 25;
6280 -+
6281 -+struct kmem_cache *bfq_pool;
6282 -+
6283 -+/* Below this threshold (in ms), we consider thinktime immediate. */
6284 -+#define BFQ_MIN_TT 2
6285 -+
6286 -+/* hw_tag detection: parallel requests threshold and min samples needed. */
6287 -+#define BFQ_HW_QUEUE_THRESHOLD 4
6288 -+#define BFQ_HW_QUEUE_SAMPLES 32
6289 -+
6290 -+#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
6291 -+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
6292 -+
6293 -+/* Min samples used for peak rate estimation (for autotuning). */
6294 -+#define BFQ_PEAK_RATE_SAMPLES 32
6295 -+
6296 -+/* Shift used for peak rate fixed precision calculations. */
6297 -+#define BFQ_RATE_SHIFT 16
6298 -+
6299 -+/*
6300 -+ * The duration of the weight raising for interactive applications is
6301 -+ * computed automatically (as default behaviour), using the following
6302 -+ * formula: duration = (R / r) * T, where r is the peak rate of the
6303 -+ * disk, and R and T are two reference parameters. In particular, R is
6304 -+ * the peak rate of a reference disk, and T is about the maximum time
6305 -+ * for starting popular large applications on that disk, under BFQ and
6306 -+ * while reading two files in parallel. Finally, BFQ uses two
6307 -+ * different pairs (R, T) depending on whether the disk is rotational
6308 -+ * or non-rotational.
6309 -+ */
6310 -+#define T_rot (msecs_to_jiffies(5500))
6311 -+#define T_nonrot (msecs_to_jiffies(2000))
6312 -+/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */
6313 -+#define R_rot 17415
6314 -+#define R_nonrot 34791
6315 -+
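As a quick aside, the duration = (R / r) * T rule described above can be reproduced with a minimal stand-alone calculation. The sketch below uses the rotational reference pair (R_rot, T_rot); the measured peak rate r is an assumed value, chosen here as twice the reference.

#include <stdio.h>

int main(void)
{
	/* Reference pair for rotational disks, as defined above. */
	const unsigned long long R_rot = 17415;    /* sectors/usec << BFQ_RATE_SHIFT */
	const unsigned long long T_rot_ms = 5500;  /* reference start-up time, in ms */

	/* Assumed measured peak rate of the actual disk: twice the reference. */
	const unsigned long long r = 2 * R_rot;

	/* duration = (R / r) * T: a disk twice as fast is raised for half as long. */
	unsigned long long duration_ms = R_rot * T_rot_ms / r;

	printf("weight-raising duration: %llu ms\n", duration_ms);  /* 2750 */
	return 0;
}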
6316 -+#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
6317 -+ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
6318 -+
6319 -+#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0])
6320 -+#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
6321 -+
6322 -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd);
6323 -+
6324 -+#include "bfq-ioc.c"
6325 -+#include "bfq-sched.c"
6326 -+#include "bfq-cgroup.c"
6327 -+
6328 -+#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\
6329 -+ IOPRIO_CLASS_IDLE)
6330 -+#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\
6331 -+ IOPRIO_CLASS_RT)
6332 -+
6333 -+#define bfq_sample_valid(samples) ((samples) > 80)
6334 -+
6335 -+/*
6336 -+ * We regard a request as SYNC, if either it's a read or has the SYNC bit
6337 -+ * set (in which case it could also be a direct WRITE).
6338 -+ */
6339 -+static inline int bfq_bio_sync(struct bio *bio)
6340 -+{
6341 -+ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
6342 -+ return 1;
6343 -+
6344 -+ return 0;
6345 -+}
6346 -+
6347 -+/*
6348 -+ * Scheduler run of queue, if there are requests pending and no one in the
6349 -+ * driver that will restart queueing.
6350 -+ */
6351 -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd)
6352 -+{
6353 -+ if (bfqd->queued != 0) {
6354 -+ bfq_log(bfqd, "schedule dispatch");
6355 -+ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work);
6356 -+ }
6357 -+}
6358 -+
6359 -+/*
6360 -+ * Lifted from AS - choose which of rq1 and rq2 is best served now.
6361 -+ * We choose the request that is closest to the head right now. Distance
6362 -+ * behind the head is penalized and only allowed to a certain extent.
6363 -+ */
6364 -+static struct request *bfq_choose_req(struct bfq_data *bfqd,
6365 -+ struct request *rq1,
6366 -+ struct request *rq2,
6367 -+ sector_t last)
6368 -+{
6369 -+ sector_t s1, s2, d1 = 0, d2 = 0;
6370 -+ unsigned long back_max;
6371 -+#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */
6372 -+#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
6373 -+ unsigned wrap = 0; /* bit mask: requests behind the disk head? */
6374 -+
6375 -+ if (rq1 == NULL || rq1 == rq2)
6376 -+ return rq2;
6377 -+ if (rq2 == NULL)
6378 -+ return rq1;
6379 -+
6380 -+ if (rq_is_sync(rq1) && !rq_is_sync(rq2))
6381 -+ return rq1;
6382 -+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
6383 -+ return rq2;
6384 -+ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
6385 -+ return rq1;
6386 -+ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
6387 -+ return rq2;
6388 -+
6389 -+ s1 = blk_rq_pos(rq1);
6390 -+ s2 = blk_rq_pos(rq2);
6391 -+
6392 -+ /*
6393 -+ * By definition, 1KiB is 2 sectors.
6394 -+ */
6395 -+ back_max = bfqd->bfq_back_max * 2;
6396 -+
6397 -+ /*
6398 -+ * Strict one way elevator _except_ in the case where we allow
6399 -+ * short backward seeks which are biased as twice the cost of a
6400 -+ * similar forward seek.
6401 -+ */
6402 -+ if (s1 >= last)
6403 -+ d1 = s1 - last;
6404 -+ else if (s1 + back_max >= last)
6405 -+ d1 = (last - s1) * bfqd->bfq_back_penalty;
6406 -+ else
6407 -+ wrap |= BFQ_RQ1_WRAP;
6408 -+
6409 -+ if (s2 >= last)
6410 -+ d2 = s2 - last;
6411 -+ else if (s2 + back_max >= last)
6412 -+ d2 = (last - s2) * bfqd->bfq_back_penalty;
6413 -+ else
6414 -+ wrap |= BFQ_RQ2_WRAP;
6415 -+
6416 -+ /* Found required data */
6417 -+
6418 -+ /*
6419 -+ * By doing switch() on the bit mask "wrap" we avoid having to
6420 -+ * check two variables for all permutations: --> faster!
6421 -+ */
6422 -+ switch (wrap) {
6423 -+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
6424 -+ if (d1 < d2)
6425 -+ return rq1;
6426 -+ else if (d2 < d1)
6427 -+ return rq2;
6428 -+ else {
6429 -+ if (s1 >= s2)
6430 -+ return rq1;
6431 -+ else
6432 -+ return rq2;
6433 -+ }
6434 -+
6435 -+ case BFQ_RQ2_WRAP:
6436 -+ return rq1;
6437 -+ case BFQ_RQ1_WRAP:
6438 -+ return rq2;
6439 -+ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
6440 -+ default:
6441 -+ /*
6442 -+ * Since both rqs are wrapped,
6443 -+ * start with the one that's further behind head
6444 -+ * (--> only *one* back seek required),
6445 -+ * since back seek takes more time than forward.
6446 -+ */
6447 -+ if (s1 <= s2)
6448 -+ return rq1;
6449 -+ else
6450 -+ return rq2;
6451 -+ }
6452 -+}
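For illustration, the distance rule used above can be isolated in a small stand-alone sketch; all positions below are made-up values. A forward seek costs its distance, while a short backward seek (within bfq_back_max) costs the distance multiplied by bfq_back_penalty.

#include <stdio.h>

int main(void)
{
	const unsigned long long last = 10000;   /* assumed current head position */
	const unsigned long penalty = 2;         /* bfq_back_penalty */

	const unsigned long long s1 = 10200;     /* request ahead of the head */
	const unsigned long long s2 = 9800;      /* request behind the head */

	unsigned long long d1 = s1 - last;              /* forward seek: 200 */
	unsigned long long d2 = (last - s2) * penalty;  /* backward seek: 200 * 2 = 400 */

	/* Neither request wraps, so the smaller effective distance wins: rq1. */
	printf("d1=%llu d2=%llu -> serve rq1\n", d1, d2);
	return 0;
}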
6453 -+
6454 -+static struct bfq_queue *
6455 -+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
6456 -+ sector_t sector, struct rb_node **ret_parent,
6457 -+ struct rb_node ***rb_link)
6458 -+{
6459 -+ struct rb_node **p, *parent;
6460 -+ struct bfq_queue *bfqq = NULL;
6461 -+
6462 -+ parent = NULL;
6463 -+ p = &root->rb_node;
6464 -+ while (*p) {
6465 -+ struct rb_node **n;
6466 -+
6467 -+ parent = *p;
6468 -+ bfqq = rb_entry(parent, struct bfq_queue, pos_node);
6469 -+
6470 -+ /*
6471 -+ * Sort strictly based on sector. Smallest to the left,
6472 -+ * largest to the right.
6473 -+ */
6474 -+ if (sector > blk_rq_pos(bfqq->next_rq))
6475 -+ n = &(*p)->rb_right;
6476 -+ else if (sector < blk_rq_pos(bfqq->next_rq))
6477 -+ n = &(*p)->rb_left;
6478 -+ else
6479 -+ break;
6480 -+ p = n;
6481 -+ bfqq = NULL;
6482 -+ }
6483 -+
6484 -+ *ret_parent = parent;
6485 -+ if (rb_link)
6486 -+ *rb_link = p;
6487 -+
6488 -+ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
6489 -+ (long long unsigned)sector,
6490 -+ bfqq != NULL ? bfqq->pid : 0);
6491 -+
6492 -+ return bfqq;
6493 -+}
6494 -+
6495 -+static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
6496 -+{
6497 -+ struct rb_node **p, *parent;
6498 -+ struct bfq_queue *__bfqq;
6499 -+
6500 -+ if (bfqq->pos_root != NULL) {
6501 -+ rb_erase(&bfqq->pos_node, bfqq->pos_root);
6502 -+ bfqq->pos_root = NULL;
6503 -+ }
6504 -+
6505 -+ if (bfq_class_idle(bfqq))
6506 -+ return;
6507 -+ if (!bfqq->next_rq)
6508 -+ return;
6509 -+
6510 -+ bfqq->pos_root = &bfqd->rq_pos_tree;
6511 -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
6512 -+ blk_rq_pos(bfqq->next_rq), &parent, &p);
6513 -+ if (__bfqq == NULL) {
6514 -+ rb_link_node(&bfqq->pos_node, parent, p);
6515 -+ rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
6516 -+ } else
6517 -+ bfqq->pos_root = NULL;
6518 -+}
6519 -+
6520 -+static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
6521 -+ struct bfq_queue *bfqq,
6522 -+ struct request *last)
6523 -+{
6524 -+ struct rb_node *rbnext = rb_next(&last->rb_node);
6525 -+ struct rb_node *rbprev = rb_prev(&last->rb_node);
6526 -+ struct request *next = NULL, *prev = NULL;
6527 -+
6528 -+ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
6529 -+
6530 -+ if (rbprev != NULL)
6531 -+ prev = rb_entry_rq(rbprev);
6532 -+
6533 -+ if (rbnext != NULL)
6534 -+ next = rb_entry_rq(rbnext);
6535 -+ else {
6536 -+ rbnext = rb_first(&bfqq->sort_list);
6537 -+ if (rbnext && rbnext != &last->rb_node)
6538 -+ next = rb_entry_rq(rbnext);
6539 -+ }
6540 -+
6541 -+ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
6542 -+}
6543 -+
6544 -+static void bfq_del_rq_rb(struct request *rq)
6545 -+{
6546 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6547 -+ struct bfq_data *bfqd = bfqq->bfqd;
6548 -+ const int sync = rq_is_sync(rq);
6549 -+
6550 -+ BUG_ON(bfqq->queued[sync] == 0);
6551 -+ bfqq->queued[sync]--;
6552 -+ bfqd->queued--;
6553 -+
6554 -+ elv_rb_del(&bfqq->sort_list, rq);
6555 -+
6556 -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
6557 -+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
6558 -+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
6559 -+ /*
6560 -+ * Remove queue from request-position tree as it is empty.
6561 -+ */
6562 -+ if (bfqq->pos_root != NULL) {
6563 -+ rb_erase(&bfqq->pos_node, bfqq->pos_root);
6564 -+ bfqq->pos_root = NULL;
6565 -+ }
6566 -+ }
6567 -+}
6568 -+
6569 -+/* see the definition of bfq_async_charge_factor for details */
6570 -+static inline unsigned long bfq_serv_to_charge(struct request *rq,
6571 -+ struct bfq_queue *bfqq)
6572 -+{
6573 -+ return blk_rq_sectors(rq) *
6574 -+ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) *
6575 -+ bfq_async_charge_factor));
6576 -+}
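A quick numeric illustration of the charging rule above, with an assumed request size: sync requests (and async requests of weight-raised queues) are charged their size in sectors, while async requests of non-raised queues are charged size * (1 + bfq_async_charge_factor).

#include <stdio.h>

int main(void)
{
	const unsigned long sectors = 8;   /* assumed request size, in sectors */
	const unsigned long factor = 10;   /* bfq_async_charge_factor */

	unsigned long sync_charge = sectors;                   /* sync queue: 8 */
	unsigned long async_charge = sectors * (1 + factor);   /* non-raised async: 88 */

	printf("sync charge: %lu, async charge: %lu\n", sync_charge, async_charge);
	return 0;
}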
6577 -+
6578 -+/**
6579 -+ * bfq_updated_next_req - update the queue after a new next_rq selection.
6580 -+ * @bfqd: the device data the queue belongs to.
6581 -+ * @bfqq: the queue to update.
6582 -+ *
6583 -+ * If the first request of a queue changes we make sure that the queue
6584 -+ * has enough budget to serve at least its first request (if the
6585 -+ * request has grown). We do this because if the queue does not have enough
6586 -+ * budget for its first request, it has to go through two dispatch
6587 -+ * rounds to actually get it dispatched.
6588 -+ */
6589 -+static void bfq_updated_next_req(struct bfq_data *bfqd,
6590 -+ struct bfq_queue *bfqq)
6591 -+{
6592 -+ struct bfq_entity *entity = &bfqq->entity;
6593 -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
6594 -+ struct request *next_rq = bfqq->next_rq;
6595 -+ unsigned long new_budget;
6596 -+
6597 -+ if (next_rq == NULL)
6598 -+ return;
6599 -+
6600 -+ if (bfqq == bfqd->in_service_queue)
6601 -+ /*
6602 -+ * In order not to break guarantees, budgets cannot be
6603 -+ * changed after an entity has been selected.
6604 -+ */
6605 -+ return;
6606 -+
6607 -+ BUG_ON(entity->tree != &st->active);
6608 -+ BUG_ON(entity == entity->sched_data->in_service_entity);
6609 -+
6610 -+ new_budget = max_t(unsigned long, bfqq->max_budget,
6611 -+ bfq_serv_to_charge(next_rq, bfqq));
6612 -+ entity->budget = new_budget;
6613 -+ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget);
6614 -+ bfq_activate_bfqq(bfqd, bfqq);
6615 -+}
6616 -+
6617 -+static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
6618 -+{
6619 -+ u64 dur;
6620 -+
6621 -+ if (bfqd->bfq_raising_max_time > 0)
6622 -+ return bfqd->bfq_raising_max_time;
6623 -+
6624 -+ dur = bfqd->RT_prod;
6625 -+ do_div(dur, bfqd->peak_rate);
6626 -+
6627 -+ return dur;
6628 -+}
6629 -+
6630 -+static void bfq_add_rq_rb(struct request *rq)
6631 -+{
6632 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6633 -+ struct bfq_entity *entity = &bfqq->entity;
6634 -+ struct bfq_data *bfqd = bfqq->bfqd;
6635 -+ struct request *next_rq, *prev;
6636 -+ unsigned long old_raising_coeff = bfqq->raising_coeff;
6637 -+ int idle_for_long_time = 0;
6638 -+
6639 -+ bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq));
6640 -+ bfqq->queued[rq_is_sync(rq)]++;
6641 -+ bfqd->queued++;
6642 -+
6643 -+ elv_rb_add(&bfqq->sort_list, rq);
6644 -+
6645 -+ /*
6646 -+ * Check if this request is a better next-serve candidate.
6647 -+ */
6648 -+ prev = bfqq->next_rq;
6649 -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
6650 -+ BUG_ON(next_rq == NULL);
6651 -+ bfqq->next_rq = next_rq;
6652 -+
6653 -+ /*
6654 -+ * Adjust priority tree position, if next_rq changes.
6655 -+ */
6656 -+ if (prev != bfqq->next_rq)
6657 -+ bfq_rq_pos_tree_add(bfqd, bfqq);
6658 -+
6659 -+ if (!bfq_bfqq_busy(bfqq)) {
6660 -+ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 &&
6661 -+ time_is_before_jiffies(bfqq->soft_rt_next_start);
6662 -+ idle_for_long_time = time_is_before_jiffies(
6663 -+ bfqq->budget_timeout +
6664 -+ bfqd->bfq_raising_min_idle_time);
6665 -+ entity->budget = max_t(unsigned long, bfqq->max_budget,
6666 -+ bfq_serv_to_charge(next_rq, bfqq));
6667 -+
6668 -+ if (!bfqd->low_latency)
6669 -+ goto add_bfqq_busy;
6670 -+
6671 -+ /*
6672 -+ * If the queue is not being boosted and has been idle
6673 -+ * for enough time, start a weight-raising period
6674 -+ */
6675 -+ if (old_raising_coeff == 1 &&
6676 -+ (idle_for_long_time || soft_rt)) {
6677 -+ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
6678 -+ if (idle_for_long_time)
6679 -+ bfqq->raising_cur_max_time =
6680 -+ bfq_wrais_duration(bfqd);
6681 -+ else
6682 -+ bfqq->raising_cur_max_time =
6683 -+ bfqd->bfq_raising_rt_max_time;
6684 -+ bfq_log_bfqq(bfqd, bfqq,
6685 -+ "wrais starting at %lu, "
6686 -+ "rais_max_time %u",
6687 -+ jiffies,
6688 -+ jiffies_to_msecs(bfqq->
6689 -+ raising_cur_max_time));
6690 -+ } else if (old_raising_coeff > 1) {
6691 -+ if (idle_for_long_time)
6692 -+ bfqq->raising_cur_max_time =
6693 -+ bfq_wrais_duration(bfqd);
6694 -+ else if (bfqq->raising_cur_max_time ==
6695 -+ bfqd->bfq_raising_rt_max_time &&
6696 -+ !soft_rt) {
6697 -+ bfqq->raising_coeff = 1;
6698 -+ bfq_log_bfqq(bfqd, bfqq,
6699 -+ "wrais ending at %lu, "
6700 -+ "rais_max_time %u",
6701 -+ jiffies,
6702 -+ jiffies_to_msecs(bfqq->
6703 -+ raising_cur_max_time));
6704 -+ } else if (time_before(
6705 -+ bfqq->last_rais_start_finish +
6706 -+ bfqq->raising_cur_max_time,
6707 -+ jiffies +
6708 -+ bfqd->bfq_raising_rt_max_time) &&
6709 -+ soft_rt) {
6710 -+ /*
6711 -+ *
6712 -+ * The remaining weight-raising time is lower
6713 -+ * than bfqd->bfq_raising_rt_max_time, which
6714 -+ * means that the application is enjoying
6715 -+ * weight raising either because deemed soft-
6716 -+ * rt in the near past, or because deemed
6717 -+ * interactive long ago. In both cases,
6718 -+ * resetting now the current remaining weight-
6719 -+ * raising time for the application to the
6720 -+ * weight-raising duration for soft rt
6721 -+ * applications would not cause any latency
6722 -+ * increase for the application (as the new
6723 -+ * duration would be higher than the remaining
6724 -+ * time).
6725 -+ *
6726 -+ * In addition, the application is now meeting
6727 -+ * the requirements for being deemed soft rt.
6728 -+ * In the end we can correctly and safely
6729 -+ * (re)charge the weight-raising duration for
6730 -+ * the application with the weight-raising
6731 -+ * duration for soft rt applications.
6732 -+ *
6733 -+ * In particular, doing this recharge now, i.e.,
6734 -+ * before the weight-raising period for the
6735 -+ * application finishes, reduces the probability
6736 -+ * of the following negative scenario:
6737 -+ * 1) the weight of a soft rt application is
6738 -+ * raised at startup (as for any newly
6739 -+ * created application),
6740 -+ * 2) since the application is not interactive,
6741 -+ * at a certain time weight-raising is
6742 -+ * stopped for the application,
6743 -+ * 3) at that time the application happens to
6744 -+ * still have pending requests, and hence
6745 -+ * is destined to not have a chance to be
6746 -+ * deemed soft rt before these requests are
6747 -+ * completed (see the comments to the
6748 -+ * function bfq_bfqq_softrt_next_start()
6749 -+ * for details on soft rt detection),
6750 -+ * 4) these pending requests experience a high
6751 -+ * latency because the application is not
6752 -+ * weight-raised while they are pending.
6753 -+ */
6754 -+ bfqq->last_rais_start_finish = jiffies;
6755 -+ bfqq->raising_cur_max_time =
6756 -+ bfqd->bfq_raising_rt_max_time;
6757 -+ }
6758 -+ }
6759 -+ if (old_raising_coeff != bfqq->raising_coeff)
6760 -+ entity->ioprio_changed = 1;
6761 -+add_bfqq_busy:
6762 -+ bfqq->last_idle_bklogged = jiffies;
6763 -+ bfqq->service_from_backlogged = 0;
6764 -+ bfq_clear_bfqq_softrt_update(bfqq);
6765 -+ bfq_add_bfqq_busy(bfqd, bfqq);
6766 -+ } else {
6767 -+ if (bfqd->low_latency && old_raising_coeff == 1 &&
6768 -+ !rq_is_sync(rq) &&
6769 -+ time_is_before_jiffies(
6770 -+ bfqq->last_rais_start_finish +
6771 -+ bfqd->bfq_raising_min_inter_arr_async)) {
6772 -+ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
6773 -+ bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd);
6774 -+
6775 -+ bfqd->raised_busy_queues++;
6776 -+ entity->ioprio_changed = 1;
6777 -+ bfq_log_bfqq(bfqd, bfqq,
6778 -+ "non-idle wrais starting at %lu, "
6779 -+ "rais_max_time %u",
6780 -+ jiffies,
6781 -+ jiffies_to_msecs(bfqq->
6782 -+ raising_cur_max_time));
6783 -+ }
6784 -+ bfq_updated_next_req(bfqd, bfqq);
6785 -+ }
6786 -+
6787 -+ if (bfqd->low_latency &&
6788 -+ (old_raising_coeff == 1 || bfqq->raising_coeff == 1 ||
6789 -+ idle_for_long_time))
6790 -+ bfqq->last_rais_start_finish = jiffies;
6791 -+}
6792 -+
6793 -+static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq)
6794 -+{
6795 -+ elv_rb_del(&bfqq->sort_list, rq);
6796 -+ bfqq->queued[rq_is_sync(rq)]--;
6797 -+ bfqq->bfqd->queued--;
6798 -+ bfq_add_rq_rb(rq);
6799 -+}
6800 -+
6801 -+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
6802 -+ struct bio *bio)
6803 -+{
6804 -+ struct task_struct *tsk = current;
6805 -+ struct bfq_io_cq *bic;
6806 -+ struct bfq_queue *bfqq;
6807 -+
6808 -+ bic = bfq_bic_lookup(bfqd, tsk->io_context);
6809 -+ if (bic == NULL)
6810 -+ return NULL;
6811 -+
6812 -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
6813 -+ if (bfqq != NULL)
6814 -+ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
6815 -+
6816 -+ return NULL;
6817 -+}
6818 -+
6819 -+static void bfq_activate_request(struct request_queue *q, struct request *rq)
6820 -+{
6821 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6822 -+
6823 -+ bfqd->rq_in_driver++;
6824 -+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
6825 -+ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
6826 -+ (long long unsigned)bfqd->last_position);
6827 -+}
6828 -+
6829 -+static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
6830 -+{
6831 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6832 -+
6833 -+ WARN_ON(bfqd->rq_in_driver == 0);
6834 -+ bfqd->rq_in_driver--;
6835 -+}
6836 -+
6837 -+static void bfq_remove_request(struct request *rq)
6838 -+{
6839 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6840 -+ struct bfq_data *bfqd = bfqq->bfqd;
6841 -+
6842 -+ if (bfqq->next_rq == rq) {
6843 -+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
6844 -+ bfq_updated_next_req(bfqd, bfqq);
6845 -+ }
6846 -+
6847 -+ list_del_init(&rq->queuelist);
6848 -+ bfq_del_rq_rb(rq);
6849 -+
6850 -+ if (rq->cmd_flags & REQ_META) {
6851 -+ WARN_ON(bfqq->meta_pending == 0);
6852 -+ bfqq->meta_pending--;
6853 -+ }
6854 -+}
6855 -+
6856 -+static int bfq_merge(struct request_queue *q, struct request **req,
6857 -+ struct bio *bio)
6858 -+{
6859 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6860 -+ struct request *__rq;
6861 -+
6862 -+ __rq = bfq_find_rq_fmerge(bfqd, bio);
6863 -+ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) {
6864 -+ *req = __rq;
6865 -+ return ELEVATOR_FRONT_MERGE;
6866 -+ }
6867 -+
6868 -+ return ELEVATOR_NO_MERGE;
6869 -+}
6870 -+
6871 -+static void bfq_merged_request(struct request_queue *q, struct request *req,
6872 -+ int type)
6873 -+{
6874 -+ if (type == ELEVATOR_FRONT_MERGE) {
6875 -+ struct bfq_queue *bfqq = RQ_BFQQ(req);
6876 -+
6877 -+ bfq_reposition_rq_rb(bfqq, req);
6878 -+ }
6879 -+}
6880 -+
6881 -+static void bfq_merged_requests(struct request_queue *q, struct request *rq,
6882 -+ struct request *next)
6883 -+{
6884 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
6885 -+
6886 -+ /*
6887 -+ * Reposition in fifo if next is older than rq.
6888 -+ */
6889 -+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
6890 -+ time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
6891 -+ list_move(&rq->queuelist, &next->queuelist);
6892 -+ rq_set_fifo_time(rq, rq_fifo_time(next));
6893 -+ }
6894 -+
6895 -+ if (bfqq->next_rq == next)
6896 -+ bfqq->next_rq = rq;
6897 -+
6898 -+ bfq_remove_request(next);
6899 -+}
6900 -+
6901 -+/* Must be called with bfqq != NULL */
6902 -+static inline void bfq_bfqq_end_raising(struct bfq_queue *bfqq)
6903 -+{
6904 -+ BUG_ON(bfqq == NULL);
6905 -+ if (bfq_bfqq_busy(bfqq))
6906 -+ bfqq->bfqd->raised_busy_queues--;
6907 -+ bfqq->raising_coeff = 1;
6908 -+ bfqq->raising_cur_max_time = 0;
6909 -+ /* Trigger a weight change on the next activation of the queue */
6910 -+ bfqq->entity.ioprio_changed = 1;
6911 -+}
6912 -+
6913 -+static void bfq_end_raising_async_queues(struct bfq_data *bfqd,
6914 -+ struct bfq_group *bfqg)
6915 -+{
6916 -+ int i, j;
6917 -+
6918 -+ for (i = 0; i < 2; i++)
6919 -+ for (j = 0; j < IOPRIO_BE_NR; j++)
6920 -+ if (bfqg->async_bfqq[i][j] != NULL)
6921 -+ bfq_bfqq_end_raising(bfqg->async_bfqq[i][j]);
6922 -+ if (bfqg->async_idle_bfqq != NULL)
6923 -+ bfq_bfqq_end_raising(bfqg->async_idle_bfqq);
6924 -+}
6925 -+
6926 -+static void bfq_end_raising(struct bfq_data *bfqd)
6927 -+{
6928 -+ struct bfq_queue *bfqq;
6929 -+
6930 -+ spin_lock_irq(bfqd->queue->queue_lock);
6931 -+
6932 -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
6933 -+ bfq_bfqq_end_raising(bfqq);
6934 -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
6935 -+ bfq_bfqq_end_raising(bfqq);
6936 -+ bfq_end_raising_async(bfqd);
6937 -+
6938 -+ spin_unlock_irq(bfqd->queue->queue_lock);
6939 -+}
6940 -+
6941 -+static int bfq_allow_merge(struct request_queue *q, struct request *rq,
6942 -+ struct bio *bio)
6943 -+{
6944 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
6945 -+ struct bfq_io_cq *bic;
6946 -+ struct bfq_queue *bfqq;
6947 -+
6948 -+ /*
6949 -+ * Disallow merge of a sync bio into an async request.
6950 -+ */
6951 -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
6952 -+ return 0;
6953 -+
6954 -+ /*
6955 -+ * Lookup the bfqq that this bio will be queued with. Allow
6956 -+ * merge only if rq is queued there.
6957 -+ * Queue lock is held here.
6958 -+ */
6959 -+ bic = bfq_bic_lookup(bfqd, current->io_context);
6960 -+ if (bic == NULL)
6961 -+ return 0;
6962 -+
6963 -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
6964 -+ return bfqq == RQ_BFQQ(rq);
6965 -+}
6966 -+
6967 -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
6968 -+ struct bfq_queue *bfqq)
6969 -+{
6970 -+ if (bfqq != NULL) {
6971 -+ bfq_mark_bfqq_must_alloc(bfqq);
6972 -+ bfq_mark_bfqq_budget_new(bfqq);
6973 -+ bfq_clear_bfqq_fifo_expire(bfqq);
6974 -+
6975 -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
6976 -+
6977 -+ bfq_log_bfqq(bfqd, bfqq,
6978 -+ "set_in_service_queue, cur-budget = %lu",
6979 -+ bfqq->entity.budget);
6980 -+ }
6981 -+
6982 -+ bfqd->in_service_queue = bfqq;
6983 -+}
6984 -+
6985 -+/*
6986 -+ * Get and set a new queue for service.
6987 -+ */
6988 -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
6989 -+ struct bfq_queue *bfqq)
6990 -+{
6991 -+ if (!bfqq)
6992 -+ bfqq = bfq_get_next_queue(bfqd);
6993 -+ else
6994 -+ bfq_get_next_queue_forced(bfqd, bfqq);
6995 -+
6996 -+ __bfq_set_in_service_queue(bfqd, bfqq);
6997 -+ return bfqq;
6998 -+}
6999 -+
7000 -+static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
7001 -+ struct request *rq)
7002 -+{
7003 -+ if (blk_rq_pos(rq) >= bfqd->last_position)
7004 -+ return blk_rq_pos(rq) - bfqd->last_position;
7005 -+ else
7006 -+ return bfqd->last_position - blk_rq_pos(rq);
7007 -+}
7008 -+
7009 -+/*
7010 -+ * Return true if bfqq has no request pending and rq is close enough to
7011 -+ * bfqd->last_position, or if rq is closer to bfqd->last_position than
7012 -+ * bfqq->next_rq
7013 -+ */
7014 -+static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
7015 -+{
7016 -+ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
7017 -+}
7018 -+
7019 -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
7020 -+{
7021 -+ struct rb_root *root = &bfqd->rq_pos_tree;
7022 -+ struct rb_node *parent, *node;
7023 -+ struct bfq_queue *__bfqq;
7024 -+ sector_t sector = bfqd->last_position;
7025 -+
7026 -+ if (RB_EMPTY_ROOT(root))
7027 -+ return NULL;
7028 -+
7029 -+ /*
7030 -+ * First, if we find a request starting at the end of the last
7031 -+ * request, choose it.
7032 -+ */
7033 -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
7034 -+ if (__bfqq != NULL)
7035 -+ return __bfqq;
7036 -+
7037 -+ /*
7038 -+ * If the exact sector wasn't found, the parent of the NULL leaf
7039 -+ * will contain the closest sector (rq_pos_tree sorted by next_request
7040 -+ * position).
7041 -+ */
7042 -+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
7043 -+ if (bfq_rq_close(bfqd, __bfqq->next_rq))
7044 -+ return __bfqq;
7045 -+
7046 -+ if (blk_rq_pos(__bfqq->next_rq) < sector)
7047 -+ node = rb_next(&__bfqq->pos_node);
7048 -+ else
7049 -+ node = rb_prev(&__bfqq->pos_node);
7050 -+ if (node == NULL)
7051 -+ return NULL;
7052 -+
7053 -+ __bfqq = rb_entry(node, struct bfq_queue, pos_node);
7054 -+ if (bfq_rq_close(bfqd, __bfqq->next_rq))
7055 -+ return __bfqq;
7056 -+
7057 -+ return NULL;
7058 -+}
7059 -+
7060 -+/*
7061 -+ * bfqd - obvious
7062 -+ * cur_bfqq - passed in so that we don't decide that the current queue
7063 -+ * is closely cooperating with itself.
7064 -+ *
7065 -+ * We are assuming that cur_bfqq has dispatched at least one request,
7066 -+ * and that bfqd->last_position reflects a position on the disk associated
7067 -+ * with the I/O issued by cur_bfqq.
7068 -+ */
7069 -+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
7070 -+ struct bfq_queue *cur_bfqq)
7071 -+{
7072 -+ struct bfq_queue *bfqq;
7073 -+
7074 -+ if (bfq_class_idle(cur_bfqq))
7075 -+ return NULL;
7076 -+ if (!bfq_bfqq_sync(cur_bfqq))
7077 -+ return NULL;
7078 -+ if (BFQQ_SEEKY(cur_bfqq))
7079 -+ return NULL;
7080 -+
7081 -+ /* If device has only one backlogged bfq_queue, don't search. */
7082 -+ if (bfqd->busy_queues == 1)
7083 -+ return NULL;
7084 -+
7085 -+ /*
7086 -+ * We should notice if some of the queues are cooperating, e.g.
7087 -+ * working closely on the same area of the disk. In that case,
7088 -+ * we can group them together and avoid wasting time idling.
7089 -+ */
7090 -+ bfqq = bfqq_close(bfqd);
7091 -+ if (bfqq == NULL || bfqq == cur_bfqq)
7092 -+ return NULL;
7093 -+
7094 -+ /*
7095 -+ * Do not merge queues from different bfq_groups.
7096 -+ */
7097 -+ if (bfqq->entity.parent != cur_bfqq->entity.parent)
7098 -+ return NULL;
7099 -+
7100 -+ /*
7101 -+ * It only makes sense to merge sync queues.
7102 -+ */
7103 -+ if (!bfq_bfqq_sync(bfqq))
7104 -+ return NULL;
7105 -+ if (BFQQ_SEEKY(bfqq))
7106 -+ return NULL;
7107 -+
7108 -+ /*
7109 -+ * Do not merge queues of different priority classes.
7110 -+ */
7111 -+ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
7112 -+ return NULL;
7113 -+
7114 -+ return bfqq;
7115 -+}
7116 -+
7117 -+/*
7118 -+ * If enough samples have been computed, return the current max budget
7119 -+ * stored in bfqd, which is dynamically updated according to the
7120 -+ * estimated disk peak rate; otherwise return the default max budget
7121 -+ */
7122 -+static inline unsigned long bfq_max_budget(struct bfq_data *bfqd)
7123 -+{
7124 -+ if (bfqd->budgets_assigned < 194)
7125 -+ return bfq_default_max_budget;
7126 -+ else
7127 -+ return bfqd->bfq_max_budget;
7128 -+}
7129 -+
7130 -+/*
7131 -+ * Return min budget, which is a fraction of the current or default
7132 -+ * max budget (trying with 1/32)
7133 -+ */
7134 -+static inline unsigned long bfq_min_budget(struct bfq_data *bfqd)
7135 -+{
7136 -+ if (bfqd->budgets_assigned < 194)
7137 -+ return bfq_default_max_budget / 32;
7138 -+ else
7139 -+ return bfqd->bfq_max_budget / 32;
7140 -+}
7141 -+
7142 -+/*
7143 -+ * Decides whether idling should be done for given device and
7144 -+ * given in-service queue.
7145 -+ */
7146 -+static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd,
7147 -+ struct bfq_queue *in_service_bfqq)
7148 -+{
7149 -+ if (in_service_bfqq == NULL)
7150 -+ return false;
7151 -+ /*
7152 -+ * If the device is non-rotational, and hence has no seek penalty,
7153 -+ * disable idling; but do so only if:
7154 -+ * - the device supports queuing (hw_tag), otherwise we still have
7155 -+ * a problem with sync vs async workloads;
7156 -+ * - the queue is not weight-raised, to preserve guarantees.
7157 -+ */
7158 -+ return blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag &&
7159 -+ (in_service_bfqq->raising_coeff == 1);
7160 -+}
7161 -+
7162 -+static void bfq_arm_slice_timer(struct bfq_data *bfqd)
7163 -+{
7164 -+ struct bfq_queue *bfqq = bfqd->in_service_queue;
7165 -+ struct bfq_io_cq *bic;
7166 -+ unsigned long sl;
7167 -+
7168 -+ WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
7169 -+
7170 -+ /* Tasks have exited, don't wait. */
7171 -+ bic = bfqd->in_service_bic;
7172 -+ if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0)
7173 -+ return;
7174 -+
7175 -+ bfq_mark_bfqq_wait_request(bfqq);
7176 -+
7177 -+ /*
7178 -+ * We don't want to idle for seeks, but we do want to allow
7179 -+ * fair distribution of slice time for a process doing back-to-back
7180 -+ * seeks. So allow a little bit of time for it to submit a new rq.
7181 -+ *
7182 -+ * To prevent processes with (partly) seeky workloads from
7183 -+ * being too ill-treated, grant them a small fraction of the
7184 -+ * assigned budget before reducing the waiting time to
7185 -+ * BFQ_MIN_TT. This happened to help reduce latency.
7186 -+ */
7187 -+ sl = bfqd->bfq_slice_idle;
7188 -+ if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) &&
7189 -+ bfqq->entity.service > bfq_max_budget(bfqd) / 8 &&
7190 -+ bfqq->raising_coeff == 1)
7191 -+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
7192 -+ else if (bfqq->raising_coeff > 1)
7193 -+ sl = sl * 3;
7194 -+ bfqd->last_idling_start = ktime_get();
7195 -+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
7196 -+ bfq_log(bfqd, "arm idle: %u/%u ms",
7197 -+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
7198 -+}
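To make the idling-time scaling above concrete, here is a minimal sketch with assumed values (HZ = 1000, so one jiffy is 1 ms): seeky, non-weight-raised queues that have already consumed a fraction of their budget idle for at most BFQ_MIN_TT, while weight-raised queues idle for three times the base slice.

#include <stdio.h>

int main(void)
{
	const unsigned long hz = 1000;             /* assumed HZ */
	const unsigned long slice_idle = hz / 125; /* bfq_slice_idle: 8 jiffies = 8 ms */
	const unsigned long min_tt = 2;            /* BFQ_MIN_TT, in ms */

	unsigned long sl_seeky = slice_idle < min_tt ? slice_idle : min_tt; /* 2 ms */
	unsigned long sl_raised = slice_idle * 3;                           /* 24 ms */

	printf("default: %lu ms, seeky: %lu ms, weight-raised: %lu ms\n",
	       slice_idle, sl_seeky, sl_raised);
	return 0;
}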
7199 -+
7200 -+/*
7201 -+ * Set the maximum time for the in-service queue to consume its
7202 -+ * budget. This prevents seeky processes from lowering the disk
7203 -+ * throughput (always guaranteed with a time slice scheme as in CFQ).
7204 -+ */
7205 -+static void bfq_set_budget_timeout(struct bfq_data *bfqd)
7206 -+{
7207 -+ struct bfq_queue *bfqq = bfqd->in_service_queue;
7208 -+ unsigned int timeout_coeff;
7209 -+ if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time)
7210 -+ timeout_coeff = 1;
7211 -+ else
7212 -+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
7213 -+
7214 -+ bfqd->last_budget_start = ktime_get();
7215 -+
7216 -+ bfq_clear_bfqq_budget_new(bfqq);
7217 -+ bfqq->budget_timeout = jiffies +
7218 -+ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
7219 -+
7220 -+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
7221 -+ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
7222 -+ timeout_coeff));
7223 -+}
7224 -+
7225 -+/*
7226 -+ * Move request from internal lists to the request queue dispatch list.
7227 -+ */
7228 -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
7229 -+{
7230 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
7231 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
7232 -+
7233 -+ bfq_remove_request(rq);
7234 -+ bfqq->dispatched++;
7235 -+ elv_dispatch_sort(q, rq);
7236 -+
7237 -+ if (bfq_bfqq_sync(bfqq))
7238 -+ bfqd->sync_flight++;
7239 -+}
7240 -+
7241 -+/*
7242 -+ * Return expired entry, or NULL to just start from scratch in rbtree.
7243 -+ */
7244 -+static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
7245 -+{
7246 -+ struct request *rq = NULL;
7247 -+
7248 -+ if (bfq_bfqq_fifo_expire(bfqq))
7249 -+ return NULL;
7250 -+
7251 -+ bfq_mark_bfqq_fifo_expire(bfqq);
7252 -+
7253 -+ if (list_empty(&bfqq->fifo))
7254 -+ return NULL;
7255 -+
7256 -+ rq = rq_entry_fifo(bfqq->fifo.next);
7257 -+
7258 -+ if (time_before(jiffies, rq_fifo_time(rq)))
7259 -+ return NULL;
7260 -+
7261 -+ return rq;
7262 -+}
7263 -+
7264 -+/*
7265 -+ * Must be called with the queue_lock held.
7266 -+ */
7267 -+static int bfqq_process_refs(struct bfq_queue *bfqq)
7268 -+{
7269 -+ int process_refs, io_refs;
7270 -+
7271 -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
7272 -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
7273 -+ BUG_ON(process_refs < 0);
7274 -+ return process_refs;
7275 -+}
7276 -+
7277 -+static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
7278 -+{
7279 -+ int process_refs, new_process_refs;
7280 -+ struct bfq_queue *__bfqq;
7281 -+
7282 -+ /*
7283 -+ * If there are no process references on the new_bfqq, then it is
7284 -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
7285 -+ * may have dropped their last reference (not just their last process
7286 -+ * reference).
7287 -+ */
7288 -+ if (!bfqq_process_refs(new_bfqq))
7289 -+ return;
7290 -+
7291 -+ /* Avoid a circular list and skip interim queue merges. */
7292 -+ while ((__bfqq = new_bfqq->new_bfqq)) {
7293 -+ if (__bfqq == bfqq)
7294 -+ return;
7295 -+ new_bfqq = __bfqq;
7296 -+ }
7297 -+
7298 -+ process_refs = bfqq_process_refs(bfqq);
7299 -+ new_process_refs = bfqq_process_refs(new_bfqq);
7300 -+ /*
7301 -+ * If the process for the bfqq has gone away, there is no
7302 -+ * sense in merging the queues.
7303 -+ */
7304 -+ if (process_refs == 0 || new_process_refs == 0)
7305 -+ return;
7306 -+
7307 -+ /*
7308 -+ * Merge in the direction of the lesser amount of work.
7309 -+ */
7310 -+ if (new_process_refs >= process_refs) {
7311 -+ bfqq->new_bfqq = new_bfqq;
7312 -+ atomic_add(process_refs, &new_bfqq->ref);
7313 -+ } else {
7314 -+ new_bfqq->new_bfqq = bfqq;
7315 -+ atomic_add(new_process_refs, &bfqq->ref);
7316 -+ }
7317 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
7318 -+ new_bfqq->pid);
7319 -+}
7320 -+
7321 -+static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
7322 -+{
7323 -+ struct bfq_entity *entity = &bfqq->entity;
7324 -+ return entity->budget - entity->service;
7325 -+}
7326 -+
7327 -+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
7328 -+{
7329 -+ BUG_ON(bfqq != bfqd->in_service_queue);
7330 -+
7331 -+ __bfq_bfqd_reset_in_service(bfqd);
7332 -+
7333 -+ /*
7334 -+ * If this bfqq is shared between multiple processes, check
7335 -+ * to make sure that those processes are still issuing I/Os
7336 -+ * within the mean seek distance. If not, it may be time to
7337 -+ * break the queues apart again.
7338 -+ */
7339 -+ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
7340 -+ bfq_mark_bfqq_split_coop(bfqq);
7341 -+
7342 -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
7343 -+ /*
7344 -+ * overloading budget_timeout field to store when
7345 -+ * the queue remains with no backlog, used by
7346 -+ * the weight-raising mechanism
7347 -+ */
7348 -+ bfqq->budget_timeout = jiffies;
7349 -+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
7350 -+ } else {
7351 -+ bfq_activate_bfqq(bfqd, bfqq);
7352 -+ /*
7353 -+ * Resort priority tree of potential close cooperators.
7354 -+ */
7355 -+ bfq_rq_pos_tree_add(bfqd, bfqq);
7356 -+ }
7357 -+}
7358 -+
7359 -+/**
7360 -+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
7361 -+ * @bfqd: device data.
7362 -+ * @bfqq: queue to update.
7363 -+ * @reason: reason for expiration.
7364 -+ *
7365 -+ * Handle the feedback on @bfqq budget. See the body for detailed
7366 -+ * comments.
7367 -+ */
7368 -+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
7369 -+ struct bfq_queue *bfqq,
7370 -+ enum bfqq_expiration reason)
7371 -+{
7372 -+ struct request *next_rq;
7373 -+ unsigned long budget, min_budget;
7374 -+
7375 -+ budget = bfqq->max_budget;
7376 -+ min_budget = bfq_min_budget(bfqd);
7377 -+
7378 -+ BUG_ON(bfqq != bfqd->in_service_queue);
7379 -+
7380 -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu",
7381 -+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
7382 -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu",
7383 -+ budget, bfq_min_budget(bfqd));
7384 -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
7385 -+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
7386 -+
7387 -+ if (bfq_bfqq_sync(bfqq)) {
7388 -+ switch (reason) {
7389 -+ /*
7390 -+ * Caveat: in all the following cases we trade latency
7391 -+ * for throughput.
7392 -+ */
7393 -+ case BFQ_BFQQ_TOO_IDLE:
7394 -+ /*
7395 -+ * This is the only case where we may reduce
7396 -+ * the budget: if there is no request of the
7397 -+ * process still waiting for completion, then
7398 -+ * we assume (tentatively) that the timer has
7399 -+ * expired because the batch of requests of
7400 -+ * the process could have been served with a
7401 -+ * smaller budget. Hence, betting that
7402 -+ * process will behave in the same way when it
7403 -+ * becomes backlogged again, we reduce its
7404 -+ * next budget. As long as we guess right,
7405 -+ * this budget cut reduces the latency
7406 -+ * experienced by the process.
7407 -+ *
7408 -+ * However, if there are still outstanding
7409 -+ * requests, then the process may have not yet
7410 -+ * issued its next request just because it is
7411 -+ * still waiting for the completion of some of
7412 -+ * the still outstanding ones. So in this
7413 -+ * subcase we do not reduce its budget, on the
7414 -+ * contrary we increase it to possibly boost
7415 -+ * the throughput, as discussed in the
7416 -+ * comments to the BUDGET_TIMEOUT case.
7417 -+ */
7418 -+ if (bfqq->dispatched > 0) /* still outstanding reqs */
7419 -+ budget = min(budget * 2, bfqd->bfq_max_budget);
7420 -+ else {
7421 -+ if (budget > 5 * min_budget)
7422 -+ budget -= 4 * min_budget;
7423 -+ else
7424 -+ budget = min_budget;
7425 -+ }
7426 -+ break;
7427 -+ case BFQ_BFQQ_BUDGET_TIMEOUT:
7428 -+ /*
7429 -+ * We double the budget here because: 1) it
7430 -+ * gives the chance to boost the throughput if
7431 -+ * this is not a seeky process (which may have
7432 -+ * bumped into this timeout because of, e.g.,
7433 -+ * ZBR), 2) together with charge_full_budget
7434 -+ * it helps give seeky processes higher
7435 -+ * timestamps, and hence be served less
7436 -+ * frequently.
7437 -+ */
7438 -+ budget = min(budget * 2, bfqd->bfq_max_budget);
7439 -+ break;
7440 -+ case BFQ_BFQQ_BUDGET_EXHAUSTED:
7441 -+ /*
7442 -+ * The process still has backlog, and did not
7443 -+ * let either the budget timeout or the disk
7444 -+ * idling timeout expire. Hence it is not
7445 -+ * seeky, has a short thinktime and may be
7446 -+ * happy with a higher budget too. So
7447 -+ * definitely increase the budget of this good
7448 -+ * candidate to boost the disk throughput.
7449 -+ */
7450 -+ budget = min(budget * 4, bfqd->bfq_max_budget);
7451 -+ break;
7452 -+ case BFQ_BFQQ_NO_MORE_REQUESTS:
7453 -+ /*
7454 -+ * Leave the budget unchanged.
7455 -+ */
7456 -+ default:
7457 -+ return;
7458 -+ }
7459 -+ } else /* async queue */
7460 -+ /* async queues always get the maximum possible budget
7461 -+ * (their ability to dispatch is limited by
7462 -+ * @bfqd->bfq_max_budget_async_rq).
7463 -+ */
7464 -+ budget = bfqd->bfq_max_budget;
7465 -+
7466 -+ bfqq->max_budget = budget;
7467 -+
7468 -+ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
7469 -+ bfqq->max_budget > bfqd->bfq_max_budget)
7470 -+ bfqq->max_budget = bfqd->bfq_max_budget;
7471 -+
7472 -+ /*
7473 -+ * Make sure that we have enough budget for the next request.
7474 -+ * Since the finish time of the bfqq must be kept in sync with
7475 -+ * the budget, be sure to call __bfq_bfqq_expire() after the
7476 -+ * update.
7477 -+ */
7478 -+ next_rq = bfqq->next_rq;
7479 -+ if (next_rq != NULL)
7480 -+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
7481 -+ bfq_serv_to_charge(next_rq, bfqq));
7482 -+ else
7483 -+ bfqq->entity.budget = bfqq->max_budget;
7484 -+
7485 -+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
7486 -+ next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
7487 -+ bfqq->entity.budget);
7488 -+}
7489 -+
7490 -+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
7491 -+{
7492 -+ unsigned long max_budget;
7493 -+
7494 -+ /*
7495 -+ * The max_budget calculated when autotuning is equal to the
7496 -+ * number of sectors transferred in timeout_sync at the
7497 -+ * estimated peak rate.
7498 -+ */
7499 -+ max_budget = (unsigned long)(peak_rate * 1000 *
7500 -+ timeout >> BFQ_RATE_SHIFT);
7501 -+
7502 -+ return max_budget;
7503 -+}
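A worked example of the autotuned budget above, with an assumed peak rate and HZ = 1000 (so the sync timeout of HZ/8 jiffies is 125 ms). The peak rate is in sectors/usec left-shifted by BFQ_RATE_SHIFT, so multiplying by the timeout in ms times 1000 and shifting back yields sectors.

#include <stdio.h>

#define BFQ_RATE_SHIFT 16

int main(void)
{
	const unsigned long long peak_rate = 17415; /* assumed: ~0.27 sectors/usec in fixed point */
	const unsigned long long timeout_ms = 125;  /* bfq_timeout_sync at HZ = 1000 */

	unsigned long max_budget =
		(unsigned long)(peak_rate * 1000 * timeout_ms >> BFQ_RATE_SHIFT);

	printf("autotuned max_budget: %lu sectors\n", max_budget); /* about 33216 */
	return 0;
}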
7504 -+
7505 -+/*
7506 -+ * In addition to updating the peak rate, checks whether the process
7507 -+ * is "slow", and returns 1 if so. This slow flag is used, in addition
7508 -+ * to the budget timeout, to reduce the amount of service provided to
7509 -+ * seeky processes, and hence reduce their chances to lower the
7510 -+ * throughput. See the code for more details.
7511 -+ */
7512 -+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
7513 -+ int compensate, enum bfqq_expiration reason)
7514 -+{
7515 -+ u64 bw, usecs, expected, timeout;
7516 -+ ktime_t delta;
7517 -+ int update = 0;
7518 -+
7519 -+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
7520 -+ return 0;
7521 -+
7522 -+ if (compensate)
7523 -+ delta = bfqd->last_idling_start;
7524 -+ else
7525 -+ delta = ktime_get();
7526 -+ delta = ktime_sub(delta, bfqd->last_budget_start);
7527 -+ usecs = ktime_to_us(delta);
7528 -+
7529 -+ /* Don't trust short/unrealistic values. */
7530 -+ if (usecs < 100 || usecs >= LONG_MAX)
7531 -+ return 0;
7532 -+
7533 -+ /*
7534 -+ * Calculate the bandwidth for the last slice. We use a 64 bit
7535 -+ * value to store the peak rate, in sectors per usec in fixed
7536 -+ * point math. We do so to have enough precision in the estimate
7537 -+ * and to avoid overflows.
7538 -+ */
7539 -+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
7540 -+ do_div(bw, (unsigned long)usecs);
7541 -+
7542 -+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
7543 -+
7544 -+ /*
7545 -+ * Use only long (> 20ms) intervals to filter out spikes for
7546 -+ * the peak rate estimation.
7547 -+ */
7548 -+ if (usecs > 20000) {
7549 -+ if (bw > bfqd->peak_rate ||
7550 -+ (!BFQQ_SEEKY(bfqq) &&
7551 -+ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
7552 -+ bfq_log(bfqd, "measured bw =%llu", bw);
7553 -+ /*
7554 -+ * To smooth oscillations use a low-pass filter with
7555 -+ * alpha=7/8, i.e.,
7556 -+ * new_rate = (7/8) * old_rate + (1/8) * bw
7557 -+ */
7558 -+ do_div(bw, 8);
7559 -+ if (bw == 0)
7560 -+ return 0;
7561 -+ bfqd->peak_rate *= 7;
7562 -+ do_div(bfqd->peak_rate, 8);
7563 -+ bfqd->peak_rate += bw;
7564 -+ update = 1;
7565 -+ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
7566 -+ }
7567 -+
7568 -+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
7569 -+
7570 -+ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
7571 -+ bfqd->peak_rate_samples++;
7572 -+
7573 -+ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
7574 -+ update && bfqd->bfq_user_max_budget == 0) {
7575 -+ bfqd->bfq_max_budget =
7576 -+ bfq_calc_max_budget(bfqd->peak_rate, timeout);
7577 -+ bfq_log(bfqd, "new max_budget=%lu",
7578 -+ bfqd->bfq_max_budget);
7579 -+ }
7580 -+ }
7581 -+
7582 -+ /*
7583 -+ * If the process has been served for too short a time
7584 -+ * interval to let its possible sequential accesses prevail over
7585 -+ * the initial seek time needed to move the disk head onto the
7586 -+ * first sector it requested, then give the process a chance
7587 -+ * and for the moment return false.
7588 -+ */
7589 -+ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
7590 -+ return 0;
7591 -+
7592 -+ /*
7593 -+ * A process is considered ``slow'' (i.e., seeky, so that we
7594 -+ * cannot treat it fairly in the service domain, as it would
7595 -+ * slow down too much the other processes) if, when a slice
7596 -+ * ends for whatever reason, it has received service at a
7597 -+ * rate that would not be high enough to complete the budget
7598 -+ * before the budget timeout expiration.
7599 -+ */
7600 -+ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
7601 -+
7602 -+ /*
7603 -+ * Caveat: processes doing IO in the slower disk zones will
7604 -+ * tend to be slow(er) even if not seeky. And the estimated
7605 -+ * peak rate will actually be an average over the disk
7606 -+ * surface. Hence, to not be too harsh with unlucky processes,
7607 -+ * we keep a budget/3 margin of safety before declaring a
7608 -+ * process slow.
7609 -+ */
7610 -+ return expected > (4 * bfqq->entity.budget) / 3;
7611 -+}
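The 7/8 low-pass filter used above is easy to isolate; the stand-alone sketch below reproduces the same integer arithmetic (do_div replaced by plain division) on made-up sample values.

#include <stdio.h>

/* new_rate = (7/8) * old_rate + (1/8) * bw, with integer truncation. */
static unsigned long long smooth_rate(unsigned long long old_rate,
				      unsigned long long bw)
{
	bw /= 8;
	if (bw == 0)
		return old_rate;   /* sample too small: keep the old estimate */
	return old_rate * 7 / 8 + bw;
}

int main(void)
{
	unsigned long long rate = 16000;  /* assumed current estimate */

	rate = smooth_rate(rate, 24000);  /* one new measured sample */
	printf("smoothed peak rate: %llu\n", rate); /* 17000 */
	return 0;
}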
7612 -+
7613 -+/*
7614 -+ * To be deemed as soft real-time, an application must meet two requirements.
7615 -+ * First, the application must not require an average bandwidth higher than
7616 -+ * the approximate bandwidth required to play back or record a compressed high-
7617 -+ * definition video.
7618 -+ * The next function is invoked on the completion of the last request of a
7619 -+ * batch, to compute the next-start time instant, soft_rt_next_start, such
7620 -+ * that, if the next request of the application does not arrive before
7621 -+ * soft_rt_next_start, then the above requirement on the bandwidth is met.
7622 -+ *
7623 -+ * The second requirement is that the request pattern of the application is
7624 -+ * isochronous, i.e., that, after issuing a request or a batch of requests,
7625 -+ * the application stops issuing new requests until all its pending requests
7626 -+ * have been completed. After that, the application may issue a new batch,
7627 -+ * and so on.
7628 -+ * For this reason the next function is invoked to compute soft_rt_next_start
7629 -+ * only for applications that meet this requirement, whereas soft_rt_next_start
7630 -+ * is set to infinity for applications that do not.
7631 -+ *
7632 -+ * Unfortunately, even a greedy application may happen to behave in an
7633 -+ * isochronous way if the CPU load is high. In fact, the application may stop
7634 -+ * issuing requests while the CPUs are busy serving other processes, then
7635 -+ * restart, then stop again for a while, and so on. In addition, if the disk
7636 -+ * achieves a low enough throughput with the request pattern issued by the
7637 -+ * application (e.g., because the request pattern is random and/or the device
7638 -+ * is slow), then the application may meet the above bandwidth requirement too.
7639 -+ * To prevent such a greedy application from being deemed soft real-time, a
7640 -+ * further rule is used in the computation of soft_rt_next_start:
7641 -+ * soft_rt_next_start must be higher than the current time plus the maximum
7642 -+ * time for which the arrival of a request is waited for when a sync queue
7643 -+ * becomes idle, namely bfqd->bfq_slice_idle.
7644 -+ * This filters out greedy applications, as the latter instead issue their next
7645 -+ * request as soon as possible after the last one has been completed (in
7646 -+ * contrast, when a batch of requests is completed, a soft real-time application
7647 -+ * spends some time processing data).
7648 -+ *
7649 -+ * Unfortunately, the last filter may easily generate false positives if only
7650 -+ * bfqd->bfq_slice_idle is used as a reference time interval and one or both
7651 -+ * the following cases occur:
7652 -+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
7653 -+ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
7654 -+ * HZ=100.
7655 -+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
7656 -+ * for a while, then suddenly 'jump' by several units to recover the lost
7657 -+ * increments. This seems to happen, e.g., inside virtual machines.
7658 -+ * To address this issue, we do not use as a reference time interval just
7659 -+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
7660 -+ * particular we add the minimum number of jiffies for which the filter seems
7661 -+ * to be quite precise also in embedded systems and KVM/QEMU virtual machines.
7662 -+ */
7663 -+static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
7664 -+ struct bfq_queue *bfqq)
7665 -+{
7666 -+ return max(bfqq->last_idle_bklogged +
7667 -+ HZ * bfqq->service_from_backlogged /
7668 -+ bfqd->bfq_raising_max_softrt_rate,
7669 -+ jiffies + bfqq->bfqd->bfq_slice_idle + 4);
7670 -+}
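To see both bounds above at work, here is a worked example with assumed numbers (HZ, rate and jiffy values are all hypothetical): the bandwidth bound delays the next-start instant until the average rate falls below bfq_raising_max_softrt_rate, while the idle bound keeps greedy applications from qualifying.

#include <stdio.h>

int main(void)
{
	const unsigned long hz = 1000;                       /* assumed HZ */
	const unsigned long last_idle_bklogged = 100000;     /* jiffies, assumed */
	const unsigned long service_from_backlogged = 2800;  /* sectors served, assumed */
	const unsigned long max_softrt_rate = 7000;          /* sectors/sec, assumed */
	const unsigned long now = 100300, slice_idle = 8;    /* jiffies, assumed */

	unsigned long bw_bound = last_idle_bklogged +
		hz * service_from_backlogged / max_softrt_rate;  /* 100400 */
	unsigned long idle_bound = now + slice_idle + 4;         /* 100312 */
	unsigned long next = bw_bound > idle_bound ? bw_bound : idle_bound;

	printf("soft_rt_next_start = %lu jiffies\n", next);  /* 100400 */
	return 0;
}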
7671 -+
7672 -+/*
7673 -+ * Return the largest-possible time instant such that, for as long as possible,
7674 -+ * the current time will be lower than this time instant according to the macro
7675 -+ * time_is_before_jiffies().
7676 -+ */
7677 -+static inline unsigned long bfq_infinity_from_now(unsigned long now)
7678 -+{
7679 -+ return now + ULONG_MAX / 2;
7680 -+}
7681 -+
7682 -+/**
7683 -+ * bfq_bfqq_expire - expire a queue.
7684 -+ * @bfqd: device owning the queue.
7685 -+ * @bfqq: the queue to expire.
7686 -+ * @compensate: if true, compensate for the time spent idling.
7687 -+ * @reason: the reason causing the expiration.
7688 -+ *
7689 -+ *
7690 -+ * If the process associated with the queue is slow (i.e., seeky), or in
7691 -+ * case of budget timeout, or, finally, if it is async, we
7692 -+ * artificially charge it an entire budget (independently of the
7693 -+ * actual service it received). As a consequence, the queue will get
7694 -+ * higher timestamps than the correct ones upon reactivation, and
7695 -+ * hence it will be rescheduled as if it had received more service
7696 -+ * than what it actually received. In the end, this class of processes
7697 -+ * will receive less service in proportion to how slowly they consume
7698 -+ * their budgets (and hence how seriously they tend to lower the
7699 -+ * throughput).
7700 -+ *
7701 -+ * In contrast, when a queue expires because it has been idling for
7702 -+ * too much or because it exhausted its budget, we do not touch the
7703 -+ * amount of service it has received. Hence when the queue will be
7704 -+ * reactivated and its timestamps updated, the latter will be in sync
7705 -+ * with the actual service received by the queue until expiration.
7706 -+ *
7707 -+ * Charging a full budget to the first type of queues and the exact
7708 -+ * service to the others has the effect of using the WF2Q+ policy to
7709 -+ * schedule the former on a timeslice basis, without violating the
7710 -+ * service domain guarantees of the latter.
7711 -+ */
7712 -+static void bfq_bfqq_expire(struct bfq_data *bfqd,
7713 -+ struct bfq_queue *bfqq,
7714 -+ int compensate,
7715 -+ enum bfqq_expiration reason)
7716 -+{
7717 -+ int slow;
7718 -+ BUG_ON(bfqq != bfqd->in_service_queue);
7719 -+
7720 -+ /* Update disk peak rate for autotuning and check whether the
7721 -+ * process is slow (see bfq_update_peak_rate).
7722 -+ */
7723 -+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
7724 -+
7725 -+ /*
7726 -+ * As above explained, 'punish' slow (i.e., seeky), timed-out
7727 -+ * and async queues, to favor sequential sync workloads.
7728 -+ *
7729 -+ * Processes doing IO in the slower disk zones will tend to be
7730 -+ * slow(er) even if not seeky. Hence, since the estimated peak
7731 -+ * rate is actually an average over the disk surface, these
7732 -+ * processes may timeout just for bad luck. To avoid punishing
7733 -+ * them we do not charge a full budget to a process that
7734 -+ * succeeded in consuming at least 2/3 of its budget.
7735 -+ */
7736 -+ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
7737 -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
7738 -+ bfq_bfqq_charge_full_budget(bfqq);
7739 -+
7740 -+ bfqq->service_from_backlogged += bfqq->entity.service;
7741 -+
7742 -+ if (bfqd->low_latency && bfqq->raising_coeff == 1)
7743 -+ bfqq->last_rais_start_finish = jiffies;
7744 -+
7745 -+ if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0 &&
7746 -+ RB_EMPTY_ROOT(&bfqq->sort_list)) {
7747 -+ /*
7748 -+ * If we get here, and there are no outstanding requests,
7749 -+ * then the request pattern is isochronous (see the comments
7750 -+ * to the function bfq_bfqq_softrt_next_start()). Hence we can
7751 -+ * compute soft_rt_next_start. If, instead, the queue still
7752 -+ * has outstanding requests, then we have to wait for the
7753 -+ * completion of all the outstanding requests to discover
7754 -+ * whether the request pattern is actually isochronous.
7755 -+ */
7756 -+ if (bfqq->dispatched == 0)
7757 -+ bfqq->soft_rt_next_start =
7758 -+ bfq_bfqq_softrt_next_start(bfqd, bfqq);
7759 -+ else {
7760 -+ /*
7761 -+ * The application is still waiting for the
7762 -+ * completion of one or more requests:
7763 -+ * prevent it from possibly being incorrectly
7764 -+ * deemed as soft real-time by setting its
7765 -+ * soft_rt_next_start to infinity. In fact,
7766 -+ * without this assignment, the application
7767 -+ * would be incorrectly deemed as soft
7768 -+ * real-time if:
7769 -+ * 1) it issued a new request before the
7770 -+ * completion of all its in-flight
7771 -+ * requests, and
7772 -+ * 2) at that time, its soft_rt_next_start
7773 -+ * happened to be in the past.
7774 -+ */
7775 -+ bfqq->soft_rt_next_start =
7776 -+ bfq_infinity_from_now(jiffies);
7777 -+ /*
7778 -+ * Schedule an update of soft_rt_next_start to when
7779 -+ * the task may be discovered to be isochronous.
7780 -+ */
7781 -+ bfq_mark_bfqq_softrt_update(bfqq);
7782 -+ }
7783 -+ }
7784 -+
7785 -+ bfq_log_bfqq(bfqd, bfqq,
7786 -+ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow,
7787 -+ bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
7788 -+
7789 -+ /* Increase, decrease or leave budget unchanged according to reason */
7790 -+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
7791 -+ __bfq_bfqq_expire(bfqd, bfqq);
7792 -+}
7793 -+
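/*
 * A minimal standalone sketch, with made-up names, of the charging rule
 * applied above in bfq_bfqq_expire(): seeky/slow queues, and queues that
 * hit their budget timeout without consuming at least 2/3 of their
 * budget, are charged the whole budget; other queues are charged only
 * the service they actually received.  Illustrative only, not patch code.
 */
#include <stdio.h>

static unsigned long charge_on_expiration(unsigned long budget,
					   unsigned long received,
					   int budget_timeout, int slow)
{
	unsigned long left = budget - received;

	if (slow || (budget_timeout && left >= budget / 3))
		return budget;	/* punish: timestamps jump a full budget ahead */

	return received;	/* keep timestamps in sync with real service */
}

int main(void)
{
	/* A seeky queue that used only 10 of 100 sectors is charged 100. */
	printf("%lu\n", charge_on_expiration(100, 10, 0, 1));
	/* A well-behaved queue that used 90 of 100 is charged just 90. */
	printf("%lu\n", charge_on_expiration(100, 90, 1, 0));
	return 0;
}
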
7794 -+/*
7795 -+ * Budget timeout is not implemented through a dedicated timer, but
7796 -+ * just checked on request arrivals and completions, as well as on
7797 -+ * idle timer expirations.
7798 -+ */
7799 -+static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
7800 -+{
7801 -+ if (bfq_bfqq_budget_new(bfqq))
7802 -+ return 0;
7803 -+
7804 -+ if (time_before(jiffies, bfqq->budget_timeout))
7805 -+ return 0;
7806 -+
7807 -+ return 1;
7808 -+}
7809 -+
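/*
 * Illustration (assumed helper, not from the patch) of the wrap-safe
 * jiffies comparison that the check above relies on: time_before()
 * compares the two counters with a signed subtraction, so the deadline
 * test keeps working across a jiffies wrap-around.
 */
static inline int deadline_passed(unsigned long now, unsigned long deadline)
{
	/* Equivalent to !time_before(now, deadline). */
	return (long)(now - deadline) >= 0;
}
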
7810 -+/*
7811 -+ * If we expire a queue that is waiting for the arrival of a new
7812 -+ * request, we may prevent the fictitious timestamp back-shifting that
7813 -+ * allows the guarantees of the queue to be preserved (see [1] for
7814 -+ * this tricky aspect). Hence we return true only if this condition
7815 -+ * does not hold, or if the queue is so slow that it deserves to be
7816 -+ * expired anyway in order to preserve a high throughput.
7817 -+ */
7818 -+static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
7819 -+{
7820 -+ bfq_log_bfqq(bfqq->bfqd, bfqq,
7821 -+ "may_budget_timeout: wr %d left %d timeout %d",
7822 -+ bfq_bfqq_wait_request(bfqq),
7823 -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3,
7824 -+ bfq_bfqq_budget_timeout(bfqq));
7825 -+
7826 -+ return (!bfq_bfqq_wait_request(bfqq) ||
7827 -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)
7828 -+ &&
7829 -+ bfq_bfqq_budget_timeout(bfqq);
7830 -+}
7831 -+
7832 -+/*
7833 -+ * For weight-raised queues issuing sync requests, idling is always performed,
7834 -+ * as this is instrumental in guaranteeing a high fraction of the throughput
7835 -+ * to these queues, and hence in guaranteeing a lower latency for their
7836 -+ * requests. See [1] for details.
7837 -+ *
7838 -+ * For non-weight-raised queues, idling is instead disabled if the device is
7839 -+ * NCQ-enabled and non-rotational, as this boosts the throughput on such
7840 -+ * devices.
7841 -+ */
7842 -+static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
7843 -+{
7844 -+ struct bfq_data *bfqd = bfqq->bfqd;
7845 -+
7846 -+ return bfq_bfqq_sync(bfqq) && (
7847 -+ bfqq->raising_coeff > 1 ||
7848 -+ (bfq_bfqq_idle_window(bfqq) &&
7849 -+ !(bfqd->hw_tag &&
7850 -+ (blk_queue_nonrot(bfqd->queue) ||
7851 -+ /*
7852 -+ * If there are weight-raised busy queues, then do not idle
7853 -+ * the disk for a sync non-weight-raised queue, and hence
7854 -+ * expire the queue immediately if empty. Combined with the
7855 -+ * timestamping rules of BFQ (see [1] for details), this
7856 -+ * causes sync non-weight-raised queues to get a lower
7857 -+ * fraction of the disk throughput, and hence reduces the rate
7858 -+ * at which the processes associated to these queues ask for
7859 -+ * requests from the request pool.
7860 -+ *
7861 -+ * This is beneficial for weight-raised processes, when the
7862 -+ * system operates in request-pool saturation conditions
7863 -+ * (e.g., in the presence of write hogs). In fact, if
7864 -+ * non-weight-raised processes ask for requests at a lower
7865 -+ * rate, then weight-raised processes have a higher
7866 -+ * probability to get a request from the pool immediately
7867 -+ * (or at least soon) when they need one. Hence they have a
7868 -+ * higher probability to actually get a fraction of the disk
7869 -+ * throughput proportional to their high weight. This is
7870 -+ * especially true with NCQ-enabled drives, which enqueue
7871 -+ * several requests in advance and further reorder
7872 -+ * internally-queued requests.
7873 -+ *
7874 -+ * Mistreating non-weight-raised queues in the above-described
7875 -+ * way, when there are busy weight-raised queues, seems to
7876 -+ * mitigate starvation problems in the presence of heavy write
7877 -+ * workloads and NCQ, and hence to guarantee a higher
7878 -+ * application and system responsiveness in these hostile
7879 -+ * scenarios.
7880 -+ */
7881 -+ bfqd->raised_busy_queues > 0)
7882 -+ )
7883 -+ )
7884 -+ );
7885 -+}
7886 -+
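/*
 * Reduced standalone sketch of the decision above, with assumed field
 * names (the real code reads bfqq and bfqd state directly): idling is
 * kept unconditionally for weight-raised sync queues, and otherwise only
 * when the queue has an idle window and the device is not an NCQ device
 * that is either non-rotational or shared with weight-raised queues.
 */
#include <stdbool.h>

struct idling_inputs {
	bool sync;
	bool weight_raised;	/* raising_coeff > 1 */
	bool idle_window;
	bool ncq;		/* hw_tag */
	bool nonrot;
	bool wr_busy_queues;	/* raised_busy_queues > 0 */
};

static bool must_not_expire(const struct idling_inputs *in)
{
	if (!in->sync)
		return false;
	if (in->weight_raised)
		return true;
	return in->idle_window &&
	       !(in->ncq && (in->nonrot || in->wr_busy_queues));
}
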
7887 -+/*
7888 -+ * If the in-service queue is empty, but it is sync and either of the following
7889 -+ * conditions holds, then: 1) the queue must remain in service and cannot be
7890 -+ * expired, and 2) the disk must be idled to wait for the possible arrival
7891 -+ * of a new request for the queue. The conditions are:
7892 -+ * - the device is rotational and not performing NCQ, and the queue has its
7893 -+ * idle window set (in this case, waiting for a new request for the queue
7894 -+ * is likely to boost the disk throughput);
7895 -+ * - the queue is weight-raised (waiting for the request is necessary to
7896 -+ * provide the queue with fairness and latency guarantees, see [1] for
7897 -+ * details).
7898 -+ */
7899 -+static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
7900 -+{
7901 -+ struct bfq_data *bfqd = bfqq->bfqd;
7902 -+
7903 -+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
7904 -+ bfq_bfqq_must_not_expire(bfqq) &&
7905 -+ !bfq_queue_nonrot_noidle(bfqd, bfqq);
7906 -+}
7907 -+
7908 -+/*
7909 -+ * Select a queue for service. If we have a current queue in service,
7910 -+ * check whether to continue servicing it, or retrieve and set a new one.
7911 -+ */
7912 -+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
7913 -+{
7914 -+ struct bfq_queue *bfqq, *new_bfqq = NULL;
7915 -+ struct request *next_rq;
7916 -+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
7917 -+
7918 -+ bfqq = bfqd->in_service_queue;
7919 -+ if (bfqq == NULL)
7920 -+ goto new_queue;
7921 -+
7922 -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
7923 -+
7924 -+ /*
7925 -+ * If another queue has a request waiting within our mean seek
7926 -+ * distance, let it run. The expire code will check for close
7927 -+ * cooperators and put the close queue at the front of the
7928 -+ * service tree. If possible, merge the expiring queue with the
7929 -+ * new bfqq.
7930 -+ */
7931 -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq);
7932 -+ if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
7933 -+ bfq_setup_merge(bfqq, new_bfqq);
7934 -+
7935 -+ if (bfq_may_expire_for_budg_timeout(bfqq) &&
7936 -+ !timer_pending(&bfqd->idle_slice_timer) &&
7937 -+ !bfq_bfqq_must_idle(bfqq))
7938 -+ goto expire;
7939 -+
7940 -+ next_rq = bfqq->next_rq;
7941 -+ /*
7942 -+ * If bfqq has requests queued and it has enough budget left to
7943 -+ * serve them, keep the queue, otherwise expire it.
7944 -+ */
7945 -+ if (next_rq != NULL) {
7946 -+ if (bfq_serv_to_charge(next_rq, bfqq) >
7947 -+ bfq_bfqq_budget_left(bfqq)) {
7948 -+ reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
7949 -+ goto expire;
7950 -+ } else {
7951 -+ /*
7952 -+ * The idle timer may be pending because we may not
7953 -+ * disable disk idling even when a new request arrives
7954 -+ */
7955 -+ if (timer_pending(&bfqd->idle_slice_timer)) {
7956 -+ /*
7957 -+ * If we get here: 1) at least one new request
7958 -+ * has arrived but we have not disabled the
7959 -+ * timer because the request was too small,
7960 -+ * and 2) the block layer has unplugged the
7961 -+ * device, causing the dispatch to be invoked.
7962 -+ *
7963 -+ * Since the device is unplugged, now the
7964 -+ * requests are probably large enough to
7965 -+ * provide a reasonable throughput.
7966 -+ * So we disable idling.
7967 -+ */
7968 -+ bfq_clear_bfqq_wait_request(bfqq);
7969 -+ del_timer(&bfqd->idle_slice_timer);
7970 -+ }
7971 -+ if (new_bfqq == NULL)
7972 -+ goto keep_queue;
7973 -+ else
7974 -+ goto expire;
7975 -+ }
7976 -+ }
7977 -+
7978 -+ /*
7979 -+ * No requests pending. If the in-service queue has no cooperator and
7980 -+ * still has requests in flight (possibly waiting for a completion)
7981 -+ * or is idling for a new request, then keep it.
7982 -+ */
7983 -+ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
7984 -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
7985 -+ bfqq = NULL;
7986 -+ goto keep_queue;
7987 -+ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
7988 -+ /*
7989 -+ * Expiring the queue because there is a close cooperator,
7990 -+ * cancel timer.
7991 -+ */
7992 -+ bfq_clear_bfqq_wait_request(bfqq);
7993 -+ del_timer(&bfqd->idle_slice_timer);
7994 -+ }
7995 -+
7996 -+ reason = BFQ_BFQQ_NO_MORE_REQUESTS;
7997 -+expire:
7998 -+ bfq_bfqq_expire(bfqd, bfqq, 0, reason);
7999 -+new_queue:
8000 -+ bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
8001 -+ bfq_log(bfqd, "select_queue: new queue %d returned",
8002 -+ bfqq != NULL ? bfqq->pid : 0);
8003 -+keep_queue:
8004 -+ return bfqq;
8005 -+}
8006 -+
8007 -+static void bfq_update_raising_data(struct bfq_data *bfqd,
8008 -+ struct bfq_queue *bfqq)
8009 -+{
8010 -+ if (bfqq->raising_coeff > 1) { /* queue is being boosted */
8011 -+ struct bfq_entity *entity = &bfqq->entity;
8012 -+
8013 -+ bfq_log_bfqq(bfqd, bfqq,
8014 -+ "raising period dur %u/%u msec, "
8015 -+ "old raising coeff %u, w %d(%d)",
8016 -+ jiffies_to_msecs(jiffies -
8017 -+ bfqq->last_rais_start_finish),
8018 -+ jiffies_to_msecs(bfqq->raising_cur_max_time),
8019 -+ bfqq->raising_coeff,
8020 -+ bfqq->entity.weight, bfqq->entity.orig_weight);
8021 -+
8022 -+ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
8023 -+ entity->orig_weight * bfqq->raising_coeff);
8024 -+ if (entity->ioprio_changed)
8025 -+ bfq_log_bfqq(bfqd, bfqq,
8026 -+ "WARN: pending prio change");
8027 -+ /*
8028 -+ * If too much time has elapsed from the beginning
8029 -+ * of this weight-raising, stop it.
8030 -+ */
8031 -+ if (time_is_before_jiffies(bfqq->last_rais_start_finish +
8032 -+ bfqq->raising_cur_max_time)) {
8033 -+ bfqq->last_rais_start_finish = jiffies;
8034 -+ bfq_log_bfqq(bfqd, bfqq,
8035 -+ "wrais ending at %lu, "
8036 -+ "rais_max_time %u",
8037 -+ bfqq->last_rais_start_finish,
8038 -+ jiffies_to_msecs(bfqq->
8039 -+ raising_cur_max_time));
8040 -+ bfq_bfqq_end_raising(bfqq);
8041 -+ __bfq_entity_update_weight_prio(
8042 -+ bfq_entity_service_tree(entity),
8043 -+ entity);
8044 -+ }
8045 -+ }
8046 -+}
8047 -+
8048 -+/*
8049 -+ * Dispatch one request from bfqq, moving it to the request queue
8050 -+ * dispatch list.
8051 -+ */
8052 -+static int bfq_dispatch_request(struct bfq_data *bfqd,
8053 -+ struct bfq_queue *bfqq)
8054 -+{
8055 -+ int dispatched = 0;
8056 -+ struct request *rq;
8057 -+ unsigned long service_to_charge;
8058 -+
8059 -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
8060 -+
8061 -+ /* Follow expired path, else get first next available. */
8062 -+ rq = bfq_check_fifo(bfqq);
8063 -+ if (rq == NULL)
8064 -+ rq = bfqq->next_rq;
8065 -+ service_to_charge = bfq_serv_to_charge(rq, bfqq);
8066 -+
8067 -+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
8068 -+ /*
8069 -+ * This may happen if the next rq is chosen
8070 -+ * in fifo order instead of sector order.
8071 -+ * The budget is properly dimensioned
8072 -+ * to be always sufficient to serve the next request
8073 -+ * only if it is chosen in sector order. The reason is
8074 -+ * that it would be quite inefficient and of little use
8075 -+ * to always make sure that the budget is large enough
8076 -+ * to serve even the possible next rq in fifo order.
8077 -+ * In fact, requests are seldom served in fifo order.
8078 -+ *
8079 -+ * Expire the queue for budget exhaustion, and
8080 -+ * make sure that the next act_budget is enough
8081 -+ * to serve the next request, even if it comes
8082 -+ * from the fifo expired path.
8083 -+ */
8084 -+ bfqq->next_rq = rq;
8085 -+ /*
8086 -+ * Since this dispatch failed, make sure that
8087 -+ * a new one will be performed.
8088 -+ */
8089 -+ if (!bfqd->rq_in_driver)
8090 -+ bfq_schedule_dispatch(bfqd);
8091 -+ goto expire;
8092 -+ }
8093 -+
8094 -+ /* Finally, insert request into driver dispatch list. */
8095 -+ bfq_bfqq_served(bfqq, service_to_charge);
8096 -+ bfq_dispatch_insert(bfqd->queue, rq);
8097 -+
8098 -+ bfq_update_raising_data(bfqd, bfqq);
8099 -+
8100 -+ bfq_log_bfqq(bfqd, bfqq,
8101 -+ "dispatched %u sec req (%llu), budg left %lu",
8102 -+ blk_rq_sectors(rq),
8103 -+ (long long unsigned)blk_rq_pos(rq),
8104 -+ bfq_bfqq_budget_left(bfqq));
8105 -+
8106 -+ dispatched++;
8107 -+
8108 -+ if (bfqd->in_service_bic == NULL) {
8109 -+ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
8110 -+ bfqd->in_service_bic = RQ_BIC(rq);
8111 -+ }
8112 -+
8113 -+ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
8114 -+ dispatched >= bfqd->bfq_max_budget_async_rq) ||
8115 -+ bfq_class_idle(bfqq)))
8116 -+ goto expire;
8117 -+
8118 -+ return dispatched;
8119 -+
8120 -+expire:
8121 -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED);
8122 -+ return dispatched;
8123 -+}
8124 -+
8125 -+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
8126 -+{
8127 -+ int dispatched = 0;
8128 -+
8129 -+ while (bfqq->next_rq != NULL) {
8130 -+ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
8131 -+ dispatched++;
8132 -+ }
8133 -+
8134 -+ BUG_ON(!list_empty(&bfqq->fifo));
8135 -+ return dispatched;
8136 -+}
8137 -+
8138 -+/*
8139 -+ * Drain our current requests. Used for barriers and when switching
8140 -+ * io schedulers on-the-fly.
8141 -+ */
8142 -+static int bfq_forced_dispatch(struct bfq_data *bfqd)
8143 -+{
8144 -+ struct bfq_queue *bfqq, *n;
8145 -+ struct bfq_service_tree *st;
8146 -+ int dispatched = 0;
8147 -+
8148 -+ bfqq = bfqd->in_service_queue;
8149 -+ if (bfqq != NULL)
8150 -+ __bfq_bfqq_expire(bfqd, bfqq);
8151 -+
8152 -+ /*
8153 -+ * Loop through classes, and be careful to leave the scheduler
8154 -+ * in a consistent state, as feedback mechanisms and vtime
8155 -+ * updates cannot be disabled during the process.
8156 -+ */
8157 -+ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
8158 -+ st = bfq_entity_service_tree(&bfqq->entity);
8159 -+
8160 -+ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
8161 -+ bfqq->max_budget = bfq_max_budget(bfqd);
8162 -+
8163 -+ bfq_forget_idle(st);
8164 -+ }
8165 -+
8166 -+ BUG_ON(bfqd->busy_queues != 0);
8167 -+
8168 -+ return dispatched;
8169 -+}
8170 -+
8171 -+static int bfq_dispatch_requests(struct request_queue *q, int force)
8172 -+{
8173 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8174 -+ struct bfq_queue *bfqq;
8175 -+ int max_dispatch;
8176 -+
8177 -+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
8178 -+ if (bfqd->busy_queues == 0)
8179 -+ return 0;
8180 -+
8181 -+ if (unlikely(force))
8182 -+ return bfq_forced_dispatch(bfqd);
8183 -+
8184 -+ bfqq = bfq_select_queue(bfqd);
8185 -+ if (bfqq == NULL)
8186 -+ return 0;
8187 -+
8188 -+ max_dispatch = bfqd->bfq_quantum;
8189 -+ if (bfq_class_idle(bfqq))
8190 -+ max_dispatch = 1;
8191 -+
8192 -+ if (!bfq_bfqq_sync(bfqq))
8193 -+ max_dispatch = bfqd->bfq_max_budget_async_rq;
8194 -+
8195 -+ if (bfqq->dispatched >= max_dispatch) {
8196 -+ if (bfqd->busy_queues > 1)
8197 -+ return 0;
8198 -+ if (bfqq->dispatched >= 4 * max_dispatch)
8199 -+ return 0;
8200 -+ }
8201 -+
8202 -+ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
8203 -+ return 0;
8204 -+
8205 -+ bfq_clear_bfqq_wait_request(bfqq);
8206 -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
8207 -+
8208 -+ if (!bfq_dispatch_request(bfqd, bfqq))
8209 -+ return 0;
8210 -+
8211 -+ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)",
8212 -+ bfqq->pid, max_dispatch);
8213 -+
8214 -+ return 1;
8215 -+}
8216 -+
8217 -+/*
8218 -+ * Task holds one reference to the queue, dropped when task exits. Each rq
8219 -+ * in-flight on this queue also holds a reference, dropped when rq is freed.
8220 -+ *
8221 -+ * Queue lock must be held here.
8222 -+ */
8223 -+static void bfq_put_queue(struct bfq_queue *bfqq)
8224 -+{
8225 -+ struct bfq_data *bfqd = bfqq->bfqd;
8226 -+
8227 -+ BUG_ON(atomic_read(&bfqq->ref) <= 0);
8228 -+
8229 -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
8230 -+ atomic_read(&bfqq->ref));
8231 -+ if (!atomic_dec_and_test(&bfqq->ref))
8232 -+ return;
8233 -+
8234 -+ BUG_ON(rb_first(&bfqq->sort_list) != NULL);
8235 -+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
8236 -+ BUG_ON(bfqq->entity.tree != NULL);
8237 -+ BUG_ON(bfq_bfqq_busy(bfqq));
8238 -+ BUG_ON(bfqd->in_service_queue == bfqq);
8239 -+
8240 -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
8241 -+
8242 -+ kmem_cache_free(bfq_pool, bfqq);
8243 -+}
8244 -+
8245 -+static void bfq_put_cooperator(struct bfq_queue *bfqq)
8246 -+{
8247 -+ struct bfq_queue *__bfqq, *next;
8248 -+
8249 -+ /*
8250 -+ * If this queue was scheduled to merge with another queue, be
8251 -+ * sure to drop the reference taken on that queue (and others in
8252 -+ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
8253 -+ */
8254 -+ __bfqq = bfqq->new_bfqq;
8255 -+ while (__bfqq) {
8256 -+ if (__bfqq == bfqq) {
8257 -+ WARN(1, "bfqq->new_bfqq loop detected.\n");
8258 -+ break;
8259 -+ }
8260 -+ next = __bfqq->new_bfqq;
8261 -+ bfq_put_queue(__bfqq);
8262 -+ __bfqq = next;
8263 -+ }
8264 -+}
8265 -+
8266 -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
8267 -+{
8268 -+ if (bfqq == bfqd->in_service_queue) {
8269 -+ __bfq_bfqq_expire(bfqd, bfqq);
8270 -+ bfq_schedule_dispatch(bfqd);
8271 -+ }
8272 -+
8273 -+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
8274 -+ atomic_read(&bfqq->ref));
8275 -+
8276 -+ bfq_put_cooperator(bfqq);
8277 -+
8278 -+ bfq_put_queue(bfqq);
8279 -+}
8280 -+
8281 -+static void bfq_init_icq(struct io_cq *icq)
8282 -+{
8283 -+ struct bfq_io_cq *bic = icq_to_bic(icq);
8284 -+
8285 -+ bic->ttime.last_end_request = jiffies;
8286 -+}
8287 -+
8288 -+static void bfq_exit_icq(struct io_cq *icq)
8289 -+{
8290 -+ struct bfq_io_cq *bic = icq_to_bic(icq);
8291 -+ struct bfq_data *bfqd = bic_to_bfqd(bic);
8292 -+
8293 -+ if (bic->bfqq[BLK_RW_ASYNC]) {
8294 -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
8295 -+ bic->bfqq[BLK_RW_ASYNC] = NULL;
8296 -+ }
8297 -+
8298 -+ if (bic->bfqq[BLK_RW_SYNC]) {
8299 -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
8300 -+ bic->bfqq[BLK_RW_SYNC] = NULL;
8301 -+ }
8302 -+}
8303 -+
8304 -+/*
8305 -+ * Update the entity prio values; note that the new values will not
8306 -+ * be used until the next (re)activation.
8307 -+ */
8308 -+static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
8309 -+{
8310 -+ struct task_struct *tsk = current;
8311 -+ int ioprio_class;
8312 -+
8313 -+ if (!bfq_bfqq_prio_changed(bfqq))
8314 -+ return;
8315 -+
8316 -+ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
8317 -+ switch (ioprio_class) {
8318 -+ default:
8319 -+ dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
8320 -+ "bfq: bad prio %x\n", ioprio_class);
8321 -+ case IOPRIO_CLASS_NONE:
8322 -+ /*
8323 -+ * No prio set, inherit CPU scheduling settings.
8324 -+ */
8325 -+ bfqq->entity.new_ioprio = task_nice_ioprio(tsk);
8326 -+ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk);
8327 -+ break;
8328 -+ case IOPRIO_CLASS_RT:
8329 -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
8330 -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT;
8331 -+ break;
8332 -+ case IOPRIO_CLASS_BE:
8333 -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
8334 -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE;
8335 -+ break;
8336 -+ case IOPRIO_CLASS_IDLE:
8337 -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE;
8338 -+ bfqq->entity.new_ioprio = 7;
8339 -+ bfq_clear_bfqq_idle_window(bfqq);
8340 -+ break;
8341 -+ }
8342 -+
8343 -+ bfqq->entity.ioprio_changed = 1;
8344 -+
8345 -+ /*
8346 -+ * Keep track of original prio settings in case we have to temporarily
8347 -+ * elevate the priority of this queue.
8348 -+ */
8349 -+ bfqq->org_ioprio = bfqq->entity.new_ioprio;
8350 -+ bfq_clear_bfqq_prio_changed(bfqq);
8351 -+}
8352 -+
8353 -+static void bfq_changed_ioprio(struct bfq_io_cq *bic)
8354 -+{
8355 -+ struct bfq_data *bfqd;
8356 -+ struct bfq_queue *bfqq, *new_bfqq;
8357 -+ struct bfq_group *bfqg;
8358 -+ unsigned long uninitialized_var(flags);
8359 -+ int ioprio = bic->icq.ioc->ioprio;
8360 -+
8361 -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
8362 -+ &flags);
8363 -+ /*
8364 -+ * This condition may trigger on a newly created bic; be sure to drop
8365 -+ * the lock before returning.
8366 -+ */
8367 -+ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio))
8368 -+ goto out;
8369 -+
8370 -+ bfqq = bic->bfqq[BLK_RW_ASYNC];
8371 -+ if (bfqq != NULL) {
8372 -+ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group,
8373 -+ sched_data);
8374 -+ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic,
8375 -+ GFP_ATOMIC);
8376 -+ if (new_bfqq != NULL) {
8377 -+ bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
8378 -+ bfq_log_bfqq(bfqd, bfqq,
8379 -+ "changed_ioprio: bfqq %p %d",
8380 -+ bfqq, atomic_read(&bfqq->ref));
8381 -+ bfq_put_queue(bfqq);
8382 -+ }
8383 -+ }
8384 -+
8385 -+ bfqq = bic->bfqq[BLK_RW_SYNC];
8386 -+ if (bfqq != NULL)
8387 -+ bfq_mark_bfqq_prio_changed(bfqq);
8388 -+
8389 -+ bic->ioprio = ioprio;
8390 -+
8391 -+out:
8392 -+ bfq_put_bfqd_unlock(bfqd, &flags);
8393 -+}
8394 -+
8395 -+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
8396 -+ pid_t pid, int is_sync)
8397 -+{
8398 -+ RB_CLEAR_NODE(&bfqq->entity.rb_node);
8399 -+ INIT_LIST_HEAD(&bfqq->fifo);
8400 -+
8401 -+ atomic_set(&bfqq->ref, 0);
8402 -+ bfqq->bfqd = bfqd;
8403 -+
8404 -+ bfq_mark_bfqq_prio_changed(bfqq);
8405 -+
8406 -+ if (is_sync) {
8407 -+ if (!bfq_class_idle(bfqq))
8408 -+ bfq_mark_bfqq_idle_window(bfqq);
8409 -+ bfq_mark_bfqq_sync(bfqq);
8410 -+ }
8411 -+
8412 -+ /* Tentative initial value to trade off between thr and lat */
8413 -+ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
8414 -+ bfqq->pid = pid;
8415 -+
8416 -+ bfqq->raising_coeff = 1;
8417 -+ bfqq->last_rais_start_finish = 0;
8418 -+ /*
8419 -+ * Set to the value for which bfqq will not be deemed as
8420 -+ * soft rt when it becomes backlogged.
8421 -+ */
8422 -+ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
8423 -+}
8424 -+
8425 -+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
8426 -+ struct bfq_group *bfqg,
8427 -+ int is_sync,
8428 -+ struct bfq_io_cq *bic,
8429 -+ gfp_t gfp_mask)
8430 -+{
8431 -+ struct bfq_queue *bfqq, *new_bfqq = NULL;
8432 -+
8433 -+retry:
8434 -+ /* bic always exists here */
8435 -+ bfqq = bic_to_bfqq(bic, is_sync);
8436 -+
8437 -+ /*
8438 -+ * Always try a new allocation if we originally fell back to the
8439 -+ * OOM bfqq, since that should just be a temporary situation.
8440 -+ */
8441 -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
8442 -+ bfqq = NULL;
8443 -+ if (new_bfqq != NULL) {
8444 -+ bfqq = new_bfqq;
8445 -+ new_bfqq = NULL;
8446 -+ } else if (gfp_mask & __GFP_WAIT) {
8447 -+ spin_unlock_irq(bfqd->queue->queue_lock);
8448 -+ new_bfqq = kmem_cache_alloc_node(bfq_pool,
8449 -+ gfp_mask | __GFP_ZERO,
8450 -+ bfqd->queue->node);
8451 -+ spin_lock_irq(bfqd->queue->queue_lock);
8452 -+ if (new_bfqq != NULL)
8453 -+ goto retry;
8454 -+ } else {
8455 -+ bfqq = kmem_cache_alloc_node(bfq_pool,
8456 -+ gfp_mask | __GFP_ZERO,
8457 -+ bfqd->queue->node);
8458 -+ }
8459 -+
8460 -+ if (bfqq != NULL) {
8461 -+ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync);
8462 -+ bfq_log_bfqq(bfqd, bfqq, "allocated");
8463 -+ } else {
8464 -+ bfqq = &bfqd->oom_bfqq;
8465 -+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
8466 -+ }
8467 -+
8468 -+ bfq_init_prio_data(bfqq, bic);
8469 -+ bfq_init_entity(&bfqq->entity, bfqg);
8470 -+ }
8471 -+
8472 -+ if (new_bfqq != NULL)
8473 -+ kmem_cache_free(bfq_pool, new_bfqq);
8474 -+
8475 -+ return bfqq;
8476 -+}
8477 -+
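/*
 * Simplified illustration of the fallback strategy above (the details of
 * dropping and retaking the queue lock around a sleeping allocation are
 * omitted; all names here are assumptions): when the normal allocation
 * fails, hand out a statically preallocated "oom" object rather than
 * failing the request, and retry a real allocation on the next lookup.
 */
#include <stdlib.h>

struct queue_sketch {
	int is_oom_fallback;
};

static struct queue_sketch oom_queue = { .is_oom_fallback = 1 };

static struct queue_sketch *get_queue_or_fallback(void)
{
	struct queue_sketch *q = calloc(1, sizeof(*q));

	return q != NULL ? q : &oom_queue;
}
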
8478 -+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
8479 -+ struct bfq_group *bfqg,
8480 -+ int ioprio_class, int ioprio)
8481 -+{
8482 -+ switch (ioprio_class) {
8483 -+ case IOPRIO_CLASS_RT:
8484 -+ return &bfqg->async_bfqq[0][ioprio];
8485 -+ case IOPRIO_CLASS_NONE:
8486 -+ ioprio = IOPRIO_NORM;
8487 -+ /* fall through */
8488 -+ case IOPRIO_CLASS_BE:
8489 -+ return &bfqg->async_bfqq[1][ioprio];
8490 -+ case IOPRIO_CLASS_IDLE:
8491 -+ return &bfqg->async_idle_bfqq;
8492 -+ default:
8493 -+ BUG();
8494 -+ }
8495 -+}
8496 -+
8497 -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
8498 -+ struct bfq_group *bfqg, int is_sync,
8499 -+ struct bfq_io_cq *bic, gfp_t gfp_mask)
8500 -+{
8501 -+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
8502 -+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
8503 -+ struct bfq_queue **async_bfqq = NULL;
8504 -+ struct bfq_queue *bfqq = NULL;
8505 -+
8506 -+ if (!is_sync) {
8507 -+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
8508 -+ ioprio);
8509 -+ bfqq = *async_bfqq;
8510 -+ }
8511 -+
8512 -+ if (bfqq == NULL)
8513 -+ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
8514 -+
8515 -+ /*
8516 -+ * Pin the queue now that it's allocated; scheduler exit will prune it.
8517 -+ */
8518 -+ if (!is_sync && *async_bfqq == NULL) {
8519 -+ atomic_inc(&bfqq->ref);
8520 -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
8521 -+ bfqq, atomic_read(&bfqq->ref));
8522 -+ *async_bfqq = bfqq;
8523 -+ }
8524 -+
8525 -+ atomic_inc(&bfqq->ref);
8526 -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
8527 -+ atomic_read(&bfqq->ref));
8528 -+ return bfqq;
8529 -+}
8530 -+
8531 -+static void bfq_update_io_thinktime(struct bfq_data *bfqd,
8532 -+ struct bfq_io_cq *bic)
8533 -+{
8534 -+ unsigned long elapsed = jiffies - bic->ttime.last_end_request;
8535 -+ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
8536 -+
8537 -+ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
8538 -+ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
8539 -+ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) /
8540 -+ bic->ttime.ttime_samples;
8541 -+}
8542 -+
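/*
 * Standalone sketch of the exponentially weighted moving average used
 * above: every update keeps 7/8 of the history and mixes in 1/8 of the
 * new sample, with counts and totals kept pre-scaled by 256 to avoid
 * floating point.  Names and the slice_idle value are assumptions.
 */
#include <stdio.h>

struct ttime_sketch {
	unsigned long samples;	/* scaled sample count, saturates near 256 */
	unsigned long total;	/* scaled sum of observed think times */
	unsigned long mean;	/* derived mean think time */
};

static void ttime_update(struct ttime_sketch *t, unsigned long elapsed,
			 unsigned long slice_idle)
{
	unsigned long ttime = elapsed < 2 * slice_idle ? elapsed : 2 * slice_idle;

	t->samples = (7 * t->samples + 256) / 8;
	t->total = (7 * t->total + 256 * ttime) / 8;
	t->mean = (t->total + 128) / t->samples;
}

int main(void)
{
	struct ttime_sketch t = { 0, 0, 0 };
	unsigned long obs[] = { 2, 2, 2, 40, 40, 2 };	/* jiffies between requests */
	unsigned long i;

	for (i = 0; i < sizeof(obs) / sizeof(obs[0]); i++) {
		ttime_update(&t, obs[i], 8 /* assumed slice_idle */);
		printf("after sample %lu: mean ~ %lu jiffies\n", i + 1, t.mean);
	}
	return 0;
}
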
8543 -+static void bfq_update_io_seektime(struct bfq_data *bfqd,
8544 -+ struct bfq_queue *bfqq,
8545 -+ struct request *rq)
8546 -+{
8547 -+ sector_t sdist;
8548 -+ u64 total;
8549 -+
8550 -+ if (bfqq->last_request_pos < blk_rq_pos(rq))
8551 -+ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
8552 -+ else
8553 -+ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
8554 -+
8555 -+ /*
8556 -+ * Don't allow the seek distance to get too large from the
8557 -+ * odd fragment, pagein, etc.
8558 -+ */
8559 -+ if (bfqq->seek_samples == 0) /* first request, not really a seek */
8560 -+ sdist = 0;
8561 -+ else if (bfqq->seek_samples <= 60) /* second & third seek */
8562 -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
8563 -+ else
8564 -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
8565 -+
8566 -+ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
8567 -+ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
8568 -+ total = bfqq->seek_total + (bfqq->seek_samples/2);
8569 -+ do_div(total, bfqq->seek_samples);
8570 -+ bfqq->seek_mean = (sector_t)total;
8571 -+
8572 -+ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
8573 -+ (u64)bfqq->seek_mean);
8574 -+}
8575 -+
8576 -+/*
8577 -+ * Disable idle window if the process thinks too long or seeks so much that
8578 -+ * it doesn't matter.
8579 -+ */
8580 -+static void bfq_update_idle_window(struct bfq_data *bfqd,
8581 -+ struct bfq_queue *bfqq,
8582 -+ struct bfq_io_cq *bic)
8583 -+{
8584 -+ int enable_idle;
8585 -+
8586 -+ /* Don't idle for async or idle io prio class. */
8587 -+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
8588 -+ return;
8589 -+
8590 -+ enable_idle = bfq_bfqq_idle_window(bfqq);
8591 -+
8592 -+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
8593 -+ bfqd->bfq_slice_idle == 0 ||
8594 -+ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
8595 -+ bfqq->raising_coeff == 1))
8596 -+ enable_idle = 0;
8597 -+ else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
8598 -+ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
8599 -+ bfqq->raising_coeff == 1)
8600 -+ enable_idle = 0;
8601 -+ else
8602 -+ enable_idle = 1;
8603 -+ }
8604 -+ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
8605 -+ enable_idle);
8606 -+
8607 -+ if (enable_idle)
8608 -+ bfq_mark_bfqq_idle_window(bfqq);
8609 -+ else
8610 -+ bfq_clear_bfqq_idle_window(bfqq);
8611 -+}
8612 -+
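/*
 * Reduced sketch, with assumed parameters, of the heuristic above: the
 * idle window survives only for sync, non-idle-class queues whose mean
 * think time stays within the idling budget, except that weight-raised
 * queues keep it regardless (the real code also folds in seekiness and
 * the NCQ flag).
 */
#include <stdbool.h>

static bool keep_idle_window(bool sync, bool idle_class, bool weight_raised,
			     unsigned long mean_think_time,
			     unsigned long slice_idle)
{
	if (!sync || idle_class)
		return false;
	if (weight_raised)
		return true;
	return mean_think_time <= slice_idle;
}
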
8613 -+/*
8614 -+ * Called when a new fs request (rq) is added to bfqq. Check if there's
8615 -+ * something we should do about it.
8616 -+ */
8617 -+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
8618 -+ struct request *rq)
8619 -+{
8620 -+ struct bfq_io_cq *bic = RQ_BIC(rq);
8621 -+
8622 -+ if (rq->cmd_flags & REQ_META)
8623 -+ bfqq->meta_pending++;
8624 -+
8625 -+ bfq_update_io_thinktime(bfqd, bic);
8626 -+ bfq_update_io_seektime(bfqd, bfqq, rq);
8627 -+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
8628 -+ !BFQQ_SEEKY(bfqq))
8629 -+ bfq_update_idle_window(bfqd, bfqq, bic);
8630 -+
8631 -+ bfq_log_bfqq(bfqd, bfqq,
8632 -+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
8633 -+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
8634 -+ (long long unsigned)bfqq->seek_mean);
8635 -+
8636 -+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
8637 -+
8638 -+ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
8639 -+ int small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
8640 -+ blk_rq_sectors(rq) < 32;
8641 -+ int budget_timeout = bfq_bfqq_budget_timeout(bfqq);
8642 -+
8643 -+ /*
8644 -+ * There is just this request queued: if the request
8645 -+ * is small and the queue is not to be expired, then
8646 -+ * just exit.
8647 -+ *
8648 -+ * In this way, if the disk is being idled to wait for
8649 -+ * a new request from the in-service queue, we avoid
8650 -+ * unplugging the device and committing the disk to serve
8651 -+ * just a small request. Instead, we wait for
8652 -+ * the block layer to decide when to unplug the device:
8653 -+ * hopefully, new requests will be merged to this one
8654 -+ * quickly, then the device will be unplugged and
8655 -+ * larger requests will be dispatched.
8656 -+ */
8657 -+ if (small_req && !budget_timeout)
8658 -+ return;
8659 -+
8660 -+ /*
8661 -+ * A large enough request arrived, or the queue is to
8662 -+ * be expired: in both cases disk idling is to be
8663 -+ * stopped, so clear wait_request flag and reset
8664 -+ * timer.
8665 -+ */
8666 -+ bfq_clear_bfqq_wait_request(bfqq);
8667 -+ del_timer(&bfqd->idle_slice_timer);
8668 -+
8669 -+ /*
8670 -+ * The queue is not empty, because a new request just
8671 -+ * arrived. Hence we can safely expire the queue, in
8672 -+ * case of budget timeout, without risking that the
8673 -+ * timestamps of the queue are not updated correctly.
8674 -+ * See [1] for more details.
8675 -+ */
8676 -+ if (budget_timeout)
8677 -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
8678 -+
8679 -+ /*
8680 -+ * Let the request rip immediately, or let a new queue be
8681 -+ * selected if bfqq has just been expired.
8682 -+ */
8683 -+ __blk_run_queue(bfqd->queue);
8684 -+ }
8685 -+}
8686 -+
8687 -+static void bfq_insert_request(struct request_queue *q, struct request *rq)
8688 -+{
8689 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8690 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
8691 -+
8692 -+ assert_spin_locked(bfqd->queue->queue_lock);
8693 -+ bfq_init_prio_data(bfqq, RQ_BIC(rq));
8694 -+
8695 -+ bfq_add_rq_rb(rq);
8696 -+
8697 -+ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
8698 -+ list_add_tail(&rq->queuelist, &bfqq->fifo);
8699 -+
8700 -+ bfq_rq_enqueued(bfqd, bfqq, rq);
8701 -+}
8702 -+
8703 -+static void bfq_update_hw_tag(struct bfq_data *bfqd)
8704 -+{
8705 -+ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
8706 -+ bfqd->rq_in_driver);
8707 -+
8708 -+ if (bfqd->hw_tag == 1)
8709 -+ return;
8710 -+
8711 -+ /*
8712 -+ * This sample is valid if the number of outstanding requests
8713 -+ * is large enough to allow queueing behavior. Note that the
8714 -+ * sum is not exact, as it does not take deactivated
8715 -+ * requests into account.
8716 -+ */
8717 -+ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
8718 -+ return;
8719 -+
8720 -+ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
8721 -+ return;
8722 -+
8723 -+ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
8724 -+ bfqd->max_rq_in_driver = 0;
8725 -+ bfqd->hw_tag_samples = 0;
8726 -+}
8727 -+
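/*
 * Sketch of the sampling scheme above, with assumed constants: the device
 * is declared able to queue commands (NCQ-like behavior) only after a
 * number of sufficiently loaded snapshots, so that a short burst cannot
 * flip the decision.  Not the patch's code.
 */
#define HW_QUEUE_THRESHOLD	4
#define HW_QUEUE_SAMPLES	32

struct hw_tag_sketch {
	int decided;		/* stays unset until enough samples are seen */
	int samples;
	int max_in_driver;
};

static void hw_tag_sample(struct hw_tag_sketch *h, int in_driver, int queued)
{
	if (h->max_in_driver < in_driver)
		h->max_in_driver = in_driver;

	if (h->decided == 1)
		return;

	/* Only count snapshots taken under enough load to be meaningful. */
	if (in_driver + queued < HW_QUEUE_THRESHOLD)
		return;

	if (++h->samples < HW_QUEUE_SAMPLES)
		return;

	h->decided = h->max_in_driver > HW_QUEUE_THRESHOLD;
	h->max_in_driver = 0;
	h->samples = 0;
}
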
8728 -+static void bfq_completed_request(struct request_queue *q, struct request *rq)
8729 -+{
8730 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
8731 -+ struct bfq_data *bfqd = bfqq->bfqd;
8732 -+ const int sync = rq_is_sync(rq);
8733 -+
8734 -+ bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)",
8735 -+ blk_rq_sectors(rq), sync);
8736 -+
8737 -+ bfq_update_hw_tag(bfqd);
8738 -+
8739 -+ WARN_ON(!bfqd->rq_in_driver);
8740 -+ WARN_ON(!bfqq->dispatched);
8741 -+ bfqd->rq_in_driver--;
8742 -+ bfqq->dispatched--;
8743 -+
8744 -+ if (bfq_bfqq_sync(bfqq))
8745 -+ bfqd->sync_flight--;
8746 -+
8747 -+ if (sync)
8748 -+ RQ_BIC(rq)->ttime.last_end_request = jiffies;
8749 -+
8750 -+ /*
8751 -+ * If we are waiting to discover whether the request pattern of the
8752 -+ * task associated with the queue is actually isochronous, and
8753 -+ * both requisites for this condition to hold are satisfied, then
8754 -+ * compute soft_rt_next_start (see the comments to the function
8755 -+ * bfq_bfqq_softrt_next_start()).
8756 -+ */
8757 -+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
8758 -+ RB_EMPTY_ROOT(&bfqq->sort_list))
8759 -+ bfqq->soft_rt_next_start =
8760 -+ bfq_bfqq_softrt_next_start(bfqd, bfqq);
8761 -+
8762 -+ /*
8763 -+ * If this is the in-service queue, check if it needs to be expired,
8764 -+ * or if we want to idle in case it has no pending requests.
8765 -+ */
8766 -+ if (bfqd->in_service_queue == bfqq) {
8767 -+ if (bfq_bfqq_budget_new(bfqq))
8768 -+ bfq_set_budget_timeout(bfqd);
8769 -+
8770 -+ if (bfq_bfqq_must_idle(bfqq)) {
8771 -+ bfq_arm_slice_timer(bfqd);
8772 -+ goto out;
8773 -+ } else if (bfq_may_expire_for_budg_timeout(bfqq))
8774 -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
8775 -+ else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
8776 -+ (bfqq->dispatched == 0 ||
8777 -+ !bfq_bfqq_must_not_expire(bfqq)))
8778 -+ bfq_bfqq_expire(bfqd, bfqq, 0,
8779 -+ BFQ_BFQQ_NO_MORE_REQUESTS);
8780 -+ }
8781 -+
8782 -+ if (!bfqd->rq_in_driver)
8783 -+ bfq_schedule_dispatch(bfqd);
8784 -+
8785 -+out:
8786 -+ return;
8787 -+}
8788 -+
8789 -+static inline int __bfq_may_queue(struct bfq_queue *bfqq)
8790 -+{
8791 -+ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
8792 -+ bfq_clear_bfqq_must_alloc(bfqq);
8793 -+ return ELV_MQUEUE_MUST;
8794 -+ }
8795 -+
8796 -+ return ELV_MQUEUE_MAY;
8797 -+}
8798 -+
8799 -+static int bfq_may_queue(struct request_queue *q, int rw)
8800 -+{
8801 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8802 -+ struct task_struct *tsk = current;
8803 -+ struct bfq_io_cq *bic;
8804 -+ struct bfq_queue *bfqq;
8805 -+
8806 -+ /*
8807 -+ * Don't force setup of a queue from here, as a call to may_queue
8808 -+ * does not necessarily imply that a request actually will be queued.
8809 -+ * So just look up a possibly existing queue, or return 'may queue'
8810 -+ * if that fails.
8811 -+ */
8812 -+ bic = bfq_bic_lookup(bfqd, tsk->io_context);
8813 -+ if (bic == NULL)
8814 -+ return ELV_MQUEUE_MAY;
8815 -+
8816 -+ bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
8817 -+ if (bfqq != NULL) {
8818 -+ bfq_init_prio_data(bfqq, bic);
8819 -+
8820 -+ return __bfq_may_queue(bfqq);
8821 -+ }
8822 -+
8823 -+ return ELV_MQUEUE_MAY;
8824 -+}
8825 -+
8826 -+/*
8827 -+ * Queue lock held here.
8828 -+ */
8829 -+static void bfq_put_request(struct request *rq)
8830 -+{
8831 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
8832 -+
8833 -+ if (bfqq != NULL) {
8834 -+ const int rw = rq_data_dir(rq);
8835 -+
8836 -+ BUG_ON(!bfqq->allocated[rw]);
8837 -+ bfqq->allocated[rw]--;
8838 -+
8839 -+ rq->elv.priv[0] = NULL;
8840 -+ rq->elv.priv[1] = NULL;
8841 -+
8842 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
8843 -+ bfqq, atomic_read(&bfqq->ref));
8844 -+ bfq_put_queue(bfqq);
8845 -+ }
8846 -+}
8847 -+
8848 -+static struct bfq_queue *
8849 -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
8850 -+ struct bfq_queue *bfqq)
8851 -+{
8852 -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
8853 -+ (long unsigned)bfqq->new_bfqq->pid);
8854 -+ bic_set_bfqq(bic, bfqq->new_bfqq, 1);
8855 -+ bfq_mark_bfqq_coop(bfqq->new_bfqq);
8856 -+ bfq_put_queue(bfqq);
8857 -+ return bic_to_bfqq(bic, 1);
8858 -+}
8859 -+
8860 -+/*
8861 -+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
8862 -+ * was the last process referring to said bfqq.
8863 -+ */
8864 -+static struct bfq_queue *
8865 -+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
8866 -+{
8867 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
8868 -+ if (bfqq_process_refs(bfqq) == 1) {
8869 -+ bfqq->pid = current->pid;
8870 -+ bfq_clear_bfqq_coop(bfqq);
8871 -+ bfq_clear_bfqq_split_coop(bfqq);
8872 -+ return bfqq;
8873 -+ }
8874 -+
8875 -+ bic_set_bfqq(bic, NULL, 1);
8876 -+
8877 -+ bfq_put_cooperator(bfqq);
8878 -+
8879 -+ bfq_put_queue(bfqq);
8880 -+ return NULL;
8881 -+}
8882 -+
8883 -+/*
8884 -+ * Allocate bfq data structures associated with this request.
8885 -+ */
8886 -+static int bfq_set_request(struct request_queue *q, struct request *rq,
8887 -+ struct bio *bio, gfp_t gfp_mask)
8888 -+{
8889 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
8890 -+ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
8891 -+ const int rw = rq_data_dir(rq);
8892 -+ const int is_sync = rq_is_sync(rq);
8893 -+ struct bfq_queue *bfqq;
8894 -+ struct bfq_group *bfqg;
8895 -+ unsigned long flags;
8896 -+
8897 -+ might_sleep_if(gfp_mask & __GFP_WAIT);
8898 -+
8899 -+ bfq_changed_ioprio(bic);
8900 -+
8901 -+ spin_lock_irqsave(q->queue_lock, flags);
8902 -+
8903 -+ if (bic == NULL)
8904 -+ goto queue_fail;
8905 -+
8906 -+ bfqg = bfq_bic_update_cgroup(bic);
8907 -+
8908 -+new_queue:
8909 -+ bfqq = bic_to_bfqq(bic, is_sync);
8910 -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
8911 -+ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
8912 -+ bic_set_bfqq(bic, bfqq, is_sync);
8913 -+ } else {
8914 -+ /*
8915 -+ * If the queue was seeky for too long, break it apart.
8916 -+ */
8917 -+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
8918 -+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
8919 -+ bfqq = bfq_split_bfqq(bic, bfqq);
8920 -+ if (!bfqq)
8921 -+ goto new_queue;
8922 -+ }
8923 -+
8924 -+ /*
8925 -+ * Check to see if this queue is scheduled to merge with
8926 -+ * another closely cooperating queue. The merging of queues
8927 -+ * happens here as it must be done in process context.
8928 -+ * The reference on new_bfqq was taken in merge_bfqqs.
8929 -+ */
8930 -+ if (bfqq->new_bfqq != NULL)
8931 -+ bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
8932 -+ }
8933 -+
8934 -+ bfqq->allocated[rw]++;
8935 -+ atomic_inc(&bfqq->ref);
8936 -+ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
8937 -+ atomic_read(&bfqq->ref));
8938 -+
8939 -+ rq->elv.priv[0] = bic;
8940 -+ rq->elv.priv[1] = bfqq;
8941 -+
8942 -+ spin_unlock_irqrestore(q->queue_lock, flags);
8943 -+
8944 -+ return 0;
8945 -+
8946 -+queue_fail:
8947 -+ bfq_schedule_dispatch(bfqd);
8948 -+ spin_unlock_irqrestore(q->queue_lock, flags);
8949 -+
8950 -+ return 1;
8951 -+}
8952 -+
8953 -+static void bfq_kick_queue(struct work_struct *work)
8954 -+{
8955 -+ struct bfq_data *bfqd =
8956 -+ container_of(work, struct bfq_data, unplug_work);
8957 -+ struct request_queue *q = bfqd->queue;
8958 -+
8959 -+ spin_lock_irq(q->queue_lock);
8960 -+ __blk_run_queue(q);
8961 -+ spin_unlock_irq(q->queue_lock);
8962 -+}
8963 -+
8964 -+/*
8965 -+ * Handler of the expiration of the timer running if the in-service queue
8966 -+ * is idling inside its time slice.
8967 -+ */
8968 -+static void bfq_idle_slice_timer(unsigned long data)
8969 -+{
8970 -+ struct bfq_data *bfqd = (struct bfq_data *)data;
8971 -+ struct bfq_queue *bfqq;
8972 -+ unsigned long flags;
8973 -+ enum bfqq_expiration reason;
8974 -+
8975 -+ spin_lock_irqsave(bfqd->queue->queue_lock, flags);
8976 -+
8977 -+ bfqq = bfqd->in_service_queue;
8978 -+ /*
8979 -+ * Theoretical race here: the in-service queue can be NULL or different
8980 -+ * from the queue that was idling if the timer handler spins on
8981 -+ * the queue_lock and a new request arrives for the current
8982 -+ * queue and there is a full dispatch cycle that changes the
8983 -+ * in-service queue. This is unlikely to happen, but in the worst case
8984 -+ * we just expire a queue too early.
8985 -+ */
8986 -+ if (bfqq != NULL) {
8987 -+ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
8988 -+ if (bfq_bfqq_budget_timeout(bfqq))
8989 -+ /*
8990 -+ * Also here the queue can be safely expired
8991 -+ * for budget timeout without wasting
8992 -+ * guarantees
8993 -+ */
8994 -+ reason = BFQ_BFQQ_BUDGET_TIMEOUT;
8995 -+ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
8996 -+ /*
8997 -+ * The queue may not be empty upon timer expiration,
8998 -+ * because we may not disable the timer when the first
8999 -+ * request of the in-service queue arrives during
9000 -+ * disk idling.
9001 -+ */
9002 -+ reason = BFQ_BFQQ_TOO_IDLE;
9003 -+ else
9004 -+ goto schedule_dispatch;
9005 -+
9006 -+ bfq_bfqq_expire(bfqd, bfqq, 1, reason);
9007 -+ }
9008 -+
9009 -+schedule_dispatch:
9010 -+ bfq_schedule_dispatch(bfqd);
9011 -+
9012 -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
9013 -+}
9014 -+
9015 -+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
9016 -+{
9017 -+ del_timer_sync(&bfqd->idle_slice_timer);
9018 -+ cancel_work_sync(&bfqd->unplug_work);
9019 -+}
9020 -+
9021 -+static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd,
9022 -+ struct bfq_queue **bfqq_ptr)
9023 -+{
9024 -+ struct bfq_group *root_group = bfqd->root_group;
9025 -+ struct bfq_queue *bfqq = *bfqq_ptr;
9026 -+
9027 -+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
9028 -+ if (bfqq != NULL) {
9029 -+ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
9030 -+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
9031 -+ bfqq, atomic_read(&bfqq->ref));
9032 -+ bfq_put_queue(bfqq);
9033 -+ *bfqq_ptr = NULL;
9034 -+ }
9035 -+}
9036 -+
9037 -+/*
9038 -+ * Release all the bfqg references to its async queues. If we are
9039 -+ * deallocating the group, these queues may still contain requests, so
9040 -+ * we reparent them to the root cgroup (i.e., the only one that will
9041 -+ * exist for sure until all the requests on a device are gone).
9042 -+ */
9043 -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
9044 -+{
9045 -+ int i, j;
9046 -+
9047 -+ for (i = 0; i < 2; i++)
9048 -+ for (j = 0; j < IOPRIO_BE_NR; j++)
9049 -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
9050 -+
9051 -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
9052 -+}
9053 -+
9054 -+static void bfq_exit_queue(struct elevator_queue *e)
9055 -+{
9056 -+ struct bfq_data *bfqd = e->elevator_data;
9057 -+ struct request_queue *q = bfqd->queue;
9058 -+ struct bfq_queue *bfqq, *n;
9059 -+
9060 -+ bfq_shutdown_timer_wq(bfqd);
9061 -+
9062 -+ spin_lock_irq(q->queue_lock);
9063 -+
9064 -+ BUG_ON(bfqd->in_service_queue != NULL);
9065 -+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
9066 -+ bfq_deactivate_bfqq(bfqd, bfqq, 0);
9067 -+
9068 -+ bfq_disconnect_groups(bfqd);
9069 -+ spin_unlock_irq(q->queue_lock);
9070 -+
9071 -+ bfq_shutdown_timer_wq(bfqd);
9072 -+
9073 -+ synchronize_rcu();
9074 -+
9075 -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
9076 -+
9077 -+ bfq_free_root_group(bfqd);
9078 -+ kfree(bfqd);
9079 -+}
9080 -+
9081 -+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
9082 -+{
9083 -+ struct bfq_group *bfqg;
9084 -+ struct bfq_data *bfqd;
9085 -+ struct elevator_queue *eq;
9086 -+
9087 -+ eq = elevator_alloc(q, e);
9088 -+ if (eq == NULL)
9089 -+ return -ENOMEM;
9090 -+
9091 -+ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
9092 -+ if (bfqd == NULL) {
9093 -+ kobject_put(&eq->kobj);
9094 -+ return -ENOMEM;
9095 -+ }
9096 -+ eq->elevator_data = bfqd;
9097 -+
9098 -+ /*
9099 -+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
9100 -+ * Grab a permanent reference to it, so that the normal code flow
9101 -+ * will not attempt to free it.
9102 -+ */
9103 -+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0);
9104 -+ atomic_inc(&bfqd->oom_bfqq.ref);
9105 -+
9106 -+ bfqd->queue = q;
9107 -+
9108 -+ spin_lock_irq(q->queue_lock);
9109 -+ q->elevator = eq;
9110 -+ spin_unlock_irq(q->queue_lock);
9111 -+
9112 -+ bfqg = bfq_alloc_root_group(bfqd, q->node);
9113 -+ if (bfqg == NULL) {
9114 -+ kfree(bfqd);
9115 -+ kobject_put(&eq->kobj);
9116 -+ return -ENOMEM;
9117 -+ }
9118 -+
9119 -+ bfqd->root_group = bfqg;
9120 -+
9121 -+ init_timer(&bfqd->idle_slice_timer);
9122 -+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
9123 -+ bfqd->idle_slice_timer.data = (unsigned long)bfqd;
9124 -+
9125 -+ bfqd->rq_pos_tree = RB_ROOT;
9126 -+
9127 -+ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
9128 -+
9129 -+ INIT_LIST_HEAD(&bfqd->active_list);
9130 -+ INIT_LIST_HEAD(&bfqd->idle_list);
9131 -+
9132 -+ bfqd->hw_tag = -1;
9133 -+
9134 -+ bfqd->bfq_max_budget = bfq_default_max_budget;
9135 -+
9136 -+ bfqd->bfq_quantum = bfq_quantum;
9137 -+ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
9138 -+ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
9139 -+ bfqd->bfq_back_max = bfq_back_max;
9140 -+ bfqd->bfq_back_penalty = bfq_back_penalty;
9141 -+ bfqd->bfq_slice_idle = bfq_slice_idle;
9142 -+ bfqd->bfq_class_idle_last_service = 0;
9143 -+ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
9144 -+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
9145 -+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
9146 -+
9147 -+ bfqd->low_latency = true;
9148 -+
9149 -+ bfqd->bfq_raising_coeff = 20;
9150 -+ bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300);
9151 -+ bfqd->bfq_raising_max_time = 0;
9152 -+ bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000);
9153 -+ bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500);
9154 -+ bfqd->bfq_raising_max_softrt_rate = 7000; /*
9155 -+ * Approximate rate required
9156 -+ * to play back or record a
9157 -+ * high-definition compressed
9158 -+ * video.
9159 -+ */
9160 -+ bfqd->raised_busy_queues = 0;
9161 -+
9162 -+ /* Initially estimate the device's peak rate as the reference rate */
9163 -+ if (blk_queue_nonrot(bfqd->queue)) {
9164 -+ bfqd->RT_prod = R_nonrot * T_nonrot;
9165 -+ bfqd->peak_rate = R_nonrot;
9166 -+ } else {
9167 -+ bfqd->RT_prod = R_rot * T_rot;
9168 -+ bfqd->peak_rate = R_rot;
9169 -+ }
9170 -+
9171 -+ return 0;
9172 -+}
9173 -+
9174 -+static void bfq_slab_kill(void)
9175 -+{
9176 -+ if (bfq_pool != NULL)
9177 -+ kmem_cache_destroy(bfq_pool);
9178 -+}
9179 -+
9180 -+static int __init bfq_slab_setup(void)
9181 -+{
9182 -+ bfq_pool = KMEM_CACHE(bfq_queue, 0);
9183 -+ if (bfq_pool == NULL)
9184 -+ return -ENOMEM;
9185 -+ return 0;
9186 -+}
9187 -+
9188 -+static ssize_t bfq_var_show(unsigned int var, char *page)
9189 -+{
9190 -+ return sprintf(page, "%d\n", var);
9191 -+}
9192 -+
9193 -+static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count)
9194 -+{
9195 -+ unsigned long new_val;
9196 -+ int ret = kstrtoul(page, 10, &new_val);
9197 -+
9198 -+ if (ret == 0)
9199 -+ *var = new_val;
9200 -+
9201 -+ return count;
9202 -+}
9203 -+
9204 -+static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page)
9205 -+{
9206 -+ struct bfq_data *bfqd = e->elevator_data;
9207 -+ return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ?
9208 -+ jiffies_to_msecs(bfqd->bfq_raising_max_time) :
9209 -+ jiffies_to_msecs(bfq_wrais_duration(bfqd)));
9210 -+}
9211 -+
9212 -+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
9213 -+{
9214 -+ struct bfq_queue *bfqq;
9215 -+ struct bfq_data *bfqd = e->elevator_data;
9216 -+ ssize_t num_char = 0;
9217 -+
9218 -+ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
9219 -+ bfqd->queued);
9220 -+
9221 -+ spin_lock_irq(bfqd->queue->queue_lock);
9222 -+
9223 -+ num_char += sprintf(page + num_char, "Active:\n");
9224 -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
9225 -+ num_char += sprintf(page + num_char,
9226 -+ "pid%d: weight %hu, nr_queued %d %d,"
9227 -+ " dur %d/%u\n",
9228 -+ bfqq->pid,
9229 -+ bfqq->entity.weight,
9230 -+ bfqq->queued[0],
9231 -+ bfqq->queued[1],
9232 -+ jiffies_to_msecs(jiffies -
9233 -+ bfqq->last_rais_start_finish),
9234 -+ jiffies_to_msecs(bfqq->raising_cur_max_time));
9235 -+ }
9236 -+
9237 -+ num_char += sprintf(page + num_char, "Idle:\n");
9238 -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
9239 -+ num_char += sprintf(page + num_char,
9240 -+ "pid%d: weight %hu, dur %d/%u\n",
9241 -+ bfqq->pid,
9242 -+ bfqq->entity.weight,
9243 -+ jiffies_to_msecs(jiffies -
9244 -+ bfqq->last_rais_start_finish),
9245 -+ jiffies_to_msecs(bfqq->raising_cur_max_time));
9246 -+ }
9247 -+
9248 -+ spin_unlock_irq(bfqd->queue->queue_lock);
9249 -+
9250 -+ return num_char;
9251 -+}
9252 -+
9253 -+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
9254 -+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
9255 -+{ \
9256 -+ struct bfq_data *bfqd = e->elevator_data; \
9257 -+ unsigned int __data = __VAR; \
9258 -+ if (__CONV) \
9259 -+ __data = jiffies_to_msecs(__data); \
9260 -+ return bfq_var_show(__data, (page)); \
9261 -+}
9262 -+SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0);
9263 -+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
9264 -+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
9265 -+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
9266 -+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
9267 -+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
9268 -+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
9269 -+SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0);
9270 -+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
9271 -+SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
9272 -+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
9273 -+SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0);
9274 -+SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1);
9275 -+SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time,
9276 -+ 1);
9277 -+SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show,
9278 -+ bfqd->bfq_raising_min_inter_arr_async,
9279 -+ 1);
9280 -+SHOW_FUNCTION(bfq_raising_max_softrt_rate_show,
9281 -+ bfqd->bfq_raising_max_softrt_rate, 0);
9282 -+#undef SHOW_FUNCTION
9283 -+
9284 -+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
9285 -+static ssize_t \
9286 -+__FUNC(struct elevator_queue *e, const char *page, size_t count) \
9287 -+{ \
9288 -+ struct bfq_data *bfqd = e->elevator_data; \
9289 -+ unsigned long uninitialized_var(__data); \
9290 -+ int ret = bfq_var_store(&__data, (page), count); \
9291 -+ if (__data < (MIN)) \
9292 -+ __data = (MIN); \
9293 -+ else if (__data > (MAX)) \
9294 -+ __data = (MAX); \
9295 -+ if (__CONV) \
9296 -+ *(__PTR) = msecs_to_jiffies(__data); \
9297 -+ else \
9298 -+ *(__PTR) = __data; \
9299 -+ return ret; \
9300 -+}
9301 -+STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0);
9302 -+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
9303 -+ INT_MAX, 1);
9304 -+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
9305 -+ INT_MAX, 1);
9306 -+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
9307 -+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
9308 -+ INT_MAX, 0);
9309 -+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
9310 -+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
9311 -+ 1, INT_MAX, 0);
9312 -+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
9313 -+ INT_MAX, 1);
9314 -+STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1,
9315 -+ INT_MAX, 0);
9316 -+STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0,
9317 -+ INT_MAX, 1);
9318 -+STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0,
9319 -+ INT_MAX, 1);
9320 -+STORE_FUNCTION(bfq_raising_min_idle_time_store,
9321 -+ &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1);
9322 -+STORE_FUNCTION(bfq_raising_min_inter_arr_async_store,
9323 -+ &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1);
9324 -+STORE_FUNCTION(bfq_raising_max_softrt_rate_store,
9325 -+ &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0);
9326 -+#undef STORE_FUNCTION
9327 -+
9328 -+/* do nothing for the moment */
9329 -+static ssize_t bfq_weights_store(struct elevator_queue *e,
9330 -+ const char *page, size_t count)
9331 -+{
9332 -+ return count;
9333 -+}
9334 -+
9335 -+static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
9336 -+{
9337 -+ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
9338 -+
9339 -+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
9340 -+ return bfq_calc_max_budget(bfqd->peak_rate, timeout);
9341 -+ else
9342 -+ return bfq_default_max_budget;
9343 -+}
9344 -+
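/*
 * Back-of-the-envelope version of the estimate above, with assumed units
 * (the patch delegates the exact fixed-point conversion to
 * bfq_calc_max_budget()): the maximum budget is roughly the number of
 * sectors the device can transfer, at its measured peak rate, within the
 * sync timeout.
 */
static unsigned long rough_max_budget(unsigned long peak_rate_sectors_per_ms,
				      unsigned long timeout_ms)
{
	return peak_rate_sectors_per_ms * timeout_ms;
}
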
9345 -+static ssize_t bfq_max_budget_store(struct elevator_queue *e,
9346 -+ const char *page, size_t count)
9347 -+{
9348 -+ struct bfq_data *bfqd = e->elevator_data;
9349 -+ unsigned long uninitialized_var(__data);
9350 -+ int ret = bfq_var_store(&__data, (page), count);
9351 -+
9352 -+ if (__data == 0)
9353 -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
9354 -+ else {
9355 -+ if (__data > INT_MAX)
9356 -+ __data = INT_MAX;
9357 -+ bfqd->bfq_max_budget = __data;
9358 -+ }
9359 -+
9360 -+ bfqd->bfq_user_max_budget = __data;
9361 -+
9362 -+ return ret;
9363 -+}
9364 -+
9365 -+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
9366 -+ const char *page, size_t count)
9367 -+{
9368 -+ struct bfq_data *bfqd = e->elevator_data;
9369 -+ unsigned long uninitialized_var(__data);
9370 -+ int ret = bfq_var_store(&__data, (page), count);
9371 -+
9372 -+ if (__data < 1)
9373 -+ __data = 1;
9374 -+ else if (__data > INT_MAX)
9375 -+ __data = INT_MAX;
9376 -+
9377 -+ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
9378 -+ if (bfqd->bfq_user_max_budget == 0)
9379 -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
9380 -+
9381 -+ return ret;
9382 -+}
9383 -+
9384 -+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
9385 -+ const char *page, size_t count)
9386 -+{
9387 -+ struct bfq_data *bfqd = e->elevator_data;
9388 -+ unsigned long uninitialized_var(__data);
9389 -+ int ret = bfq_var_store(&__data, (page), count);
9390 -+
9391 -+ if (__data > 1)
9392 -+ __data = 1;
9393 -+ if (__data == 0 && bfqd->low_latency != 0)
9394 -+ bfq_end_raising(bfqd);
9395 -+ bfqd->low_latency = __data;
9396 -+
9397 -+ return ret;
9398 -+}
9399 -+
9400 -+#define BFQ_ATTR(name) \
9401 -+ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
9402 -+
9403 -+static struct elv_fs_entry bfq_attrs[] = {
9404 -+ BFQ_ATTR(quantum),
9405 -+ BFQ_ATTR(fifo_expire_sync),
9406 -+ BFQ_ATTR(fifo_expire_async),
9407 -+ BFQ_ATTR(back_seek_max),
9408 -+ BFQ_ATTR(back_seek_penalty),
9409 -+ BFQ_ATTR(slice_idle),
9410 -+ BFQ_ATTR(max_budget),
9411 -+ BFQ_ATTR(max_budget_async_rq),
9412 -+ BFQ_ATTR(timeout_sync),
9413 -+ BFQ_ATTR(timeout_async),
9414 -+ BFQ_ATTR(low_latency),
9415 -+ BFQ_ATTR(raising_coeff),
9416 -+ BFQ_ATTR(raising_max_time),
9417 -+ BFQ_ATTR(raising_rt_max_time),
9418 -+ BFQ_ATTR(raising_min_idle_time),
9419 -+ BFQ_ATTR(raising_min_inter_arr_async),
9420 -+ BFQ_ATTR(raising_max_softrt_rate),
9421 -+ BFQ_ATTR(weights),
9422 -+ __ATTR_NULL
9423 -+};
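Each BFQ_ATTR(name) entry above expands, per the macro defined just before the table, into an __ATTR() initializer that wires the sysfs show/store pair for that tunable; for example:

/* BFQ_ATTR(quantum) expands to: */
__ATTR(quantum, S_IRUGO|S_IWUSR, bfq_quantum_show, bfq_quantum_store)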
9424 -+
9425 -+static struct elevator_type iosched_bfq = {
9426 -+ .ops = {
9427 -+ .elevator_merge_fn = bfq_merge,
9428 -+ .elevator_merged_fn = bfq_merged_request,
9429 -+ .elevator_merge_req_fn = bfq_merged_requests,
9430 -+ .elevator_allow_merge_fn = bfq_allow_merge,
9431 -+ .elevator_dispatch_fn = bfq_dispatch_requests,
9432 -+ .elevator_add_req_fn = bfq_insert_request,
9433 -+ .elevator_activate_req_fn = bfq_activate_request,
9434 -+ .elevator_deactivate_req_fn = bfq_deactivate_request,
9435 -+ .elevator_completed_req_fn = bfq_completed_request,
9436 -+ .elevator_former_req_fn = elv_rb_former_request,
9437 -+ .elevator_latter_req_fn = elv_rb_latter_request,
9438 -+ .elevator_init_icq_fn = bfq_init_icq,
9439 -+ .elevator_exit_icq_fn = bfq_exit_icq,
9440 -+ .elevator_set_req_fn = bfq_set_request,
9441 -+ .elevator_put_req_fn = bfq_put_request,
9442 -+ .elevator_may_queue_fn = bfq_may_queue,
9443 -+ .elevator_init_fn = bfq_init_queue,
9444 -+ .elevator_exit_fn = bfq_exit_queue,
9445 -+ },
9446 -+ .icq_size = sizeof(struct bfq_io_cq),
9447 -+ .icq_align = __alignof__(struct bfq_io_cq),
9448 -+ .elevator_attrs = bfq_attrs,
9449 -+ .elevator_name = "bfq",
9450 -+ .elevator_owner = THIS_MODULE,
9451 -+};
9452 -+
9453 -+static int __init bfq_init(void)
9454 -+{
9455 -+ /*
9456 -+ * Can be 0 on HZ < 1000 setups.
9457 -+ */
9458 -+ if (bfq_slice_idle == 0)
9459 -+ bfq_slice_idle = 1;
9460 -+
9461 -+ if (bfq_timeout_async == 0)
9462 -+ bfq_timeout_async = 1;
9463 -+
9464 -+ if (bfq_slab_setup())
9465 -+ return -ENOMEM;
9466 -+
9467 -+ elv_register(&iosched_bfq);
9468 -+ pr_info("BFQ I/O-scheduler version: v7r2");
9469 -+
9470 -+ return 0;
9471 -+}
9472 -+
9473 -+static void __exit bfq_exit(void)
9474 -+{
9475 -+ elv_unregister(&iosched_bfq);
9476 -+ bfq_slab_kill();
9477 -+}
9478 -+
9479 -+module_init(bfq_init);
9480 -+module_exit(bfq_exit);
9481 -+
9482 -+MODULE_AUTHOR("Fabio Checconi, Paolo Valente");
9483 -diff --git a/block/bfq-sched.c b/block/bfq-sched.c
9484 -new file mode 100644
9485 -index 0000000..999b475
9486 ---- /dev/null
9487 -+++ b/block/bfq-sched.c
9488 -@@ -0,0 +1,1078 @@
9489 -+/*
9490 -+ * BFQ: Hierarchical B-WF2Q+ scheduler.
9491 -+ *
9492 -+ * Based on ideas and code from CFQ:
9493 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
9494 -+ *
9495 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
9496 -+ * Paolo Valente <paolo.valente@×××××××.it>
9497 -+ *
9498 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
9499 -+ */
9500 -+
9501 -+#ifdef CONFIG_CGROUP_BFQIO
9502 -+#define for_each_entity(entity) \
9503 -+ for (; entity != NULL; entity = entity->parent)
9504 -+
9505 -+#define for_each_entity_safe(entity, parent) \
9506 -+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
9507 -+
9508 -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
9509 -+ int extract,
9510 -+ struct bfq_data *bfqd);
9511 -+
9512 -+static inline void bfq_update_budget(struct bfq_entity *next_in_service)
9513 -+{
9514 -+ struct bfq_entity *bfqg_entity;
9515 -+ struct bfq_group *bfqg;
9516 -+ struct bfq_sched_data *group_sd;
9517 -+
9518 -+ BUG_ON(next_in_service == NULL);
9519 -+
9520 -+ group_sd = next_in_service->sched_data;
9521 -+
9522 -+ bfqg = container_of(group_sd, struct bfq_group, sched_data);
9523 -+ /*
9524 -+ * bfq_group's my_entity field is not NULL only if the group
9525 -+ * is not the root group. We must not touch the root entity
9526 -+ * as it must never become an in-service entity.
9527 -+ */
9528 -+ bfqg_entity = bfqg->my_entity;
9529 -+ if (bfqg_entity != NULL)
9530 -+ bfqg_entity->budget = next_in_service->budget;
9531 -+}
9532 -+
9533 -+static int bfq_update_next_in_service(struct bfq_sched_data *sd)
9534 -+{
9535 -+ struct bfq_entity *next_in_service;
9536 -+
9537 -+ if (sd->in_service_entity != NULL)
9538 -+ /* will update/requeue at the end of service */
9539 -+ return 0;
9540 -+
9541 -+ /*
9542 -+ * NOTE: this can be improved in many ways, such as returning
9543 -+ * 1 (and thus propagating upwards the update) only when the
9544 -+ * budget changes, or caching the bfqq that will be scheduled
9545 -+ * next from this subtree. For now we worry more about
9546 -+ * correctness than about performance...
9547 -+ */
9548 -+ next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
9549 -+ sd->next_in_service = next_in_service;
9550 -+
9551 -+ if (next_in_service != NULL)
9552 -+ bfq_update_budget(next_in_service);
9553 -+
9554 -+ return 1;
9555 -+}
9556 -+
9557 -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
9558 -+ struct bfq_entity *entity)
9559 -+{
9560 -+ BUG_ON(sd->next_in_service != entity);
9561 -+}
9562 -+#else
9563 -+#define for_each_entity(entity) \
9564 -+ for (; entity != NULL; entity = NULL)
9565 -+
9566 -+#define for_each_entity_safe(entity, parent) \
9567 -+ for (parent = NULL; entity != NULL; entity = parent)
9568 -+
9569 -+static inline int bfq_update_next_in_service(struct bfq_sched_data *sd)
9570 -+{
9571 -+ return 0;
9572 -+}
9573 -+
9574 -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
9575 -+ struct bfq_entity *entity)
9576 -+{
9577 -+}
9578 -+
9579 -+static inline void bfq_update_budget(struct bfq_entity *next_in_service)
9580 -+{
9581 -+}
9582 -+#endif
9583 -+
9584 -+/*
9585 -+ * Shift for timestamp calculations. This actually limits the maximum
9586 -+ * service allowed in one timestamp delta (small shift values increase it),
9587 -+ * the maximum total weight that can be used for the queues in the system
9588 -+ * (big shift values increase it), and the period of virtual time wraparounds.
9589 -+ */
9590 -+#define WFQ_SERVICE_SHIFT 22
9591 -+
9592 -+/**
9593 -+ * bfq_gt - compare two timestamps.
9594 -+ * @a: first ts.
9595 -+ * @b: second ts.
9596 -+ *
9597 -+ * Return @a > @b, dealing with wrapping correctly.
9598 -+ */
9599 -+static inline int bfq_gt(u64 a, u64 b)
9600 -+{
9601 -+ return (s64)(a - b) > 0;
9602 -+}
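The unsigned subtraction followed by a signed comparison is what keeps the ordering correct across u64 wraparound of the virtual clock; a minimal user-space sketch (plain C, not part of the patch) illustrating the idiom:

#include <stdint.h>
#include <stdio.h>

static int bfq_gt(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) > 0;
}

int main(void)
{
	uint64_t before_wrap = UINT64_MAX - 5;	/* timestamp just before the clock wraps */
	uint64_t after_wrap  = 10;		/* timestamp shortly after the wrap */

	/* A plain '>' misorders the two; the signed-difference idiom does not. */
	printf("plain  : %d\n", after_wrap > before_wrap);
	printf("bfq_gt : %d\n", bfq_gt(after_wrap, before_wrap));
	return 0;
}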
9603 -+
9604 -+static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
9605 -+{
9606 -+ struct bfq_queue *bfqq = NULL;
9607 -+
9608 -+ BUG_ON(entity == NULL);
9609 -+
9610 -+ if (entity->my_sched_data == NULL)
9611 -+ bfqq = container_of(entity, struct bfq_queue, entity);
9612 -+
9613 -+ return bfqq;
9614 -+}
9615 -+
9616 -+
9617 -+/**
9618 -+ * bfq_delta - map service into the virtual time domain.
9619 -+ * @service: amount of service.
9620 -+ * @weight: scale factor (weight of an entity or weight sum).
9621 -+ */
9622 -+static inline u64 bfq_delta(unsigned long service,
9623 -+ unsigned long weight)
9624 -+{
9625 -+ u64 d = (u64)service << WFQ_SERVICE_SHIFT;
9626 -+
9627 -+ do_div(d, weight);
9628 -+ return d;
9629 -+}
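Concretely, bfq_delta() scales the service by 2^WFQ_SERVICE_SHIFT and divides by the weight, so a heavier entity advances its timestamps more slowly and therefore receives a proportionally larger share. A small stand-alone sketch of the same arithmetic:

#include <stdint.h>
#include <stdio.h>

#define WFQ_SERVICE_SHIFT 22

static uint64_t bfq_delta(unsigned long service, unsigned long weight)
{
	return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
}

int main(void)
{
	/* Equal service, different weights: the heavier entity's virtual time
	 * advances half as fast, i.e. it is entitled to twice the share. */
	printf("delta(8, weight 100) = %llu\n", (unsigned long long)bfq_delta(8, 100));
	printf("delta(8, weight 200) = %llu\n", (unsigned long long)bfq_delta(8, 200));
	return 0;
}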
9630 -+
9631 -+/**
9632 -+ * bfq_calc_finish - assign the finish time to an entity.
9633 -+ * @entity: the entity to act upon.
9634 -+ * @service: the service to be charged to the entity.
9635 -+ */
9636 -+static inline void bfq_calc_finish(struct bfq_entity *entity,
9637 -+ unsigned long service)
9638 -+{
9639 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9640 -+
9641 -+ BUG_ON(entity->weight == 0);
9642 -+
9643 -+ entity->finish = entity->start +
9644 -+ bfq_delta(service, entity->weight);
9645 -+
9646 -+ if (bfqq != NULL) {
9647 -+ bfq_log_bfqq(bfqq->bfqd, bfqq,
9648 -+ "calc_finish: serv %lu, w %d",
9649 -+ service, entity->weight);
9650 -+ bfq_log_bfqq(bfqq->bfqd, bfqq,
9651 -+ "calc_finish: start %llu, finish %llu, delta %llu",
9652 -+ entity->start, entity->finish,
9653 -+ bfq_delta(service, entity->weight));
9654 -+ }
9655 -+}
9656 -+
9657 -+/**
9658 -+ * bfq_entity_of - get an entity from a node.
9659 -+ * @node: the node field of the entity.
9660 -+ *
9661 -+ * Convert a node pointer to the relative entity. This is used only
9662 -+ * to simplify the logic of some functions and not as the generic
9663 -+ * conversion mechanism because, e.g., in the tree walking functions,
9664 -+ * the check for a %NULL value would be redundant.
9665 -+ */
9666 -+static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
9667 -+{
9668 -+ struct bfq_entity *entity = NULL;
9669 -+
9670 -+ if (node != NULL)
9671 -+ entity = rb_entry(node, struct bfq_entity, rb_node);
9672 -+
9673 -+ return entity;
9674 -+}
9675 -+
9676 -+/**
9677 -+ * bfq_extract - remove an entity from a tree.
9678 -+ * @root: the tree root.
9679 -+ * @entity: the entity to remove.
9680 -+ */
9681 -+static inline void bfq_extract(struct rb_root *root,
9682 -+ struct bfq_entity *entity)
9683 -+{
9684 -+ BUG_ON(entity->tree != root);
9685 -+
9686 -+ entity->tree = NULL;
9687 -+ rb_erase(&entity->rb_node, root);
9688 -+}
9689 -+
9690 -+/**
9691 -+ * bfq_idle_extract - extract an entity from the idle tree.
9692 -+ * @st: the service tree of the owning @entity.
9693 -+ * @entity: the entity being removed.
9694 -+ */
9695 -+static void bfq_idle_extract(struct bfq_service_tree *st,
9696 -+ struct bfq_entity *entity)
9697 -+{
9698 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9699 -+ struct rb_node *next;
9700 -+
9701 -+ BUG_ON(entity->tree != &st->idle);
9702 -+
9703 -+ if (entity == st->first_idle) {
9704 -+ next = rb_next(&entity->rb_node);
9705 -+ st->first_idle = bfq_entity_of(next);
9706 -+ }
9707 -+
9708 -+ if (entity == st->last_idle) {
9709 -+ next = rb_prev(&entity->rb_node);
9710 -+ st->last_idle = bfq_entity_of(next);
9711 -+ }
9712 -+
9713 -+ bfq_extract(&st->idle, entity);
9714 -+
9715 -+ if (bfqq != NULL)
9716 -+ list_del(&bfqq->bfqq_list);
9717 -+}
9718 -+
9719 -+/**
9720 -+ * bfq_insert - generic tree insertion.
9721 -+ * @root: tree root.
9722 -+ * @entity: entity to insert.
9723 -+ *
9724 -+ * This is used for the idle and the active tree, since they are both
9725 -+ * ordered by finish time.
9726 -+ */
9727 -+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
9728 -+{
9729 -+ struct bfq_entity *entry;
9730 -+ struct rb_node **node = &root->rb_node;
9731 -+ struct rb_node *parent = NULL;
9732 -+
9733 -+ BUG_ON(entity->tree != NULL);
9734 -+
9735 -+ while (*node != NULL) {
9736 -+ parent = *node;
9737 -+ entry = rb_entry(parent, struct bfq_entity, rb_node);
9738 -+
9739 -+ if (bfq_gt(entry->finish, entity->finish))
9740 -+ node = &parent->rb_left;
9741 -+ else
9742 -+ node = &parent->rb_right;
9743 -+ }
9744 -+
9745 -+ rb_link_node(&entity->rb_node, parent, node);
9746 -+ rb_insert_color(&entity->rb_node, root);
9747 -+
9748 -+ entity->tree = root;
9749 -+}
9750 -+
9751 -+/**
9752 -+ * bfq_update_min - update the min_start field of an entity.
9753 -+ * @entity: the entity to update.
9754 -+ * @node: one of its children.
9755 -+ *
9756 -+ * This function is called when @entity may store an invalid value for
9757 -+ * min_start due to updates to the active tree. The function assumes
9758 -+ * that the subtree rooted at @node (which may be its left or its right
9759 -+ * child) has a valid min_start value.
9760 -+ */
9761 -+static inline void bfq_update_min(struct bfq_entity *entity,
9762 -+ struct rb_node *node)
9763 -+{
9764 -+ struct bfq_entity *child;
9765 -+
9766 -+ if (node != NULL) {
9767 -+ child = rb_entry(node, struct bfq_entity, rb_node);
9768 -+ if (bfq_gt(entity->min_start, child->min_start))
9769 -+ entity->min_start = child->min_start;
9770 -+ }
9771 -+}
9772 -+
9773 -+/**
9774 -+ * bfq_update_active_node - recalculate min_start.
9775 -+ * @node: the node to update.
9776 -+ *
9777 -+ * @node may have changed position or one of its children may have moved,
9778 -+ * this function updates its min_start value. The left and right subtrees
9779 -+ * are assumed to hold a correct min_start value.
9780 -+ */
9781 -+static inline void bfq_update_active_node(struct rb_node *node)
9782 -+{
9783 -+ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
9784 -+
9785 -+ entity->min_start = entity->start;
9786 -+ bfq_update_min(entity, node->rb_right);
9787 -+ bfq_update_min(entity, node->rb_left);
9788 -+}
9789 -+
9790 -+/**
9791 -+ * bfq_update_active_tree - update min_start for the whole active tree.
9792 -+ * @node: the starting node.
9793 -+ *
9794 -+ * @node must be the deepest modified node after an update. This function
9795 -+ * updates its min_start using the values held by its children, assuming
9796 -+ * that they did not change, and then updates all the nodes that may have
9797 -+ * changed in the path to the root. The only nodes that may have changed
9798 -+ * are the ones in the path or their siblings.
9799 -+ */
9800 -+static void bfq_update_active_tree(struct rb_node *node)
9801 -+{
9802 -+ struct rb_node *parent;
9803 -+
9804 -+up:
9805 -+ bfq_update_active_node(node);
9806 -+
9807 -+ parent = rb_parent(node);
9808 -+ if (parent == NULL)
9809 -+ return;
9810 -+
9811 -+ if (node == parent->rb_left && parent->rb_right != NULL)
9812 -+ bfq_update_active_node(parent->rb_right);
9813 -+ else if (parent->rb_left != NULL)
9814 -+ bfq_update_active_node(parent->rb_left);
9815 -+
9816 -+ node = parent;
9817 -+ goto up;
9818 -+}
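The two helpers above maintain an augmented tree: each node caches the minimum start time of its subtree, which is what later lets bfq_first_active_entity() skip subtrees that cannot contain an eligible entity. A toy user-space sketch of the invariant (plain pointers and a plain '<' instead of the kernel rbtree and bfq_gt()):

#include <stdint.h>
#include <stdio.h>

struct toy_node {
	uint64_t start, min_start;
	struct toy_node *left, *right;
};

static void toy_update_node(struct toy_node *n)
{
	n->min_start = n->start;
	if (n->left && n->left->min_start < n->min_start)
		n->min_start = n->left->min_start;
	if (n->right && n->right->min_start < n->min_start)
		n->min_start = n->right->min_start;
}

int main(void)
{
	struct toy_node a = { .start = 40 }, b = { .start = 10 }, c = { .start = 30 };
	struct toy_node root = { .start = 20, .left = &a, .right = &c };

	a.left = &b;
	/* Recompute bottom-up along the modified path, as the kernel code does. */
	toy_update_node(&b);
	toy_update_node(&c);
	toy_update_node(&a);
	toy_update_node(&root);

	printf("root.min_start = %llu\n", (unsigned long long)root.min_start); /* 10 */
	return 0;
}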
9819 -+
9820 -+/**
9821 -+ * bfq_active_insert - insert an entity in the active tree of its group/device.
9822 -+ * @st: the service tree of the entity.
9823 -+ * @entity: the entity being inserted.
9824 -+ *
9825 -+ * The active tree is ordered by finish time, but an extra key is kept
9826 -+ * per each node, containing the minimum value for the start times of
9827 -+ * its children (and the node itself), so it's possible to search for
9828 -+ * the eligible node with the lowest finish time in logarithmic time.
9829 -+ */
9830 -+static void bfq_active_insert(struct bfq_service_tree *st,
9831 -+ struct bfq_entity *entity)
9832 -+{
9833 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9834 -+ struct rb_node *node = &entity->rb_node;
9835 -+
9836 -+ bfq_insert(&st->active, entity);
9837 -+
9838 -+ if (node->rb_left != NULL)
9839 -+ node = node->rb_left;
9840 -+ else if (node->rb_right != NULL)
9841 -+ node = node->rb_right;
9842 -+
9843 -+ bfq_update_active_tree(node);
9844 -+
9845 -+ if (bfqq != NULL)
9846 -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
9847 -+}
9848 -+
9849 -+/**
9850 -+ * bfq_ioprio_to_weight - calc a weight from an ioprio.
9851 -+ * @ioprio: the ioprio value to convert.
9852 -+ */
9853 -+static unsigned short bfq_ioprio_to_weight(int ioprio)
9854 -+{
9855 -+ WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
9856 -+ return IOPRIO_BE_NR - ioprio;
9857 -+}
9858 -+
9859 -+/**
9860 -+ * bfq_weight_to_ioprio - calc an ioprio from a weight.
9861 -+ * @weight: the weight value to convert.
9862 -+ *
9863 -+ * To preserve as much as possible the old only-ioprio user interface,
9864 -+ * 0 is used as an escape ioprio value for weights (numerically) equal to or
9865 -+ * larger than IOPRIO_BE_NR.
9866 -+ */
9867 -+static unsigned short bfq_weight_to_ioprio(int weight)
9868 -+{
9869 -+ WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
9870 -+ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
9871 -+}
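The two converters above map best-effort ioprio levels onto weights and back, with 0 acting as the escape ioprio for weights at or above IOPRIO_BE_NR. A user-space sketch of the mapping (IOPRIO_BE_NR assumed to be 8, as in the kernel headers):

#include <stdio.h>

#define IOPRIO_BE_NR 8

static unsigned short ioprio_to_weight(int ioprio)
{
	return IOPRIO_BE_NR - ioprio;
}

static unsigned short weight_to_ioprio(int weight)
{
	return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
}

int main(void)
{
	for (int ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
		printf("ioprio %d -> weight %u\n", ioprio, ioprio_to_weight(ioprio));
	printf("weight 100 -> ioprio %u (escape value)\n", weight_to_ioprio(100));
	return 0;
}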
9872 -+
9873 -+static inline void bfq_get_entity(struct bfq_entity *entity)
9874 -+{
9875 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9876 -+ struct bfq_sched_data *sd;
9877 -+
9878 -+ if (bfqq != NULL) {
9879 -+ sd = entity->sched_data;
9880 -+ atomic_inc(&bfqq->ref);
9881 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
9882 -+ bfqq, atomic_read(&bfqq->ref));
9883 -+ }
9884 -+}
9885 -+
9886 -+/**
9887 -+ * bfq_find_deepest - find the deepest node that an extraction can modify.
9888 -+ * @node: the node being removed.
9889 -+ *
9890 -+ * Do the first step of an extraction in an rb tree, looking for the
9891 -+ * node that will replace @node, and returning the deepest node that
9892 -+ * the following modifications to the tree can touch. If @node is the
9893 -+ * last node in the tree return %NULL.
9894 -+ */
9895 -+static struct rb_node *bfq_find_deepest(struct rb_node *node)
9896 -+{
9897 -+ struct rb_node *deepest;
9898 -+
9899 -+ if (node->rb_right == NULL && node->rb_left == NULL)
9900 -+ deepest = rb_parent(node);
9901 -+ else if (node->rb_right == NULL)
9902 -+ deepest = node->rb_left;
9903 -+ else if (node->rb_left == NULL)
9904 -+ deepest = node->rb_right;
9905 -+ else {
9906 -+ deepest = rb_next(node);
9907 -+ if (deepest->rb_right != NULL)
9908 -+ deepest = deepest->rb_right;
9909 -+ else if (rb_parent(deepest) != node)
9910 -+ deepest = rb_parent(deepest);
9911 -+ }
9912 -+
9913 -+ return deepest;
9914 -+}
9915 -+
9916 -+/**
9917 -+ * bfq_active_extract - remove an entity from the active tree.
9918 -+ * @st: the service_tree containing the tree.
9919 -+ * @entity: the entity being removed.
9920 -+ */
9921 -+static void bfq_active_extract(struct bfq_service_tree *st,
9922 -+ struct bfq_entity *entity)
9923 -+{
9924 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9925 -+ struct rb_node *node;
9926 -+
9927 -+ node = bfq_find_deepest(&entity->rb_node);
9928 -+ bfq_extract(&st->active, entity);
9929 -+
9930 -+ if (node != NULL)
9931 -+ bfq_update_active_tree(node);
9932 -+
9933 -+ if (bfqq != NULL)
9934 -+ list_del(&bfqq->bfqq_list);
9935 -+}
9936 -+
9937 -+/**
9938 -+ * bfq_idle_insert - insert an entity into the idle tree.
9939 -+ * @st: the service tree containing the tree.
9940 -+ * @entity: the entity to insert.
9941 -+ */
9942 -+static void bfq_idle_insert(struct bfq_service_tree *st,
9943 -+ struct bfq_entity *entity)
9944 -+{
9945 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9946 -+ struct bfq_entity *first_idle = st->first_idle;
9947 -+ struct bfq_entity *last_idle = st->last_idle;
9948 -+
9949 -+ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish))
9950 -+ st->first_idle = entity;
9951 -+ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish))
9952 -+ st->last_idle = entity;
9953 -+
9954 -+ bfq_insert(&st->idle, entity);
9955 -+
9956 -+ if (bfqq != NULL)
9957 -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
9958 -+}
9959 -+
9960 -+/**
9961 -+ * bfq_forget_entity - remove an entity from the wfq trees.
9962 -+ * @st: the service tree.
9963 -+ * @entity: the entity being removed.
9964 -+ *
9965 -+ * Update the device status and forget everything about @entity, putting
9966 -+ * the device reference to it, if it is a queue. Entities belonging to
9967 -+ * groups are not refcounted.
9968 -+ */
9969 -+static void bfq_forget_entity(struct bfq_service_tree *st,
9970 -+ struct bfq_entity *entity)
9971 -+{
9972 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
9973 -+ struct bfq_sched_data *sd;
9974 -+
9975 -+ BUG_ON(!entity->on_st);
9976 -+
9977 -+ entity->on_st = 0;
9978 -+ st->wsum -= entity->weight;
9979 -+ if (bfqq != NULL) {
9980 -+ sd = entity->sched_data;
9981 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
9982 -+ bfqq, atomic_read(&bfqq->ref));
9983 -+ bfq_put_queue(bfqq);
9984 -+ }
9985 -+}
9986 -+
9987 -+/**
9988 -+ * bfq_put_idle_entity - release the idle tree ref of an entity.
9989 -+ * @st: service tree for the entity.
9990 -+ * @entity: the entity being released.
9991 -+ */
9992 -+static void bfq_put_idle_entity(struct bfq_service_tree *st,
9993 -+ struct bfq_entity *entity)
9994 -+{
9995 -+ bfq_idle_extract(st, entity);
9996 -+ bfq_forget_entity(st, entity);
9997 -+}
9998 -+
9999 -+/**
10000 -+ * bfq_forget_idle - update the idle tree if necessary.
10001 -+ * @st: the service tree to act upon.
10002 -+ *
10003 -+ * To preserve the global O(log N) complexity we only remove one entry here;
10004 -+ * as the idle tree will not grow indefinitely this can be done safely.
10005 -+ */
10006 -+static void bfq_forget_idle(struct bfq_service_tree *st)
10007 -+{
10008 -+ struct bfq_entity *first_idle = st->first_idle;
10009 -+ struct bfq_entity *last_idle = st->last_idle;
10010 -+
10011 -+ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL &&
10012 -+ !bfq_gt(last_idle->finish, st->vtime)) {
10013 -+ /*
10014 -+ * Forget the whole idle tree, increasing the vtime past
10015 -+ * the last finish time of idle entities.
10016 -+ */
10017 -+ st->vtime = last_idle->finish;
10018 -+ }
10019 -+
10020 -+ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime))
10021 -+ bfq_put_idle_entity(st, first_idle);
10022 -+}
10023 -+
10024 -+static struct bfq_service_tree *
10025 -+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
10026 -+ struct bfq_entity *entity)
10027 -+{
10028 -+ struct bfq_service_tree *new_st = old_st;
10029 -+
10030 -+ if (entity->ioprio_changed) {
10031 -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
10032 -+
10033 -+ BUG_ON(old_st->wsum < entity->weight);
10034 -+ old_st->wsum -= entity->weight;
10035 -+
10036 -+ if (entity->new_weight != entity->orig_weight) {
10037 -+ entity->orig_weight = entity->new_weight;
10038 -+ entity->ioprio =
10039 -+ bfq_weight_to_ioprio(entity->orig_weight);
10040 -+ } else if (entity->new_ioprio != entity->ioprio) {
10041 -+ entity->ioprio = entity->new_ioprio;
10042 -+ entity->orig_weight =
10043 -+ bfq_ioprio_to_weight(entity->ioprio);
10044 -+ } else
10045 -+ entity->new_weight = entity->orig_weight =
10046 -+ bfq_ioprio_to_weight(entity->ioprio);
10047 -+
10048 -+ entity->ioprio_class = entity->new_ioprio_class;
10049 -+ entity->ioprio_changed = 0;
10050 -+
10051 -+ /*
10052 -+ * NOTE: here we may be changing the weight too early;
10053 -+ * this will cause unfairness. The correct approach
10054 -+ * would have required additional complexity to defer
10055 -+ * weight changes to the proper time instants (i.e.,
10056 -+ * when entity->finish <= old_st->vtime).
10057 -+ */
10058 -+ new_st = bfq_entity_service_tree(entity);
10059 -+ entity->weight = entity->orig_weight *
10060 -+ (bfqq != NULL ? bfqq->raising_coeff : 1);
10061 -+ new_st->wsum += entity->weight;
10062 -+
10063 -+ if (new_st != old_st)
10064 -+ entity->start = new_st->vtime;
10065 -+ }
10066 -+
10067 -+ return new_st;
10068 -+}
10069 -+
10070 -+/**
10071 -+ * bfq_bfqq_served - update the scheduler status after selection for service.
10072 -+ * @bfqq: the queue being served.
10073 -+ * @served: bytes to transfer.
10074 -+ *
10075 -+ * NOTE: this can be optimized, as the timestamps of upper level entities
10076 -+ * are synchronized every time a new bfqq is selected for service. For now,
10077 -+ * we keep it to better check consistency.
10078 -+ */
10079 -+static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
10080 -+{
10081 -+ struct bfq_entity *entity = &bfqq->entity;
10082 -+ struct bfq_service_tree *st;
10083 -+
10084 -+ for_each_entity(entity) {
10085 -+ st = bfq_entity_service_tree(entity);
10086 -+
10087 -+ entity->service += served;
10088 -+ BUG_ON(entity->service > entity->budget);
10089 -+ BUG_ON(st->wsum == 0);
10090 -+
10091 -+ st->vtime += bfq_delta(served, st->wsum);
10092 -+ bfq_forget_idle(st);
10093 -+ }
10094 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served);
10095 -+}
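In the loop above, each entity's own service counter grows by the raw amount served while the tree's virtual time advances by the served amount divided by the total weight; a simplified user-space sketch of that accounting (toy structs, not the kernel's):

#include <stdint.h>
#include <stdio.h>

#define WFQ_SERVICE_SHIFT 22

struct toy_tree   { uint64_t vtime; unsigned long wsum; };
struct toy_entity { unsigned long service, weight; };

static uint64_t bfq_delta(unsigned long service, unsigned long weight)
{
	return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
}

int main(void)
{
	struct toy_tree st = { .vtime = 0, .wsum = 300 };     /* e.g. three queues of weight 100 */
	struct toy_entity e = { .service = 0, .weight = 100 };
	unsigned long served = 64;                            /* sectors just transferred */

	e.service += served;                     /* entity is charged the raw amount */
	st.vtime  += bfq_delta(served, st.wsum); /* vtime grows by served / total weight */

	printf("entity service = %lu, tree vtime = %llu\n",
	       e.service, (unsigned long long)st.vtime);
	return 0;
}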
10096 -+
10097 -+/**
10098 -+ * bfq_bfqq_charge_full_budget - set the service to the entity budget.
10099 -+ * @bfqq: the queue that needs a service update.
10100 -+ *
10101 -+ * When it's not possible to be fair in the service domain, because
10102 -+ * a queue is not consuming its budget fast enough (the meaning of
10103 -+ * fast depends on the timeout parameter), we charge it a full
10104 -+ * budget. In this way we should obtain a sort of time-domain
10105 -+ * fairness among all the seeky/slow queues.
10106 -+ */
10107 -+static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
10108 -+{
10109 -+ struct bfq_entity *entity = &bfqq->entity;
10110 -+
10111 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
10112 -+
10113 -+ bfq_bfqq_served(bfqq, entity->budget - entity->service);
10114 -+}
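In other words, a queue that hits the timeout after using only part of its budget gets charged the unused remainder as well, so in the time domain it cannot come out ahead of well-behaved queues; a tiny numeric sketch:

#include <stdio.h>

int main(void)
{
	unsigned long budget = 64, service = 12;	/* example values, in sectors */

	/* bfq_bfqq_charge_full_budget() feeds this difference to bfq_bfqq_served(),
	 * so the queue ends up charged for its whole budget. */
	printf("extra charge  : %lu sectors\n", budget - service);
	printf("total charged : %lu sectors\n", service + (budget - service));
	return 0;
}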
10115 -+
10116 -+/**
10117 -+ * __bfq_activate_entity - activate an entity.
10118 -+ * @entity: the entity being activated.
10119 -+ *
10120 -+ * Called whenever an entity is activated, i.e., it is not active and one
10121 -+ * of its children receives a new request, or has to be reactivated due to
10122 -+ * budget exhaustion. It uses the current budget of the entity (and the
10123 -+ * service received if @entity is active) of the queue to calculate its
10124 -+ * timestamps.
10125 -+ */
10126 -+static void __bfq_activate_entity(struct bfq_entity *entity)
10127 -+{
10128 -+ struct bfq_sched_data *sd = entity->sched_data;
10129 -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
10130 -+
10131 -+ if (entity == sd->in_service_entity) {
10132 -+ BUG_ON(entity->tree != NULL);
10133 -+ /*
10134 -+ * If we are requeueing the current entity we have
10135 -+ * to take care of not charging it for service it has
10136 -+ * not received.
10137 -+ */
10138 -+ bfq_calc_finish(entity, entity->service);
10139 -+ entity->start = entity->finish;
10140 -+ sd->in_service_entity = NULL;
10141 -+ } else if (entity->tree == &st->active) {
10142 -+ /*
10143 -+ * Requeueing an entity due to a change of some
10144 -+ * next_in_service entity below it. We reuse the
10145 -+ * old start time.
10146 -+ */
10147 -+ bfq_active_extract(st, entity);
10148 -+ } else if (entity->tree == &st->idle) {
10149 -+ /*
10150 -+ * Must be on the idle tree, bfq_idle_extract() will
10151 -+ * check for that.
10152 -+ */
10153 -+ bfq_idle_extract(st, entity);
10154 -+ entity->start = bfq_gt(st->vtime, entity->finish) ?
10155 -+ st->vtime : entity->finish;
10156 -+ } else {
10157 -+ /*
10158 -+ * The finish time of the entity may be invalid, and
10159 -+ * it is in the past for sure, otherwise the queue
10160 -+ * would have been on the idle tree.
10161 -+ */
10162 -+ entity->start = st->vtime;
10163 -+ st->wsum += entity->weight;
10164 -+ bfq_get_entity(entity);
10165 -+
10166 -+ BUG_ON(entity->on_st);
10167 -+ entity->on_st = 1;
10168 -+ }
10169 -+
10170 -+ st = __bfq_entity_update_weight_prio(st, entity);
10171 -+ bfq_calc_finish(entity, entity->budget);
10172 -+ bfq_active_insert(st, entity);
10173 -+}
10174 -+
10175 -+/**
10176 -+ * bfq_activate_entity - activate an entity and its ancestors if necessary.
10177 -+ * @entity: the entity to activate.
10178 -+ *
10179 -+ * Activate @entity and all the entities on the path from it to the root.
10180 -+ */
10181 -+static void bfq_activate_entity(struct bfq_entity *entity)
10182 -+{
10183 -+ struct bfq_sched_data *sd;
10184 -+
10185 -+ for_each_entity(entity) {
10186 -+ __bfq_activate_entity(entity);
10187 -+
10188 -+ sd = entity->sched_data;
10189 -+ if (!bfq_update_next_in_service(sd))
10190 -+ /*
10191 -+ * No need to propagate the activation to the
10192 -+ * upper entities, as they will be updated when
10193 -+ * the in-service entity is rescheduled.
10194 -+ */
10195 -+ break;
10196 -+ }
10197 -+}
10198 -+
10199 -+/**
10200 -+ * __bfq_deactivate_entity - deactivate an entity from its service tree.
10201 -+ * @entity: the entity to deactivate.
10202 -+ * @requeue: if false, the entity will not be put into the idle tree.
10203 -+ *
10204 -+ * Deactivate an entity, independently from its previous state. If the
10205 -+ * entity was not on a service tree just return, otherwise if it is on
10206 -+ * any scheduler tree, extract it from that tree, and if necessary
10207 -+ * and if the caller specified @requeue, put it on the idle tree.
10208 -+ *
10209 -+ * Return %1 if the caller should update the entity hierarchy, i.e.,
10210 -+ * if the entity was under service or if it was the next_in_service for
10211 -+ * its sched_data; return %0 otherwise.
10212 -+ */
10213 -+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
10214 -+{
10215 -+ struct bfq_sched_data *sd = entity->sched_data;
10216 -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
10217 -+ int was_in_service = entity == sd->in_service_entity;
10218 -+ int ret = 0;
10219 -+
10220 -+ if (!entity->on_st)
10221 -+ return 0;
10222 -+
10223 -+ BUG_ON(was_in_service && entity->tree != NULL);
10224 -+
10225 -+ if (was_in_service) {
10226 -+ bfq_calc_finish(entity, entity->service);
10227 -+ sd->in_service_entity = NULL;
10228 -+ } else if (entity->tree == &st->active)
10229 -+ bfq_active_extract(st, entity);
10230 -+ else if (entity->tree == &st->idle)
10231 -+ bfq_idle_extract(st, entity);
10232 -+ else if (entity->tree != NULL)
10233 -+ BUG();
10234 -+
10235 -+ if (was_in_service || sd->next_in_service == entity)
10236 -+ ret = bfq_update_next_in_service(sd);
10237 -+
10238 -+ if (!requeue || !bfq_gt(entity->finish, st->vtime))
10239 -+ bfq_forget_entity(st, entity);
10240 -+ else
10241 -+ bfq_idle_insert(st, entity);
10242 -+
10243 -+ BUG_ON(sd->in_service_entity == entity);
10244 -+ BUG_ON(sd->next_in_service == entity);
10245 -+
10246 -+ return ret;
10247 -+}
10248 -+
10249 -+/**
10250 -+ * bfq_deactivate_entity - deactivate an entity.
10251 -+ * @entity: the entity to deactivate.
10252 -+ * @requeue: true if the entity can be put on the idle tree
10253 -+ */
10254 -+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
10255 -+{
10256 -+ struct bfq_sched_data *sd;
10257 -+ struct bfq_entity *parent;
10258 -+
10259 -+ for_each_entity_safe(entity, parent) {
10260 -+ sd = entity->sched_data;
10261 -+
10262 -+ if (!__bfq_deactivate_entity(entity, requeue))
10263 -+ /*
10264 -+ * The parent entity is still backlogged, and
10265 -+ * we don't need to update it as it is still
10266 -+ * under service.
10267 -+ */
10268 -+ break;
10269 -+
10270 -+ if (sd->next_in_service != NULL)
10271 -+ /*
10272 -+ * The parent entity is still backlogged and
10273 -+ * the budgets on the path towards the root
10274 -+ * need to be updated.
10275 -+ */
10276 -+ goto update;
10277 -+
10278 -+ /*
10279 -+ * If we reach this point, the parent is no longer backlogged and
10280 -+ * we want to propagate the dequeue upwards.
10281 -+ */
10282 -+ requeue = 1;
10283 -+ }
10284 -+
10285 -+ return;
10286 -+
10287 -+update:
10288 -+ entity = parent;
10289 -+ for_each_entity(entity) {
10290 -+ __bfq_activate_entity(entity);
10291 -+
10292 -+ sd = entity->sched_data;
10293 -+ if (!bfq_update_next_in_service(sd))
10294 -+ break;
10295 -+ }
10296 -+}
10297 -+
10298 -+/**
10299 -+ * bfq_update_vtime - update vtime if necessary.
10300 -+ * @st: the service tree to act upon.
10301 -+ *
10302 -+ * If necessary update the service tree vtime to have at least one
10303 -+ * eligible entity, skipping to its start time. Assumes that the
10304 -+ * active tree of the device is not empty.
10305 -+ *
10306 -+ * NOTE: this hierarchical implementation updates vtimes quite often;
10307 -+ * we may end up with reactivated tasks getting timestamps after a
10308 -+ * vtime skip done because we needed a ->first_active entity on some
10309 -+ * intermediate node.
10310 -+ */
10311 -+static void bfq_update_vtime(struct bfq_service_tree *st)
10312 -+{
10313 -+ struct bfq_entity *entry;
10314 -+ struct rb_node *node = st->active.rb_node;
10315 -+
10316 -+ entry = rb_entry(node, struct bfq_entity, rb_node);
10317 -+ if (bfq_gt(entry->min_start, st->vtime)) {
10318 -+ st->vtime = entry->min_start;
10319 -+ bfq_forget_idle(st);
10320 -+ }
10321 -+}
10322 -+
10323 -+/**
10324 -+ * bfq_first_active_entity - find the eligible entity with
10325 -+ * the smallest finish time
10326 -+ * @st: the service tree to select from.
10327 -+ *
10328 -+ * This function searches the first schedulable entity, starting from the
10329 -+ * root of the tree and going on the left every time on this side there is
10330 -+ * a subtree with at least one eligible (start >= vtime) entity. The path
10331 -+ * on the right is followed only if a) the left subtree contains no eligible
10332 -+ * entities and b) no eligible entity has been found yet.
10333 -+ */
10334 -+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
10335 -+{
10336 -+ struct bfq_entity *entry, *first = NULL;
10337 -+ struct rb_node *node = st->active.rb_node;
10338 -+
10339 -+ while (node != NULL) {
10340 -+ entry = rb_entry(node, struct bfq_entity, rb_node);
10341 -+left:
10342 -+ if (!bfq_gt(entry->start, st->vtime))
10343 -+ first = entry;
10344 -+
10345 -+ BUG_ON(bfq_gt(entry->min_start, st->vtime));
10346 -+
10347 -+ if (node->rb_left != NULL) {
10348 -+ entry = rb_entry(node->rb_left,
10349 -+ struct bfq_entity, rb_node);
10350 -+ if (!bfq_gt(entry->min_start, st->vtime)) {
10351 -+ node = node->rb_left;
10352 -+ goto left;
10353 -+ }
10354 -+ }
10355 -+ if (first != NULL)
10356 -+ break;
10357 -+ node = node->rb_right;
10358 -+ }
10359 -+
10360 -+ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active));
10361 -+ return first;
10362 -+}
10363 -+
10364 -+/**
10365 -+ * __bfq_lookup_next_entity - return the first eligible entity in @st.
10366 -+ * @st: the service tree.
10367 -+ *
10368 -+ * Update the virtual time in @st and return the first eligible entity
10369 -+ * it contains.
10370 -+ */
10371 -+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
10372 -+ bool force)
10373 -+{
10374 -+ struct bfq_entity *entity, *new_next_in_service = NULL;
10375 -+
10376 -+ if (RB_EMPTY_ROOT(&st->active))
10377 -+ return NULL;
10378 -+
10379 -+ bfq_update_vtime(st);
10380 -+ entity = bfq_first_active_entity(st);
10381 -+ BUG_ON(bfq_gt(entity->start, st->vtime));
10382 -+
10383 -+ /*
10384 -+ * If the chosen entity does not match with the sched_data's
10385 -+ * next_in_service and we are forcedly serving the IDLE priority
10386 -+ * class tree, bubble up budget update.
10387 -+ */
10388 -+ if (unlikely(force && entity != entity->sched_data->next_in_service)) {
10389 -+ new_next_in_service = entity;
10390 -+ for_each_entity(new_next_in_service)
10391 -+ bfq_update_budget(new_next_in_service);
10392 -+ }
10393 -+
10394 -+ return entity;
10395 -+}
10396 -+
10397 -+/**
10398 -+ * bfq_lookup_next_entity - return the first eligible entity in @sd.
10399 -+ * @sd: the sched_data.
10400 -+ * @extract: if true the returned entity will be also extracted from @sd.
10401 -+ *
10402 -+ * NOTE: since we cache the next_in_service entity at each level of the
10403 -+ * hierarchy, the complexity of the lookup can be decreased with
10404 -+ * absolutely no effort just returning the cached next_in_service value;
10405 -+ * we prefer to do full lookups to test the consistency of * the data
10406 -+ * structures.
10407 -+ */
10408 -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
10409 -+ int extract,
10410 -+ struct bfq_data *bfqd)
10411 -+{
10412 -+ struct bfq_service_tree *st = sd->service_tree;
10413 -+ struct bfq_entity *entity;
10414 -+ int i = 0;
10415 -+
10416 -+ BUG_ON(sd->in_service_entity != NULL);
10417 -+
10418 -+ if (bfqd != NULL &&
10419 -+ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
10420 -+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
10421 -+ true);
10422 -+ if (entity != NULL) {
10423 -+ i = BFQ_IOPRIO_CLASSES - 1;
10424 -+ bfqd->bfq_class_idle_last_service = jiffies;
10425 -+ sd->next_in_service = entity;
10426 -+ }
10427 -+ }
10428 -+ for (; i < BFQ_IOPRIO_CLASSES; i++) {
10429 -+ entity = __bfq_lookup_next_entity(st + i, false);
10430 -+ if (entity != NULL) {
10431 -+ if (extract) {
10432 -+ bfq_check_next_in_service(sd, entity);
10433 -+ bfq_active_extract(st + i, entity);
10434 -+ sd->in_service_entity = entity;
10435 -+ sd->next_in_service = NULL;
10436 -+ }
10437 -+ break;
10438 -+ }
10439 -+ }
10440 -+
10441 -+ return entity;
10442 -+}
10443 -+
10444 -+/*
10445 -+ * Get next queue for service.
10446 -+ */
10447 -+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
10448 -+{
10449 -+ struct bfq_entity *entity = NULL;
10450 -+ struct bfq_sched_data *sd;
10451 -+ struct bfq_queue *bfqq;
10452 -+
10453 -+ BUG_ON(bfqd->in_service_queue != NULL);
10454 -+
10455 -+ if (bfqd->busy_queues == 0)
10456 -+ return NULL;
10457 -+
10458 -+ sd = &bfqd->root_group->sched_data;
10459 -+ for (; sd != NULL; sd = entity->my_sched_data) {
10460 -+ entity = bfq_lookup_next_entity(sd, 1, bfqd);
10461 -+ BUG_ON(entity == NULL);
10462 -+ entity->service = 0;
10463 -+ }
10464 -+
10465 -+ bfqq = bfq_entity_to_bfqq(entity);
10466 -+ BUG_ON(bfqq == NULL);
10467 -+
10468 -+ return bfqq;
10469 -+}
10470 -+
10471 -+/*
10472 -+ * Forced extraction of the given queue.
10473 -+ */
10474 -+static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
10475 -+ struct bfq_queue *bfqq)
10476 -+{
10477 -+ struct bfq_entity *entity;
10478 -+ struct bfq_sched_data *sd;
10479 -+
10480 -+ BUG_ON(bfqd->in_service_queue != NULL);
10481 -+
10482 -+ entity = &bfqq->entity;
10483 -+ /*
10484 -+ * Bubble up extraction/update from the leaf to the root.
10485 -+ */
10486 -+ for_each_entity(entity) {
10487 -+ sd = entity->sched_data;
10488 -+ bfq_update_budget(entity);
10489 -+ bfq_update_vtime(bfq_entity_service_tree(entity));
10490 -+ bfq_active_extract(bfq_entity_service_tree(entity), entity);
10491 -+ sd->in_service_entity = entity;
10492 -+ sd->next_in_service = NULL;
10493 -+ entity->service = 0;
10494 -+ }
10495 -+
10496 -+ return;
10497 -+}
10498 -+
10499 -+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
10500 -+{
10501 -+ if (bfqd->in_service_bic != NULL) {
10502 -+ put_io_context(bfqd->in_service_bic->icq.ioc);
10503 -+ bfqd->in_service_bic = NULL;
10504 -+ }
10505 -+
10506 -+ bfqd->in_service_queue = NULL;
10507 -+ del_timer(&bfqd->idle_slice_timer);
10508 -+}
10509 -+
10510 -+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
10511 -+ int requeue)
10512 -+{
10513 -+ struct bfq_entity *entity = &bfqq->entity;
10514 -+
10515 -+ if (bfqq == bfqd->in_service_queue)
10516 -+ __bfq_bfqd_reset_in_service(bfqd);
10517 -+
10518 -+ bfq_deactivate_entity(entity, requeue);
10519 -+}
10520 -+
10521 -+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
10522 -+{
10523 -+ struct bfq_entity *entity = &bfqq->entity;
10524 -+
10525 -+ bfq_activate_entity(entity);
10526 -+}
10527 -+
10528 -+/*
10529 -+ * Called when the bfqq no longer has requests pending, remove it from
10530 -+ * the service tree.
10531 -+ */
10532 -+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
10533 -+ int requeue)
10534 -+{
10535 -+ BUG_ON(!bfq_bfqq_busy(bfqq));
10536 -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
10537 -+
10538 -+ bfq_log_bfqq(bfqd, bfqq, "del from busy");
10539 -+
10540 -+ bfq_clear_bfqq_busy(bfqq);
10541 -+
10542 -+ BUG_ON(bfqd->busy_queues == 0);
10543 -+ bfqd->busy_queues--;
10544 -+ if (bfqq->raising_coeff > 1)
10545 -+ bfqd->raised_busy_queues--;
10546 -+
10547 -+ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
10548 -+}
10549 -+
10550 -+/*
10551 -+ * Called when an inactive queue receives a new request.
10552 -+ */
10553 -+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
10554 -+{
10555 -+ BUG_ON(bfq_bfqq_busy(bfqq));
10556 -+ BUG_ON(bfqq == bfqd->in_service_queue);
10557 -+
10558 -+ bfq_log_bfqq(bfqd, bfqq, "add to busy");
10559 -+
10560 -+ bfq_activate_bfqq(bfqd, bfqq);
10561 -+
10562 -+ bfq_mark_bfqq_busy(bfqq);
10563 -+ bfqd->busy_queues++;
10564 -+ if (bfqq->raising_coeff > 1)
10565 -+ bfqd->raised_busy_queues++;
10566 -+}
10567 -diff --git a/block/bfq.h b/block/bfq.h
10568 -new file mode 100644
10569 -index 0000000..3ca8482
10570 ---- /dev/null
10571 -+++ b/block/bfq.h
10572 -@@ -0,0 +1,622 @@
10573 -+/*
10574 -+ * BFQ-v7r2 for 3.14.0: data structures and common functions prototypes.
10575 -+ *
10576 -+ * Based on ideas and code from CFQ:
10577 -+ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
10578 -+ *
10579 -+ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
10580 -+ * Paolo Valente <paolo.valente@×××××××.it>
10581 -+ *
10582 -+ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
10583 -+ */
10584 -+
10585 -+#ifndef _BFQ_H
10586 -+#define _BFQ_H
10587 -+
10588 -+#include <linux/blktrace_api.h>
10589 -+#include <linux/hrtimer.h>
10590 -+#include <linux/ioprio.h>
10591 -+#include <linux/rbtree.h>
10592 -+
10593 -+#define BFQ_IOPRIO_CLASSES 3
10594 -+#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
10595 -+
10596 -+#define BFQ_MIN_WEIGHT 1
10597 -+#define BFQ_MAX_WEIGHT 1000
10598 -+
10599 -+#define BFQ_DEFAULT_GRP_WEIGHT 10
10600 -+#define BFQ_DEFAULT_GRP_IOPRIO 0
10601 -+#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
10602 -+
10603 -+struct bfq_entity;
10604 -+
10605 -+/**
10606 -+ * struct bfq_service_tree - per ioprio_class service tree.
10607 -+ * @active: tree for active entities (i.e., those backlogged).
10608 -+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
10609 -+ * @first_idle: idle entity with minimum F_i.
10610 -+ * @last_idle: idle entity with maximum F_i.
10611 -+ * @vtime: scheduler virtual time.
10612 -+ * @wsum: scheduler weight sum; active and idle entities contribute to it.
10613 -+ *
10614 -+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
10615 -+ * ioprio_class has its own independent scheduler, and so its own
10616 -+ * bfq_service_tree. All the fields are protected by the queue lock
10617 -+ * of the containing bfqd.
10618 -+ */
10619 -+struct bfq_service_tree {
10620 -+ struct rb_root active;
10621 -+ struct rb_root idle;
10622 -+
10623 -+ struct bfq_entity *first_idle;
10624 -+ struct bfq_entity *last_idle;
10625 -+
10626 -+ u64 vtime;
10627 -+ unsigned long wsum;
10628 -+};
10629 -+
10630 -+/**
10631 -+ * struct bfq_sched_data - multi-class scheduler.
10632 -+ * @in_service_entity: entity under service.
10633 -+ * @next_in_service: head-of-the-line entity in the scheduler.
10634 -+ * @service_tree: array of service trees, one per ioprio_class.
10635 -+ *
10636 -+ * bfq_sched_data is the basic scheduler queue. It supports three
10637 -+ * ioprio_classes, and can be used either as a toplevel queue or as
10638 -+ * an intermediate queue on a hierarchical setup.
10639 -+ * @next_in_service points to the active entity of the sched_data
10640 -+ * service trees that will be scheduled next.
10641 -+ *
10642 -+ * The supported ioprio_classes are the same as in CFQ, in descending
10643 -+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
10644 -+ * Requests from higher priority queues are served before all the
10645 -+ * requests from lower priority queues; among requests of the same
10646 -+ * queue requests are served according to B-WF2Q+.
10647 -+ * All the fields are protected by the queue lock of the containing bfqd.
10648 -+ */
10649 -+struct bfq_sched_data {
10650 -+ struct bfq_entity *in_service_entity;
10651 -+ struct bfq_entity *next_in_service;
10652 -+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
10653 -+};
10654 -+
10655 -+/**
10656 -+ * struct bfq_entity - schedulable entity.
10657 -+ * @rb_node: service_tree member.
10658 -+ * @on_st: flag, true if the entity is on a tree (either the active or
10659 -+ * the idle one of its service_tree).
10660 -+ * @finish: B-WF2Q+ finish timestamp (aka F_i).
10661 -+ * @start: B-WF2Q+ start timestamp (aka S_i).
10662 -+ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
10663 -+ * @min_start: minimum start time of the (active) subtree rooted at
10664 -+ * this entity; used for O(log N) lookups into active trees.
10665 -+ * @service: service received during the last round of service.
10666 -+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
10667 -+ * @weight: weight of the queue
10668 -+ * @parent: parent entity, for hierarchical scheduling.
10669 -+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
10670 -+ * associated scheduler queue, %NULL on leaf nodes.
10671 -+ * @sched_data: the scheduler queue this entity belongs to.
10672 -+ * @ioprio: the ioprio in use.
10673 -+ * @new_weight: when a weight change is requested, the new weight value.
10674 -+ * @orig_weight: original weight, used to implement weight boosting
10675 -+ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
10676 -+ * @ioprio_class: the ioprio_class in use.
10677 -+ * @new_ioprio_class: when an ioprio_class change is requested, the new
10678 -+ * ioprio_class value.
10679 -+ * @ioprio_changed: flag, true when the user requested a weight, ioprio or
10680 -+ * ioprio_class change.
10681 -+ *
10682 -+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
10683 -+ * cgroup hierarchy) or a bfq_group in the upper level scheduler. Each
10684 -+ * entity belongs to the sched_data of the parent group in the cgroup
10685 -+ * hierarchy. Non-leaf entities have also their own sched_data, stored
10686 -+ * in @my_sched_data.
10687 -+ *
10688 -+ * Each entity stores independently its priority values; this would
10689 -+ * allow different weights on different devices, but this
10690 -+ * functionality is not exported to userspace for now. Priorities and
10691 -+ * weights are updated lazily, first storing the new values into the
10692 -+ * new_* fields, then setting the @ioprio_changed flag. As soon as
10693 -+ * there is a transition in the entity state that allows the priority
10694 -+ * update to take place the effective and the requested priority
10695 -+ * values are synchronized.
10696 -+ *
10697 -+ * Unless cgroups are used, the weight value is calculated from the
10698 -+ * ioprio to export the same interface as CFQ. When dealing with
10699 -+ * ``well-behaved'' queues (i.e., queues that do not spend too much
10700 -+ * time to consume their budget and have true sequential behavior, and
10701 -+ * when there are no external factors breaking anticipation) the
10702 -+ * relative weights at each level of the cgroups hierarchy should be
10703 -+ * guaranteed. All the fields are protected by the queue lock of the
10704 -+ * containing bfqd.
10705 -+ */
10706 -+struct bfq_entity {
10707 -+ struct rb_node rb_node;
10708 -+
10709 -+ int on_st;
10710 -+
10711 -+ u64 finish;
10712 -+ u64 start;
10713 -+
10714 -+ struct rb_root *tree;
10715 -+
10716 -+ u64 min_start;
10717 -+
10718 -+ unsigned long service, budget;
10719 -+ unsigned short weight, new_weight;
10720 -+ unsigned short orig_weight;
10721 -+
10722 -+ struct bfq_entity *parent;
10723 -+
10724 -+ struct bfq_sched_data *my_sched_data;
10725 -+ struct bfq_sched_data *sched_data;
10726 -+
10727 -+ unsigned short ioprio, new_ioprio;
10728 -+ unsigned short ioprio_class, new_ioprio_class;
10729 -+
10730 -+ int ioprio_changed;
10731 -+};
10732 -+
10733 -+struct bfq_group;
10734 -+
10735 -+/**
10736 -+ * struct bfq_queue - leaf schedulable entity.
10737 -+ * @ref: reference counter.
10738 -+ * @bfqd: parent bfq_data.
10739 -+ * @new_bfqq: shared bfq_queue if queue is cooperating with
10740 -+ * one or more other queues.
10741 -+ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree).
10742 -+ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree).
10743 -+ * @sort_list: sorted list of pending requests.
10744 -+ * @next_rq: if fifo isn't expired, next request to serve.
10745 -+ * @queued: nr of requests queued in @sort_list.
10746 -+ * @allocated: currently allocated requests.
10747 -+ * @meta_pending: pending metadata requests.
10748 -+ * @fifo: fifo list of requests in sort_list.
10749 -+ * @entity: entity representing this queue in the scheduler.
10750 -+ * @max_budget: maximum budget allowed from the feedback mechanism.
10751 -+ * @budget_timeout: budget expiration (in jiffies).
10752 -+ * @dispatched: number of requests on the dispatch list or inside driver.
10753 -+ * @org_ioprio: saved ioprio during boosted periods.
10754 -+ * @flags: status flags.
10755 -+ * @bfqq_list: node for active/idle bfqq list inside our bfqd.
10756 -+ * @seek_samples: number of seeks sampled
10757 -+ * @seek_total: sum of the distances of the seeks sampled
10758 -+ * @seek_mean: mean seek distance
10759 -+ * @last_request_pos: position of the last request enqueued
10760 -+ * @pid: pid of the process owning the queue, used for logging purposes.
10761 -+ * @last_rais_start_finish: start time of the current weight-raising period if
10762 -+ * the @bfq-queue is being weight-raised, otherwise
10763 -+ * finish time of the last weight-raising period
10764 -+ * @raising_cur_max_time: current max raising time for this queue
10765 -+ * @soft_rt_next_start: minimum time instant such that, only if a new request
10766 -+ * is enqueued after this time instant in an idle
10767 -+ * @bfq_queue with no outstanding requests, then the
10768 -+ * task associated with the queue is deemed as soft
10769 -+ * real-time (see the comments to the function
10770 -+ * bfq_bfqq_softrt_next_start())
10771 -+ * @last_idle_bklogged: time of the last transition of the @bfq_queue from
10772 -+ * idle to backlogged
10773 -+ * @service_from_backlogged: cumulative service received from the @bfq_queue
10774 -+ * since the last transition from idle to backlogged
10775 -+ *
10776 -+ * A bfq_queue is a leaf request queue; it can be associated with an io_context
10777 -+ * or more, if it is async or shared between cooperating processes. @cgroup
10778 -+ * holds a reference to the cgroup, to be sure that it does not disappear while
10779 -+ * a bfqq still references it (mostly to avoid races between request issuing and
10780 -+ * task migration followed by cgroup destruction).
10781 -+ * All the fields are protected by the queue lock of the containing bfqd.
10782 -+ */
10783 -+struct bfq_queue {
10784 -+ atomic_t ref;
10785 -+ struct bfq_data *bfqd;
10786 -+
10787 -+ /* fields for cooperating queues handling */
10788 -+ struct bfq_queue *new_bfqq;
10789 -+ struct rb_node pos_node;
10790 -+ struct rb_root *pos_root;
10791 -+
10792 -+ struct rb_root sort_list;
10793 -+ struct request *next_rq;
10794 -+ int queued[2];
10795 -+ int allocated[2];
10796 -+ int meta_pending;
10797 -+ struct list_head fifo;
10798 -+
10799 -+ struct bfq_entity entity;
10800 -+
10801 -+ unsigned long max_budget;
10802 -+ unsigned long budget_timeout;
10803 -+
10804 -+ int dispatched;
10805 -+
10806 -+ unsigned short org_ioprio;
10807 -+
10808 -+ unsigned int flags;
10809 -+
10810 -+ struct list_head bfqq_list;
10811 -+
10812 -+ unsigned int seek_samples;
10813 -+ u64 seek_total;
10814 -+ sector_t seek_mean;
10815 -+ sector_t last_request_pos;
10816 -+
10817 -+ pid_t pid;
10818 -+
10819 -+ /* weight-raising fields */
10820 -+ unsigned long raising_cur_max_time;
10821 -+ unsigned long soft_rt_next_start;
10822 -+ unsigned long last_rais_start_finish;
10823 -+ unsigned int raising_coeff;
10824 -+ unsigned long last_idle_bklogged;
10825 -+ unsigned long service_from_backlogged;
10826 -+};
10827 -+
10828 -+/**
10829 -+ * struct bfq_ttime - per process thinktime stats.
10830 -+ * @ttime_total: total process thinktime
10831 -+ * @ttime_samples: number of thinktime samples
10832 -+ * @ttime_mean: average process thinktime
10833 -+ */
10834 -+struct bfq_ttime {
10835 -+ unsigned long last_end_request;
10836 -+
10837 -+ unsigned long ttime_total;
10838 -+ unsigned long ttime_samples;
10839 -+ unsigned long ttime_mean;
10840 -+};
10841 -+
10842 -+/**
10843 -+ * struct bfq_io_cq - per (request_queue, io_context) structure.
10844 -+ * @icq: associated io_cq structure
10845 -+ * @bfqq: array of two process queues, the sync and the async
10846 -+ * @ttime: associated @bfq_ttime struct
10847 -+ */
10848 -+struct bfq_io_cq {
10849 -+ struct io_cq icq; /* must be the first member */
10850 -+ struct bfq_queue *bfqq[2];
10851 -+ struct bfq_ttime ttime;
10852 -+ int ioprio;
10853 -+};
10854 -+
10855 -+/**
10856 -+ * struct bfq_data - per device data structure.
10857 -+ * @queue: request queue for the managed device.
10858 -+ * @root_group: root bfq_group for the device.
10859 -+ * @rq_pos_tree: rbtree sorted by next_request position,
10860 -+ * used when determining if two or more queues
10861 -+ * have interleaving requests (see bfq_close_cooperator).
10862 -+ * @busy_queues: number of bfq_queues containing requests (including the
10863 -+ * queue under service, even if it is idling).
10864 -+ * @raised_busy_queues: number of weight-raised busy bfq_queues.
10865 -+ * @queued: number of queued requests.
10866 -+ * @rq_in_driver: number of requests dispatched and waiting for completion.
10867 -+ * @sync_flight: number of sync requests in the driver.
10868 -+ * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples
10869 -+ * completed requests.
10870 -+ * @hw_tag_samples: nr of samples used to calculate hw_tag.
10871 -+ * @hw_tag: flag set to one if the driver is showing a queueing behavior.
10872 -+ * @budgets_assigned: number of budgets assigned.
10873 -+ * @idle_slice_timer: timer set when idling for the next sequential request
10874 -+ * from the queue under service.
10875 -+ * @unplug_work: delayed work to restart dispatching on the request queue.
10876 -+ * @in_service_queue: bfq_queue under service.
10877 -+ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
10878 -+ * @last_position: on-disk position of the last served request.
10879 -+ * @last_budget_start: beginning of the last budget.
10880 -+ * @last_idling_start: beginning of the last idle slice.
10881 -+ * @peak_rate: peak transfer rate observed for a budget.
10882 -+ * @peak_rate_samples: number of samples used to calculate @peak_rate.
10883 -+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling.
10884 -+ * @group_list: list of all the bfq_groups active on the device.
10885 -+ * @active_list: list of all the bfq_queues active on the device.
10886 -+ * @idle_list: list of all the bfq_queues idle on the device.
10887 -+ * @bfq_quantum: max number of requests dispatched per dispatch round.
10888 -+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
10889 -+ * requests are served in fifo order.
10890 -+ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
10891 -+ * @bfq_back_max: maximum allowed backward seek.
10892 -+ * @bfq_slice_idle: maximum idling time.
10893 -+ * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning).
10894 -+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
10895 -+ * async queues.
10896 -+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
10897 -+ * prevent seeky queues from imposing long latencies on well
10898 -+ * behaved ones (this also implies that seeky queues cannot
10899 -+ * receive guarantees in the service domain; after a timeout
10900 -+ * they are charged for the whole allocated budget, to try
10901 -+ * to preserve a behavior reasonably fair among them, but
10902 -+ * without service-domain guarantees).
10903 -+ * @bfq_raising_coeff: Maximum factor by which the weight of a boosted
10904 -+ * queue is multiplied
10905 -+ * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies)
10906 -+ * @bfq_raising_rt_max_time: maximum duration for soft real-time processes
10907 -+ * @bfq_raising_min_idle_time: minimum idle period after which weight-raising
10908 -+ * may be reactivated for a queue (in jiffies)
10909 -+ * @bfq_raising_min_inter_arr_async: minimum period between request arrivals
10910 -+ * after which weight-raising may be
10911 -+ * reactivated for an already busy queue
10912 -+ * (in jiffies)
10913 -+ * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue,
10914 -+ * sectors per second
10915 -+ * @RT_prod: cached value of the product R*T used for computing the maximum
10916 -+ * duration of the weight raising automatically
10917 -+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions
10918 -+ *
10919 -+ * All the fields are protected by the @queue lock.
10920 -+ */
10921 -+struct bfq_data {
10922 -+ struct request_queue *queue;
10923 -+
10924 -+ struct bfq_group *root_group;
10925 -+
10926 -+ struct rb_root rq_pos_tree;
10927 -+
10928 -+ int busy_queues;
10929 -+ int raised_busy_queues;
10930 -+ int queued;
10931 -+ int rq_in_driver;
10932 -+ int sync_flight;
10933 -+
10934 -+ int max_rq_in_driver;
10935 -+ int hw_tag_samples;
10936 -+ int hw_tag;
10937 -+
10938 -+ int budgets_assigned;
10939 -+
10940 -+ struct timer_list idle_slice_timer;
10941 -+ struct work_struct unplug_work;
10942 -+
10943 -+ struct bfq_queue *in_service_queue;
10944 -+ struct bfq_io_cq *in_service_bic;
10945 -+
10946 -+ sector_t last_position;
10947 -+
10948 -+ ktime_t last_budget_start;
10949 -+ ktime_t last_idling_start;
10950 -+ int peak_rate_samples;
10951 -+ u64 peak_rate;
10952 -+ unsigned long bfq_max_budget;
10953 -+
10954 -+ struct hlist_head group_list;
10955 -+ struct list_head active_list;
10956 -+ struct list_head idle_list;
10957 -+
10958 -+ unsigned int bfq_quantum;
10959 -+ unsigned int bfq_fifo_expire[2];
10960 -+ unsigned int bfq_back_penalty;
10961 -+ unsigned int bfq_back_max;
10962 -+ unsigned int bfq_slice_idle;
10963 -+ u64 bfq_class_idle_last_service;
10964 -+
10965 -+ unsigned int bfq_user_max_budget;
10966 -+ unsigned int bfq_max_budget_async_rq;
10967 -+ unsigned int bfq_timeout[2];
10968 -+
10969 -+ bool low_latency;
10970 -+
10971 -+ /* parameters of the low_latency heuristics */
10972 -+ unsigned int bfq_raising_coeff;
10973 -+ unsigned int bfq_raising_max_time;
10974 -+ unsigned int bfq_raising_rt_max_time;
10975 -+ unsigned int bfq_raising_min_idle_time;
10976 -+ unsigned long bfq_raising_min_inter_arr_async;
10977 -+ unsigned int bfq_raising_max_softrt_rate;
10978 -+ u64 RT_prod;
10979 -+
10980 -+ struct bfq_queue oom_bfqq;
10981 -+};
10982 -+
10983 -+enum bfqq_state_flags {
10984 -+ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */
10985 -+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
10986 -+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
10987 -+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
10988 -+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
10989 -+ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */
10990 -+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
10991 -+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
10992 -+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
10993 -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
10994 -+ BFQ_BFQQ_FLAG_softrt_update, /* needs softrt-next-start update */
10995 -+};
10996 -+
10997 -+#define BFQ_BFQQ_FNS(name) \
10998 -+static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
10999 -+{ \
11000 -+ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \
11001 -+} \
11002 -+static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
11003 -+{ \
11004 -+ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \
11005 -+} \
11006 -+static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
11007 -+{ \
11008 -+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
11009 -+}
11010 -+
11011 -+BFQ_BFQQ_FNS(busy);
11012 -+BFQ_BFQQ_FNS(wait_request);
11013 -+BFQ_BFQQ_FNS(must_alloc);
11014 -+BFQ_BFQQ_FNS(fifo_expire);
11015 -+BFQ_BFQQ_FNS(idle_window);
11016 -+BFQ_BFQQ_FNS(prio_changed);
11017 -+BFQ_BFQQ_FNS(sync);
11018 -+BFQ_BFQQ_FNS(budget_new);
11019 -+BFQ_BFQQ_FNS(coop);
11020 -+BFQ_BFQQ_FNS(split_coop);
11021 -+BFQ_BFQQ_FNS(softrt_update);
11022 -+#undef BFQ_BFQQ_FNS
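
The BFQ_BFQQ_FNS macro above stamps out a mark/clear/test helper triple for every flag listed in enum bfqq_state_flags, so each flag gets a consistent accessor without repeating the bit manipulation by hand. A minimal, self-contained user-space sketch of the same pattern (the names and flags below are illustrative, not taken from the kernel sources):

#include <stdio.h>

/* Illustrative stand-in for struct bfq_queue: only the flags word matters. */
struct queue { unsigned int flags; };

enum queue_state_flags {
	QUEUE_FLAG_busy = 0,
	QUEUE_FLAG_sync,
};

/* Same idea as BFQ_BFQQ_FNS: one macro expansion per flag generates
 * a setter, a clearer and a tester operating on the shared flags word. */
#define QUEUE_FNS(name)						\
static inline void mark_queue_##name(struct queue *q)		\
{								\
	q->flags |= (1U << QUEUE_FLAG_##name);			\
}								\
static inline void clear_queue_##name(struct queue *q)		\
{								\
	q->flags &= ~(1U << QUEUE_FLAG_##name);			\
}								\
static inline int queue_##name(const struct queue *q)		\
{								\
	return (q->flags & (1U << QUEUE_FLAG_##name)) != 0;	\
}

QUEUE_FNS(busy)
QUEUE_FNS(sync)
#undef QUEUE_FNS

int main(void)
{
	struct queue q = { 0 };

	mark_queue_busy(&q);
	printf("busy=%d sync=%d\n", queue_busy(&q), queue_sync(&q));
	clear_queue_busy(&q);
	printf("busy=%d\n", queue_busy(&q));
	return 0;
}
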
11023 -+
11024 -+/* Logging facilities. */
11025 -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
11026 -+ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
11027 -+
11028 -+#define bfq_log(bfqd, fmt, args...) \
11029 -+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
11030 -+
11031 -+/* Expiration reasons. */
11032 -+enum bfqq_expiration {
11033 -+ BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */
11034 -+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
11035 -+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
11036 -+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
11037 -+};
11038 -+
11039 -+#ifdef CONFIG_CGROUP_BFQIO
11040 -+/**
11041 -+ * struct bfq_group - per (device, cgroup) data structure.
11042 -+ * @entity: schedulable entity to insert into the parent group sched_data.
11043 -+ * @sched_data: own sched_data, to contain child entities (they may be
11044 -+ * both bfq_queues and bfq_groups).
11045 -+ * @group_node: node to be inserted into the bfqio_cgroup->group_data
11046 -+ * list of the containing cgroup's bfqio_cgroup.
11047 -+ * @bfqd_node: node to be inserted into the @bfqd->group_list list
11048 -+ * of the groups active on the same device; used for cleanup.
11049 -+ * @bfqd: the bfq_data for the device this group acts upon.
11050 -+ * @async_bfqq: array of async queues for all the tasks belonging to
11051 -+ * the group, one queue per ioprio value per ioprio_class,
11052 -+ * except for the idle class that has only one queue.
11053 -+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
11054 -+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
11055 -+ * to avoid too many special cases during group creation/migration.
11056 -+ *
11057 -+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
11058 -+ * there is a set of bfq_groups, each one collecting the lower-level
11059 -+ * entities belonging to the group that are acting on the same device.
11060 -+ *
11061 -+ * Locking works as follows:
11062 -+ * o @group_node is protected by the bfqio_cgroup lock, and is accessed
11063 -+ * via RCU from its readers.
11064 -+ * o @bfqd is protected by the queue lock, RCU is used to access it
11065 -+ * from the readers.
11066 -+ * o All the other fields are protected by the @bfqd queue lock.
11067 -+ */
11068 -+struct bfq_group {
11069 -+ struct bfq_entity entity;
11070 -+ struct bfq_sched_data sched_data;
11071 -+
11072 -+ struct hlist_node group_node;
11073 -+ struct hlist_node bfqd_node;
11074 -+
11075 -+ void *bfqd;
11076 -+
11077 -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
11078 -+ struct bfq_queue *async_idle_bfqq;
11079 -+
11080 -+ struct bfq_entity *my_entity;
11081 -+};
11082 -+
11083 -+/**
11084 -+ * struct bfqio_cgroup - bfq cgroup data structure.
11085 -+ * @css: subsystem state for bfq in the containing cgroup.
11086 -+ * @online: flag marked when the subsystem is inserted.
11087 -+ * @weight: cgroup weight.
11088 -+ * @ioprio: cgroup ioprio.
11089 -+ * @ioprio_class: cgroup ioprio_class.
11090 -+ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data.
11091 -+ * @group_data: list containing the bfq_group belonging to this cgroup.
11092 -+ *
11093 -+ * @group_data is accessed using RCU, with @lock protecting the updates,
11094 -+ * @ioprio and @ioprio_class are protected by @lock.
11095 -+ */
11096 -+struct bfqio_cgroup {
11097 -+ struct cgroup_subsys_state css;
11098 -+ bool online;
11099 -+
11100 -+ unsigned short weight, ioprio, ioprio_class;
11101 -+
11102 -+ spinlock_t lock;
11103 -+ struct hlist_head group_data;
11104 -+};
11105 -+#else
11106 -+struct bfq_group {
11107 -+ struct bfq_sched_data sched_data;
11108 -+
11109 -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
11110 -+ struct bfq_queue *async_idle_bfqq;
11111 -+};
11112 -+#endif
11113 -+
11114 -+static inline struct bfq_service_tree *
11115 -+bfq_entity_service_tree(struct bfq_entity *entity)
11116 -+{
11117 -+ struct bfq_sched_data *sched_data = entity->sched_data;
11118 -+ unsigned int idx = entity->ioprio_class - 1;
11119 -+
11120 -+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
11121 -+ BUG_ON(sched_data == NULL);
11122 -+
11123 -+ return sched_data->service_tree + idx;
11124 -+}
11125 -+
11126 -+static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic,
11127 -+ int is_sync)
11128 -+{
11129 -+ return bic->bfqq[!!is_sync];
11130 -+}
11131 -+
11132 -+static inline void bic_set_bfqq(struct bfq_io_cq *bic,
11133 -+ struct bfq_queue *bfqq, int is_sync)
11134 -+{
11135 -+ bic->bfqq[!!is_sync] = bfqq;
11136 -+}
11137 -+
11138 -+static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
11139 -+{
11140 -+ return bic->icq.q->elevator->elevator_data;
11141 -+}
11142 -+
11143 -+/**
11144 -+ * bfq_get_bfqd_locked - get a lock to a bfqd using an RCU-protected pointer.
11145 -+ * @ptr: a pointer to a bfqd.
11146 -+ * @flags: storage for the flags to be saved.
11147 -+ *
11148 -+ * This function allows bfqg->bfqd to be protected by the
11149 -+ * queue lock of the bfqd they reference; the pointer is dereferenced
11150 -+ * under RCU, so the storage for bfqd is assured to be safe as long
11151 -+ * as the RCU read side critical section does not end. After the
11152 -+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
11153 -+ * sure that no other writer accessed it. If we raced with a writer,
11154 -+ * the function returns NULL, with the queue unlocked, otherwise it
11155 -+ * returns the dereferenced pointer, with the queue locked.
11156 -+ */
11157 -+static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
11158 -+ unsigned long *flags)
11159 -+{
11160 -+ struct bfq_data *bfqd;
11161 -+
11162 -+ rcu_read_lock();
11163 -+ bfqd = rcu_dereference(*(struct bfq_data **)ptr);
11164 -+
11165 -+ if (bfqd != NULL) {
11166 -+ spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
11167 -+ if (*ptr == bfqd)
11168 -+ goto out;
11169 -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
11170 -+ }
11171 -+
11172 -+ bfqd = NULL;
11173 -+out:
11174 -+ rcu_read_unlock();
11175 -+ return bfqd;
11176 -+}
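
bfq_get_bfqd_locked() above follows a common lookup pattern: read the pointer speculatively under RCU, take the lock owned by the object it points to, then re-check that the pointer still refers to the same object before trusting it. A rough user-space analogue of that recheck step, with a pthread mutex standing in for the queue lock and a plain pointer load standing in for rcu_dereference() (names are illustrative, and a real concurrent version would need RCU or atomics for the speculative read):

#include <pthread.h>
#include <stdio.h>

struct device {
	pthread_mutex_t lock;
	int id;
};

/* Shared slot that a writer may clear or repoint at any time. */
static struct device *slot;

/*
 * Speculatively read *pslot, lock the device it points to, then confirm
 * the slot still points at the same device; otherwise drop the lock and
 * report failure, as bfq_get_bfqd_locked() does by returning NULL.
 */
static struct device *get_device_locked(struct device **pslot)
{
	struct device *dev = *pslot;		/* speculative read */

	if (dev != NULL) {
		pthread_mutex_lock(&dev->lock);
		if (*pslot == dev)		/* recheck under the lock */
			return dev;
		pthread_mutex_unlock(&dev->lock); /* raced with a writer */
	}
	return NULL;
}

int main(void)
{
	static struct device d = { PTHREAD_MUTEX_INITIALIZER, 42 };
	struct device *got;

	slot = &d;
	got = get_device_locked(&slot);
	if (got) {
		printf("locked device %d\n", got->id);
		pthread_mutex_unlock(&got->lock);
	}
	return 0;
}
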
11177 -+
11178 -+static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
11179 -+ unsigned long *flags)
11180 -+{
11181 -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
11182 -+}
11183 -+
11184 -+static void bfq_changed_ioprio(struct bfq_io_cq *bic);
11185 -+static void bfq_put_queue(struct bfq_queue *bfqq);
11186 -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
11187 -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
11188 -+ struct bfq_group *bfqg, int is_sync,
11189 -+ struct bfq_io_cq *bic, gfp_t gfp_mask);
11190 -+static void bfq_end_raising_async_queues(struct bfq_data *bfqd,
11191 -+ struct bfq_group *bfqg);
11192 -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
11193 -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
11194 -+#endif
11195 ---
11196 -1.9.0
11197 -
11198
11199 Deleted: genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch
11200 ===================================================================
11201 --- genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch 2014-05-05 14:22:27 UTC (rev 2771)
11202 +++ genpatches-2.6/trunk/3.15/5003_BFQ-3-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r2-for-3.14.0.patch 2014-05-05 14:28:48 UTC (rev 2772)
11203 @@ -1,1038 +0,0 @@
11204 -From 4fbeda28a90d7fccd05d28a89d9fc409b2344e0a Mon Sep 17 00:00:00 2001
11205 -From: Mauro Andreolini <mauro.andreolini@×××××××.it>
11206 -Date: Fri, 14 Feb 2014 12:52:49 +0100
11207 -Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r2 for
11208 - 3.14.0
11209 -
11210 -A set of processes may happen to perform interleaved reads, i.e., requests
11211 -whose union would give rise to a sequential read pattern. There are two
11212 -typical cases: in the first case, processes read fixed-size chunks of
11213 -data at a fixed distance from each other, while in the second case processes
11214 -may read variable-size chunks at variable distances. The latter case occurs
11215 -for example with KVM, which splits the I/O generated by the guest into
11216 -multiple chunks, and lets these chunks be served by a pool of cooperating
11217 -processes, iteratively assigning the next chunk of I/O to the first
11218 -available process. CFQ uses actual queue merging for the first type of
11219 -processes, whereas it uses preemption to get a sequential read pattern out
11220 -of the read requests performed by the second type of processes. In the end
11221 -it uses two different mechanisms to achieve the same goal: boosting the
11222 -throughput with interleaved I/O.
11223 -
11224 -This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
11225 -sequential read pattern with both types of processes. The main idea is
11226 -checking newly arrived requests against the next request of the active queue
11227 -both in case of actual request insert and in case of request merge. By doing
11228 -so, both types of processes can be handled by just merging their queues.
11229 -EQM is then simpler and more compact than the pair of mechanisms used in
11230 -CFQ.
11231 -
11232 -Finally, EQM also preserves the typical low-latency properties of BFQ, by
11233 -properly restoring the weight-raising state of a queue when it gets back to
11234 -a non-merged state.
11235 -
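
The core of the early-merge check described above is a distance test: when a request (or bio) arrives, its sector is compared against the position the in-service queue is working at, and if the two are within a seek threshold the queues become merge candidates. A stripped-down sketch of that decision with an arbitrary threshold and made-up types (the real code uses BFQQ_SEEK_THR, bfq_dist_from() and bfq_rq_close_to_sector(), shown further down in this patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

/* Illustrative threshold; the real BFQQ_SEEK_THR is defined elsewhere in bfq. */
#define SEEK_THRESHOLD 128

static sector_t dist_from(sector_t pos1, sector_t pos2)
{
	return pos1 >= pos2 ? pos1 - pos2 : pos2 - pos1;
}

/* True if a newly arrived request at new_pos is "close" to where the
 * in-service queue is currently reading, i.e. the two queues are likely
 * issuing interleaved, jointly sequential I/O and may be merged. */
static bool close_to_in_service(sector_t new_pos, sector_t in_service_pos)
{
	return dist_from(new_pos, in_service_pos) <= SEEK_THRESHOLD;
}

int main(void)
{
	sector_t in_service_pos = 10000;

	printf("%d\n", close_to_in_service(10064, in_service_pos)); /* 1: merge candidate */
	printf("%d\n", close_to_in_service(90000, in_service_pos)); /* 0: too far away */
	return 0;
}
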
11236 -Signed-off-by: Mauro Andreolini <mauro.andreolini@×××××××.it>
11237 -Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
11238 -Reviewed-by: Paolo Valente <paolo.valente@×××××××.it>
11239 ----
11240 - block/bfq-iosched.c | 657 ++++++++++++++++++++++++++++++++++++----------------
11241 - block/bfq-sched.c | 28 ---
11242 - block/bfq.h | 20 +-
11243 - 3 files changed, 476 insertions(+), 229 deletions(-)
11244 -
11245 -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
11246 -index f5f71e4..0d3503d 100644
11247 ---- a/block/bfq-iosched.c
11248 -+++ b/block/bfq-iosched.c
11249 -@@ -445,6 +445,46 @@ static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
11250 - return dur;
11251 - }
11252 -
11253 -+static inline void
11254 -+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
11255 -+{
11256 -+ if (bic->saved_idle_window)
11257 -+ bfq_mark_bfqq_idle_window(bfqq);
11258 -+ else
11259 -+ bfq_clear_bfqq_idle_window(bfqq);
11260 -+ if (bic->raising_time_left && bfqq->bfqd->low_latency) {
11261 -+ /*
11262 -+ * Start a weight raising period with the duration given by
11263 -+ * the raising_time_left snapshot.
11264 -+ */
11265 -+ if (bfq_bfqq_busy(bfqq))
11266 -+ bfqq->bfqd->raised_busy_queues++;
11267 -+ bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff;
11268 -+ bfqq->raising_cur_max_time = bic->raising_time_left;
11269 -+ bfqq->last_rais_start_finish = jiffies;
11270 -+ bfqq->entity.ioprio_changed = 1;
11271 -+ }
11272 -+ /*
11273 -+ * Clear raising_time_left to prevent bfq_bfqq_save_state() from
11274 -+ * getting confused about the queue's need of a weight-raising
11275 -+ * period.
11276 -+ */
11277 -+ bic->raising_time_left = 0;
11278 -+}
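
bfq_bfqq_resume_state() above, together with bfq_bfqq_save_state() further down in this patch, implements a small snapshot/restore protocol: when a queue is merged, the time left in its weight-raising period is remembered in the bfq_io_cq, and when the queue is later split off again the boost resumes only for that leftover time. A minimal sketch of the bookkeeping, using plain integers for time and hypothetical names (no jiffies, no locking):

#include <stdio.h>

/* Illustrative per-process snapshot, like the raising_time_left field. */
struct io_ctx {
	unsigned int raising_time_left;
};

/* Illustrative queue: boosted while coeff > 1, for cur_max_time ticks. */
struct queue {
	unsigned int coeff;
	unsigned int cur_max_time;
	unsigned int start;
};

/* On merge: remember how much of the boost is still unused (0 if it is
 * about to expire anyway), then end the boost on the now-shared queue. */
static void save_state(struct queue *q, struct io_ctx *ctx, unsigned int now)
{
	unsigned int elapsed = now - q->start;

	if (q->coeff > 1)
		ctx->raising_time_left =
			elapsed >= q->cur_max_time ? 0 : q->cur_max_time - elapsed;
	else
		ctx->raising_time_left = 0;
	q->coeff = 1;
}

/* On split: restart the boost only for the leftover time. */
static void resume_state(struct queue *q, struct io_ctx *ctx,
			 unsigned int now, unsigned int coeff)
{
	if (ctx->raising_time_left) {
		q->coeff = coeff;
		q->cur_max_time = ctx->raising_time_left;
		q->start = now;
	}
	ctx->raising_time_left = 0;
}

int main(void)
{
	struct queue q = { .coeff = 20, .cur_max_time = 6000, .start = 0 };
	struct io_ctx ctx = { 0 };

	save_state(&q, &ctx, 2500);	/* merged 2500 ticks in: 3500 left */
	resume_state(&q, &ctx, 9000, 20);
	printf("coeff=%u max=%u\n", q.coeff, q.cur_max_time); /* coeff=20 max=3500 */
	return 0;
}
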
11279 -+
11280 -+/*
11281 -+ * Must be called with the queue_lock held.
11282 -+ */
11283 -+static int bfqq_process_refs(struct bfq_queue *bfqq)
11284 -+{
11285 -+ int process_refs, io_refs;
11286 -+
11287 -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
11288 -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
11289 -+ BUG_ON(process_refs < 0);
11290 -+ return process_refs;
11291 -+}
11292 -+
11293 - static void bfq_add_rq_rb(struct request *rq)
11294 - {
11295 - struct bfq_queue *bfqq = RQ_BFQQ(rq);
11296 -@@ -486,12 +526,20 @@ static void bfq_add_rq_rb(struct request *rq)
11297 - if (!bfqd->low_latency)
11298 - goto add_bfqq_busy;
11299 -
11300 -+ if (bfq_bfqq_just_split(bfqq))
11301 -+ goto set_ioprio_changed;
11302 -+
11303 - /*
11304 -- * If the queue is not being boosted and has been idle
11305 -- * for enough time, start a weight-raising period
11306 -+ * If the queue:
11307 -+ * - is not being boosted,
11308 -+ * - has been idle for enough time,
11309 -+ * - is not a sync queue or is linked to a bfq_io_cq (it is
11310 -+ * shared "for its nature" or it is not shared and its
11311 -+ * requests have not been redirected to a shared queue)
11312 -+ * start a weight-raising period.
11313 - */
11314 -- if (old_raising_coeff == 1 &&
11315 -- (idle_for_long_time || soft_rt)) {
11316 -+ if (old_raising_coeff == 1 && (idle_for_long_time || soft_rt) &&
11317 -+ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
11318 - bfqq->raising_coeff = bfqd->bfq_raising_coeff;
11319 - if (idle_for_long_time)
11320 - bfqq->raising_cur_max_time =
11321 -@@ -574,6 +622,7 @@ static void bfq_add_rq_rb(struct request *rq)
11322 - bfqd->bfq_raising_rt_max_time;
11323 - }
11324 - }
11325 -+set_ioprio_changed:
11326 - if (old_raising_coeff != bfqq->raising_coeff)
11327 - entity->ioprio_changed = 1;
11328 - add_bfqq_busy:
11329 -@@ -756,90 +805,35 @@ static void bfq_end_raising(struct bfq_data *bfqd)
11330 - spin_unlock_irq(bfqd->queue->queue_lock);
11331 - }
11332 -
11333 --static int bfq_allow_merge(struct request_queue *q, struct request *rq,
11334 -- struct bio *bio)
11335 --{
11336 -- struct bfq_data *bfqd = q->elevator->elevator_data;
11337 -- struct bfq_io_cq *bic;
11338 -- struct bfq_queue *bfqq;
11339 --
11340 -- /*
11341 -- * Disallow merge of a sync bio into an async request.
11342 -- */
11343 -- if (bfq_bio_sync(bio) && !rq_is_sync(rq))
11344 -- return 0;
11345 --
11346 -- /*
11347 -- * Lookup the bfqq that this bio will be queued with. Allow
11348 -- * merge only if rq is queued there.
11349 -- * Queue lock is held here.
11350 -- */
11351 -- bic = bfq_bic_lookup(bfqd, current->io_context);
11352 -- if (bic == NULL)
11353 -- return 0;
11354 --
11355 -- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
11356 -- return bfqq == RQ_BFQQ(rq);
11357 --}
11358 --
11359 --static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
11360 -- struct bfq_queue *bfqq)
11361 --{
11362 -- if (bfqq != NULL) {
11363 -- bfq_mark_bfqq_must_alloc(bfqq);
11364 -- bfq_mark_bfqq_budget_new(bfqq);
11365 -- bfq_clear_bfqq_fifo_expire(bfqq);
11366 --
11367 -- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
11368 --
11369 -- bfq_log_bfqq(bfqd, bfqq,
11370 -- "set_in_service_queue, cur-budget = %lu",
11371 -- bfqq->entity.budget);
11372 -- }
11373 --
11374 -- bfqd->in_service_queue = bfqq;
11375 --}
11376 --
11377 --/*
11378 -- * Get and set a new queue for service.
11379 -- */
11380 --static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
11381 -- struct bfq_queue *bfqq)
11382 -+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
11383 - {
11384 -- if (!bfqq)
11385 -- bfqq = bfq_get_next_queue(bfqd);
11386 -+ if (request)
11387 -+ return blk_rq_pos(io_struct);
11388 - else
11389 -- bfq_get_next_queue_forced(bfqd, bfqq);
11390 --
11391 -- __bfq_set_in_service_queue(bfqd, bfqq);
11392 -- return bfqq;
11393 -+ return ((struct bio *)io_struct)->bi_iter.bi_sector;
11394 - }
11395 -
11396 --static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
11397 -- struct request *rq)
11398 -+static inline sector_t bfq_dist_from(sector_t pos1,
11399 -+ sector_t pos2)
11400 - {
11401 -- if (blk_rq_pos(rq) >= bfqd->last_position)
11402 -- return blk_rq_pos(rq) - bfqd->last_position;
11403 -+ if (pos1 >= pos2)
11404 -+ return pos1 - pos2;
11405 - else
11406 -- return bfqd->last_position - blk_rq_pos(rq);
11407 -+ return pos2 - pos1;
11408 - }
11409 -
11410 --/*
11411 -- * Return true if bfqq has no request pending and rq is close enough to
11412 -- * bfqd->last_position, or if rq is closer to bfqd->last_position than
11413 -- * bfqq->next_rq
11414 -- */
11415 --static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
11416 -+static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
11417 -+ sector_t sector)
11418 - {
11419 -- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
11420 -+ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
11421 -+ BFQQ_SEEK_THR;
11422 - }
11423 -
11424 --static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11425 -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
11426 - {
11427 - struct rb_root *root = &bfqd->rq_pos_tree;
11428 - struct rb_node *parent, *node;
11429 - struct bfq_queue *__bfqq;
11430 -- sector_t sector = bfqd->last_position;
11431 -
11432 - if (RB_EMPTY_ROOT(root))
11433 - return NULL;
11434 -@@ -858,7 +852,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11435 - * position).
11436 - */
11437 - __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
11438 -- if (bfq_rq_close(bfqd, __bfqq->next_rq))
11439 -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
11440 - return __bfqq;
11441 -
11442 - if (blk_rq_pos(__bfqq->next_rq) < sector)
11443 -@@ -869,7 +863,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11444 - return NULL;
11445 -
11446 - __bfqq = rb_entry(node, struct bfq_queue, pos_node);
11447 -- if (bfq_rq_close(bfqd, __bfqq->next_rq))
11448 -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
11449 - return __bfqq;
11450 -
11451 - return NULL;
11452 -@@ -878,14 +872,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
11453 - /*
11454 - * bfqd - obvious
11455 - * cur_bfqq - passed in so that we don't decide that the current queue
11456 -- * is closely cooperating with itself.
11457 -- *
11458 -- * We are assuming that cur_bfqq has dispatched at least one request,
11459 -- * and that bfqd->last_position reflects a position on the disk associated
11460 -- * with the I/O issued by cur_bfqq.
11461 -+ * is closely cooperating with itself
11462 -+ * sector - used as a reference point to search for a close queue
11463 - */
11464 - static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
11465 -- struct bfq_queue *cur_bfqq)
11466 -+ struct bfq_queue *cur_bfqq,
11467 -+ sector_t sector)
11468 - {
11469 - struct bfq_queue *bfqq;
11470 -
11471 -@@ -905,7 +897,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
11472 - * working closely on the same area of the disk. In that case,
11473 - * we can group them together and don't waste time idling.
11474 - */
11475 -- bfqq = bfqq_close(bfqd);
11476 -+ bfqq = bfqq_close(bfqd, sector);
11477 - if (bfqq == NULL || bfqq == cur_bfqq)
11478 - return NULL;
11479 -
11480 -@@ -932,6 +924,282 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
11481 - return bfqq;
11482 - }
11483 -
11484 -+static struct bfq_queue *
11485 -+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
11486 -+{
11487 -+ int process_refs, new_process_refs;
11488 -+ struct bfq_queue *__bfqq;
11489 -+
11490 -+ /*
11491 -+ * If there are no process references on the new_bfqq, then it is
11492 -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
11493 -+ * may have dropped their last reference (not just their last process
11494 -+ * reference).
11495 -+ */
11496 -+ if (!bfqq_process_refs(new_bfqq))
11497 -+ return NULL;
11498 -+
11499 -+ /* Avoid a circular list and skip interim queue merges. */
11500 -+ while ((__bfqq = new_bfqq->new_bfqq)) {
11501 -+ if (__bfqq == bfqq)
11502 -+ return NULL;
11503 -+ new_bfqq = __bfqq;
11504 -+ }
11505 -+
11506 -+ process_refs = bfqq_process_refs(bfqq);
11507 -+ new_process_refs = bfqq_process_refs(new_bfqq);
11508 -+ /*
11509 -+ * If the process for the bfqq has gone away, there is no
11510 -+ * sense in merging the queues.
11511 -+ */
11512 -+ if (process_refs == 0 || new_process_refs == 0)
11513 -+ return NULL;
11514 -+
11515 -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
11516 -+ new_bfqq->pid);
11517 -+
11518 -+ /*
11519 -+ * Merging is just a redirection: the requests of the process owning
11520 -+ * one of the two queues are redirected to the other queue. The latter
11521 -+ * queue, in its turn, is set as shared if this is the first time that
11522 -+ * the requests of some process are redirected to it.
11523 -+ *
11524 -+ * We redirect bfqq to new_bfqq and not the opposite, because we
11525 -+ * are in the context of the process owning bfqq, hence we have the
11526 -+ * io_cq of this process. So we can immediately configure this io_cq
11527 -+ * to redirect the requests of the process to new_bfqq.
11528 -+ *
11529 -+ * NOTE, even if new_bfqq coincides with the in-service queue, the
11530 -+ * io_cq of new_bfqq is not available, because, if the in-service queue
11531 -+ * is shared, bfqd->in_service_bic may not point to the io_cq of the
11532 -+ * in-service queue.
11533 -+ * Redirecting the requests of the process owning bfqq to the currently
11534 -+ * in-service queue is in any case the best option, as we feed the
11535 -+ * in-service queue with new requests close to the last request served
11536 -+ * and, by doing so, hopefully increase the throughput.
11537 -+ */
11538 -+ bfqq->new_bfqq = new_bfqq;
11539 -+ atomic_add(process_refs, &new_bfqq->ref);
11540 -+ return new_bfqq;
11541 -+}
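
One detail of bfq_setup_merge() above is how it resolves chained merges: a queue that has already been scheduled for a merge redirects to its new_bfqq, so the chain is followed to its final target, bailing out if it would loop back to the queue we started from. A small, self-contained sketch of that walk (illustrative types, no reference counting):

#include <stddef.h>
#include <stdio.h>

struct queue {
	int pid;
	struct queue *new_queue;	/* where this queue's requests now go */
};

/* Follow the merge chain to its final target, refusing circular chains,
 * mirroring the while ((__bfqq = new_bfqq->new_bfqq)) loop above. */
static struct queue *resolve_merge_target(struct queue *q, struct queue *target)
{
	struct queue *next;

	while ((next = target->new_queue) != NULL) {
		if (next == q)
			return NULL;	/* would form a cycle: give up */
		target = next;
	}
	return target;
}

int main(void)
{
	struct queue a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };

	b.new_queue = &c;		/* b already scheduled to merge into c */
	printf("a merges into pid %d\n", resolve_merge_target(&a, &b)->pid); /* 3 */

	c.new_queue = &a;		/* now the chain loops back to a */
	printf("cycle gives %p\n", (void *)resolve_merge_target(&a, &b));    /* NULL */
	return 0;
}
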
11542 -+
11543 -+/*
11544 -+ * Attempt to schedule a merge of bfqq with the currently in-service queue or
11545 -+ * with a close queue among the scheduled queues.
11546 -+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
11547 -+ * structure otherwise.
11548 -+ */
11549 -+static struct bfq_queue *
11550 -+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
11551 -+ void *io_struct, bool request)
11552 -+{
11553 -+ struct bfq_queue *in_service_bfqq, *new_bfqq;
11554 -+
11555 -+ if (bfqq->new_bfqq)
11556 -+ return bfqq->new_bfqq;
11557 -+
11558 -+ if (!io_struct)
11559 -+ return NULL;
11560 -+
11561 -+ in_service_bfqq = bfqd->in_service_queue;
11562 -+
11563 -+ if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
11564 -+ !bfqd->in_service_bic)
11565 -+ goto check_scheduled;
11566 -+
11567 -+ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
11568 -+ goto check_scheduled;
11569 -+
11570 -+ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
11571 -+ goto check_scheduled;
11572 -+
11573 -+ if (in_service_bfqq->entity.parent != bfqq->entity.parent)
11574 -+ goto check_scheduled;
11575 -+
11576 -+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
11577 -+ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
11578 -+ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
11579 -+ if (new_bfqq != NULL)
11580 -+ return new_bfqq; /* Merge with the in-service queue */
11581 -+ }
11582 -+
11583 -+ /*
11584 -+ * Check whether there is a cooperator among currently scheduled
11585 -+ * queues. The only thing we need is that the bio/request is not
11586 -+ * NULL, as we need it to establish whether a cooperator exists.
11587 -+ */
11588 -+check_scheduled:
11589 -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq,
11590 -+ bfq_io_struct_pos(io_struct, request));
11591 -+ if (new_bfqq)
11592 -+ return bfq_setup_merge(bfqq, new_bfqq);
11593 -+
11594 -+ return NULL;
11595 -+}
11596 -+
11597 -+static inline void
11598 -+bfq_bfqq_save_state(struct bfq_queue *bfqq)
11599 -+{
11600 -+ /*
11601 -+ * If bfqq->bic == NULL, the queue is already shared or its requests
11602 -+ * have already been redirected to a shared queue; both idle window
11603 -+ * and weight raising state have already been saved. Do nothing.
11604 -+ */
11605 -+ if (bfqq->bic == NULL)
11606 -+ return;
11607 -+ if (bfqq->bic->raising_time_left)
11608 -+ /*
11609 -+ * This is the queue of a just-started process, and would
11610 -+ * deserve weight raising: we set raising_time_left to the full
11611 -+ * weight-raising duration to trigger weight-raising when and
11612 -+ * if the queue is split and the first request of the queue
11613 -+ * is enqueued.
11614 -+ */
11615 -+ bfqq->bic->raising_time_left = bfq_wrais_duration(bfqq->bfqd);
11616 -+ else if (bfqq->raising_coeff > 1) {
11617 -+ unsigned long wrais_duration =
11618 -+ jiffies - bfqq->last_rais_start_finish;
11619 -+ /*
11620 -+ * It may happen that a queue's weight raising period lasts
11621 -+ * longer than its raising_cur_max_time, as weight raising is
11622 -+ * handled only when a request is enqueued or dispatched (it
11623 -+ * does not use any timer). If the weight raising period is
11624 -+ * about to end, don't save it.
11625 -+ */
11626 -+ if (bfqq->raising_cur_max_time <= wrais_duration)
11627 -+ bfqq->bic->raising_time_left = 0;
11628 -+ else
11629 -+ bfqq->bic->raising_time_left =
11630 -+ bfqq->raising_cur_max_time - wrais_duration;
11631 -+ /*
11632 -+ * The bfq_queue is becoming shared or the requests of the
11633 -+ * process owning the queue are being redirected to a shared
11634 -+ * queue. Stop the weight raising period of the queue, as in
11635 -+ * both cases it should not be owned by an interactive or soft
11636 -+ * real-time application.
11637 -+ */
11638 -+ bfq_bfqq_end_raising(bfqq);
11639 -+ } else
11640 -+ bfqq->bic->raising_time_left = 0;
11641 -+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
11642 -+}
11643 -+
11644 -+static inline void
11645 -+bfq_get_bic_reference(struct bfq_queue *bfqq)
11646 -+{
11647 -+ /*
11648 -+ * If bfqq->bic has a non-NULL value, the bic to which it belongs
11649 -+ * is about to begin using a shared bfq_queue.
11650 -+ */
11651 -+ if (bfqq->bic)
11652 -+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
11653 -+}
11654 -+
11655 -+static void
11656 -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
11657 -+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
11658 -+{
11659 -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
11660 -+ (long unsigned)new_bfqq->pid);
11661 -+ /* Save weight raising and idle window of the merged queues */
11662 -+ bfq_bfqq_save_state(bfqq);
11663 -+ bfq_bfqq_save_state(new_bfqq);
11664 -+ /*
11665 -+ * Grab a reference to the bic, to prevent it from being destroyed
11666 -+ * before being possibly touched by a bfq_split_bfqq().
11667 -+ */
11668 -+ bfq_get_bic_reference(bfqq);
11669 -+ bfq_get_bic_reference(new_bfqq);
11670 -+ /* Merge queues (that is, let bic redirect its requests to new_bfqq) */
11671 -+ bic_set_bfqq(bic, new_bfqq, 1);
11672 -+ bfq_mark_bfqq_coop(new_bfqq);
11673 -+ /*
11674 -+ * new_bfqq now belongs to at least two bics (it is a shared queue): set
11675 -+ * new_bfqq->bic to NULL. bfqq either:
11676 -+ * - does not belong to any bic any more, and hence bfqq->bic must
11677 -+ * be set to NULL, or
11678 -+ * - is a queue whose owning bics have already been redirected to a
11679 -+ * different queue, hence the queue is destined to not belong to any
11680 -+ * bic soon and bfqq->bic is already NULL (therefore the next
11681 -+ * assignment causes no harm).
11682 -+ */
11683 -+ new_bfqq->bic = NULL;
11684 -+ bfqq->bic = NULL;
11685 -+ bfq_put_queue(bfqq);
11686 -+}
11687 -+
11688 -+static int bfq_allow_merge(struct request_queue *q, struct request *rq,
11689 -+ struct bio *bio)
11690 -+{
11691 -+ struct bfq_data *bfqd = q->elevator->elevator_data;
11692 -+ struct bfq_io_cq *bic;
11693 -+ struct bfq_queue *bfqq, *new_bfqq;
11694 -+
11695 -+ /*
11696 -+ * Disallow merge of a sync bio into an async request.
11697 -+ */
11698 -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
11699 -+ return 0;
11700 -+
11701 -+ /*
11702 -+ * Lookup the bfqq that this bio will be queued with. Allow
11703 -+ * merge only if rq is queued there.
11704 -+ * Queue lock is held here.
11705 -+ */
11706 -+ bic = bfq_bic_lookup(bfqd, current->io_context);
11707 -+ if (bic == NULL)
11708 -+ return 0;
11709 -+
11710 -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
11711 -+ /*
11712 -+ * We take advantage of this function to perform an early merge
11713 -+ * of the queues of possible cooperating processes.
11714 -+ */
11715 -+ if (bfqq != NULL) {
11716 -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
11717 -+ if (new_bfqq != NULL) {
11718 -+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
11719 -+ /*
11720 -+ * If we get here, the bio will be queued in the shared
11721 -+ * queue, i.e., new_bfqq, so use new_bfqq to decide
11722 -+ * whether bio and rq can be merged.
11723 -+ */
11724 -+ bfqq = new_bfqq;
11725 -+ }
11726 -+ }
11727 -+
11728 -+ return bfqq == RQ_BFQQ(rq);
11729 -+}
11730 -+
11731 -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
11732 -+ struct bfq_queue *bfqq)
11733 -+{
11734 -+ if (bfqq != NULL) {
11735 -+ bfq_mark_bfqq_must_alloc(bfqq);
11736 -+ bfq_mark_bfqq_budget_new(bfqq);
11737 -+ bfq_clear_bfqq_fifo_expire(bfqq);
11738 -+
11739 -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
11740 -+
11741 -+ bfq_log_bfqq(bfqd, bfqq,
11742 -+ "set_in_service_queue, cur-budget = %lu",
11743 -+ bfqq->entity.budget);
11744 -+ }
11745 -+
11746 -+ bfqd->in_service_queue = bfqq;
11747 -+}
11748 -+
11749 -+/*
11750 -+ * Get and set a new queue for service.
11751 -+ */
11752 -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
11753 -+{
11754 -+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
11755 -+
11756 -+ __bfq_set_in_service_queue(bfqd, bfqq);
11757 -+ return bfqq;
11758 -+}
11759 -+
11760 - /*
11761 - * If enough samples have been computed, return the current max budget
11762 - * stored in bfqd, which is dynamically updated according to the
11763 -@@ -1079,63 +1347,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
11764 - return rq;
11765 - }
11766 -
11767 --/*
11768 -- * Must be called with the queue_lock held.
11769 -- */
11770 --static int bfqq_process_refs(struct bfq_queue *bfqq)
11771 --{
11772 -- int process_refs, io_refs;
11773 --
11774 -- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
11775 -- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
11776 -- BUG_ON(process_refs < 0);
11777 -- return process_refs;
11778 --}
11779 --
11780 --static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
11781 --{
11782 -- int process_refs, new_process_refs;
11783 -- struct bfq_queue *__bfqq;
11784 --
11785 -- /*
11786 -- * If there are no process references on the new_bfqq, then it is
11787 -- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
11788 -- * may have dropped their last reference (not just their last process
11789 -- * reference).
11790 -- */
11791 -- if (!bfqq_process_refs(new_bfqq))
11792 -- return;
11793 --
11794 -- /* Avoid a circular list and skip interim queue merges. */
11795 -- while ((__bfqq = new_bfqq->new_bfqq)) {
11796 -- if (__bfqq == bfqq)
11797 -- return;
11798 -- new_bfqq = __bfqq;
11799 -- }
11800 --
11801 -- process_refs = bfqq_process_refs(bfqq);
11802 -- new_process_refs = bfqq_process_refs(new_bfqq);
11803 -- /*
11804 -- * If the process for the bfqq has gone away, there is no
11805 -- * sense in merging the queues.
11806 -- */
11807 -- if (process_refs == 0 || new_process_refs == 0)
11808 -- return;
11809 --
11810 -- /*
11811 -- * Merge in the direction of the lesser amount of work.
11812 -- */
11813 -- if (new_process_refs >= process_refs) {
11814 -- bfqq->new_bfqq = new_bfqq;
11815 -- atomic_add(process_refs, &new_bfqq->ref);
11816 -- } else {
11817 -- new_bfqq->new_bfqq = bfqq;
11818 -- atomic_add(new_process_refs, &bfqq->ref);
11819 -- }
11820 -- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
11821 -- new_bfqq->pid);
11822 --}
11823 --
11824 - static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
11825 - {
11826 - struct bfq_entity *entity = &bfqq->entity;
11827 -@@ -1729,7 +1940,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
11828 - */
11829 - static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
11830 - {
11831 -- struct bfq_queue *bfqq, *new_bfqq = NULL;
11832 -+ struct bfq_queue *bfqq;
11833 - struct request *next_rq;
11834 - enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
11835 -
11836 -@@ -1739,17 +1950,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
11837 -
11838 - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
11839 -
11840 -- /*
11841 -- * If another queue has a request waiting within our mean seek
11842 -- * distance, let it run. The expire code will check for close
11843 -- * cooperators and put the close queue at the front of the
11844 -- * service tree. If possible, merge the expiring queue with the
11845 -- * new bfqq.
11846 -- */
11847 -- new_bfqq = bfq_close_cooperator(bfqd, bfqq);
11848 -- if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
11849 -- bfq_setup_merge(bfqq, new_bfqq);
11850 --
11851 - if (bfq_may_expire_for_budg_timeout(bfqq) &&
11852 - !timer_pending(&bfqd->idle_slice_timer) &&
11853 - !bfq_bfqq_must_idle(bfqq))
11854 -@@ -1786,36 +1986,26 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
11855 - bfq_clear_bfqq_wait_request(bfqq);
11856 - del_timer(&bfqd->idle_slice_timer);
11857 - }
11858 -- if (new_bfqq == NULL)
11859 -- goto keep_queue;
11860 -- else
11861 -- goto expire;
11862 -+ goto keep_queue;
11863 - }
11864 - }
11865 -
11866 - /*
11867 -- * No requests pending. If the in-service queue has no cooperator and
11868 -- * still has requests in flight (possibly waiting for a completion)
11869 -- * or is idling for a new request, then keep it.
11870 -+ * No requests pending. If the in-service queue still has requests in
11871 -+ * flight (possibly waiting for a completion) or is idling for a new
11872 -+ * request, then keep it.
11873 - */
11874 -- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
11875 -- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
11876 -+ if (timer_pending(&bfqd->idle_slice_timer) ||
11877 -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
11878 - bfqq = NULL;
11879 - goto keep_queue;
11880 -- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
11881 -- /*
11882 -- * Expiring the queue because there is a close cooperator,
11883 -- * cancel timer.
11884 -- */
11885 -- bfq_clear_bfqq_wait_request(bfqq);
11886 -- del_timer(&bfqd->idle_slice_timer);
11887 - }
11888 -
11889 - reason = BFQ_BFQQ_NO_MORE_REQUESTS;
11890 - expire:
11891 - bfq_bfqq_expire(bfqd, bfqq, 0, reason);
11892 - new_queue:
11893 -- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
11894 -+ bfqq = bfq_set_in_service_queue(bfqd);
11895 - bfq_log(bfqd, "select_queue: new queue %d returned",
11896 - bfqq != NULL ? bfqq->pid : 0);
11897 - keep_queue:
11898 -@@ -1825,9 +2015,8 @@ keep_queue:
11899 - static void bfq_update_raising_data(struct bfq_data *bfqd,
11900 - struct bfq_queue *bfqq)
11901 - {
11902 -+ struct bfq_entity *entity = &bfqq->entity;
11903 - if (bfqq->raising_coeff > 1) { /* queue is being boosted */
11904 -- struct bfq_entity *entity = &bfqq->entity;
11905 --
11906 - bfq_log_bfqq(bfqd, bfqq,
11907 - "raising period dur %u/%u msec, "
11908 - "old raising coeff %u, w %d(%d)",
11909 -@@ -1844,7 +2033,7 @@ static void bfq_update_raising_data(struct bfq_data *bfqd,
11910 - "WARN: pending prio change");
11911 - /*
11912 - * If too much time has elapsed from the beginning
11913 -- * of this weight-raising, stop it.
11914 -+ * of this weight-raising period, stop it.
11915 - */
11916 - if (time_is_before_jiffies(bfqq->last_rais_start_finish +
11917 - bfqq->raising_cur_max_time)) {
11918 -@@ -1856,11 +2045,13 @@ static void bfq_update_raising_data(struct bfq_data *bfqd,
11919 - jiffies_to_msecs(bfqq->
11920 - raising_cur_max_time));
11921 - bfq_bfqq_end_raising(bfqq);
11922 -- __bfq_entity_update_weight_prio(
11923 -- bfq_entity_service_tree(entity),
11924 -- entity);
11925 - }
11926 - }
11927 -+ /* Update weight both if it must be raised and if it must be lowered */
11928 -+ if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1))
11929 -+ __bfq_entity_update_weight_prio(
11930 -+ bfq_entity_service_tree(entity),
11931 -+ entity);
11932 - }
11933 -
11934 - /*
11935 -@@ -2101,6 +2292,25 @@ static void bfq_init_icq(struct io_cq *icq)
11936 - struct bfq_io_cq *bic = icq_to_bic(icq);
11937 -
11938 - bic->ttime.last_end_request = jiffies;
11939 -+ /*
11940 -+ * A newly created bic indicates that the process has just
11941 -+ * started doing I/O, and is probably mapping into memory its
11942 -+ * executable and libraries: it definitely needs weight raising.
11943 -+ * There is however the possibility that the process performs,
11944 -+ * for a while, I/O close to some other process. EQM intercepts
11945 -+ * this behavior and may merge the queue corresponding to the
11946 -+ * process with some other queue, BEFORE the weight of the queue
11947 -+ * is raised. Merged queues are not weight-raised (they are assumed
11948 -+ * to belong to processes that benefit only from high throughput).
11949 -+ * If the merge is basically the consequence of an accident, then
11950 -+ * the queue will be split soon and will get back its old weight.
11951 -+ * It is then important to write down somewhere that this queue
11952 -+ * does need weight raising, even if it did not make it to get its
11953 -+ * does need weight raising, even if it did not get its weight
11954 -+ * raised before being merged. For this purpose, we overload
11955 -+ * as needing weight raising.
11956 -+ */
11957 -+ bic->raising_time_left = 1;
11958 - }
11959 -
11960 - static void bfq_exit_icq(struct io_cq *icq)
11961 -@@ -2114,6 +2324,13 @@ static void bfq_exit_icq(struct io_cq *icq)
11962 - }
11963 -
11964 - if (bic->bfqq[BLK_RW_SYNC]) {
11965 -+ /*
11966 -+ * If the bic is using a shared queue, put the reference
11967 -+ * taken on the io_context when the bic started using a
11968 -+ * shared bfq_queue.
11969 -+ */
11970 -+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
11971 -+ put_io_context(icq->ioc);
11972 - bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
11973 - bic->bfqq[BLK_RW_SYNC] = NULL;
11974 - }
11975 -@@ -2405,6 +2622,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
11976 - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
11977 - return;
11978 -
11979 -+ /* Idle window just restored, statistics are meaningless. */
11980 -+ if (bfq_bfqq_just_split(bfqq))
11981 -+ return;
11982 -+
11983 - enable_idle = bfq_bfqq_idle_window(bfqq);
11984 -
11985 - if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
11986 -@@ -2445,6 +2666,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
11987 - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
11988 - !BFQQ_SEEKY(bfqq))
11989 - bfq_update_idle_window(bfqd, bfqq, bic);
11990 -+ bfq_clear_bfqq_just_split(bfqq);
11991 -
11992 - bfq_log_bfqq(bfqd, bfqq,
11993 - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
11994 -@@ -2505,13 +2727,48 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
11995 - static void bfq_insert_request(struct request_queue *q, struct request *rq)
11996 - {
11997 - struct bfq_data *bfqd = q->elevator->elevator_data;
11998 -- struct bfq_queue *bfqq = RQ_BFQQ(rq);
11999 -+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
12000 -
12001 - assert_spin_locked(bfqd->queue->queue_lock);
12002 -+
12003 -+ /*
12004 -+ * An unplug may trigger a requeue of a request from the device
12005 -+ * driver: make sure we are in process context while trying to
12006 -+ * merge two bfq_queues.
12007 -+ */
12008 -+ if (!in_interrupt()) {
12009 -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
12010 -+ if (new_bfqq != NULL) {
12011 -+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
12012 -+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
12013 -+ /*
12014 -+ * Release the request's reference to the old bfqq
12015 -+ * and make sure one is taken to the shared queue.
12016 -+ */
12017 -+ new_bfqq->allocated[rq_data_dir(rq)]++;
12018 -+ bfqq->allocated[rq_data_dir(rq)]--;
12019 -+ atomic_inc(&new_bfqq->ref);
12020 -+ bfq_put_queue(bfqq);
12021 -+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
12022 -+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
12023 -+ bfqq, new_bfqq);
12024 -+ rq->elv.priv[1] = new_bfqq;
12025 -+ bfqq = new_bfqq;
12026 -+ }
12027 -+ }
12028 -+
12029 - bfq_init_prio_data(bfqq, RQ_BIC(rq));
12030 -
12031 - bfq_add_rq_rb(rq);
12032 -
12033 -+ /*
12034 -+ * Here a newly-created bfq_queue has already started a weight-raising
12035 -+ * period: clear raising_time_left to prevent bfq_bfqq_save_state()
12036 -+ * from assigning it a full weight-raising period. See the detailed
12037 -+ * comments about this field in bfq_init_icq().
12038 -+ */
12039 -+ if (bfqq->bic != NULL)
12040 -+ bfqq->bic->raising_time_left = 0;
12041 - rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
12042 - list_add_tail(&rq->queuelist, &bfqq->fifo);
12043 -
12044 -@@ -2663,18 +2920,6 @@ static void bfq_put_request(struct request *rq)
12045 - }
12046 - }
12047 -
12048 --static struct bfq_queue *
12049 --bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
12050 -- struct bfq_queue *bfqq)
12051 --{
12052 -- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
12053 -- (long unsigned)bfqq->new_bfqq->pid);
12054 -- bic_set_bfqq(bic, bfqq->new_bfqq, 1);
12055 -- bfq_mark_bfqq_coop(bfqq->new_bfqq);
12056 -- bfq_put_queue(bfqq);
12057 -- return bic_to_bfqq(bic, 1);
12058 --}
12059 --
12060 - /*
12061 - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
12062 - * was the last process referring to said bfqq.
12063 -@@ -2683,6 +2928,9 @@ static struct bfq_queue *
12064 - bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
12065 - {
12066 - bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
12067 -+
12068 -+ put_io_context(bic->icq.ioc);
12069 -+
12070 - if (bfqq_process_refs(bfqq) == 1) {
12071 - bfqq->pid = current->pid;
12072 - bfq_clear_bfqq_coop(bfqq);
12073 -@@ -2711,6 +2959,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
12074 - struct bfq_queue *bfqq;
12075 - struct bfq_group *bfqg;
12076 - unsigned long flags;
12077 -+ bool split = false;
12078 -
12079 - might_sleep_if(gfp_mask & __GFP_WAIT);
12080 -
12081 -@@ -2729,24 +2978,14 @@ new_queue:
12082 - bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
12083 - bic_set_bfqq(bic, bfqq, is_sync);
12084 - } else {
12085 -- /*
12086 -- * If the queue was seeky for too long, break it apart.
12087 -- */
12088 -+ /* If the queue was seeky for too long, break it apart. */
12089 - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
12090 - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
12091 - bfqq = bfq_split_bfqq(bic, bfqq);
12092 -+ split = true;
12093 - if (!bfqq)
12094 - goto new_queue;
12095 - }
12096 --
12097 -- /*
12098 -- * Check to see if this queue is scheduled to merge with
12099 -- * another closely cooperating queue. The merging of queues
12100 -- * happens here as it must be done in process context.
12101 -- * The reference on new_bfqq was taken in merge_bfqqs.
12102 -- */
12103 -- if (bfqq->new_bfqq != NULL)
12104 -- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
12105 - }
12106 -
12107 - bfqq->allocated[rw]++;
12108 -@@ -2757,6 +2996,26 @@ new_queue:
12109 - rq->elv.priv[0] = bic;
12110 - rq->elv.priv[1] = bfqq;
12111 -
12112 -+ /*
12113 -+ * If a bfq_queue has only one process reference, it is owned
12114 -+ * by only one bfq_io_cq: we can set the bic field of the
12115 -+ * bfq_queue to the address of that structure. Also, if the
12116 -+ * queue has just been split, mark a flag so that the
12117 -+ * information is available to the other scheduler hooks.
12118 -+ */
12119 -+ if (bfqq_process_refs(bfqq) == 1) {
12120 -+ bfqq->bic = bic;
12121 -+ if (split) {
12122 -+ bfq_mark_bfqq_just_split(bfqq);
12123 -+ /*
12124 -+ * If the queue has just been split from a shared queue,
12125 -+ * restore the idle window and the possible weight
12126 -+ * raising period.
12127 -+ */
12128 -+ bfq_bfqq_resume_state(bfqq, bic);
12129 -+ }
12130 -+ }
12131 -+
12132 - spin_unlock_irqrestore(q->queue_lock, flags);
12133 -
12134 - return 0;
12135 -diff --git a/block/bfq-sched.c b/block/bfq-sched.c
12136 -index 999b475..e54ea33 100644
12137 ---- a/block/bfq-sched.c
12138 -+++ b/block/bfq-sched.c
12139 -@@ -980,34 +980,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
12140 - return bfqq;
12141 - }
12142 -
12143 --/*
12144 -- * Forced extraction of the given queue.
12145 -- */
12146 --static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
12147 -- struct bfq_queue *bfqq)
12148 --{
12149 -- struct bfq_entity *entity;
12150 -- struct bfq_sched_data *sd;
12151 --
12152 -- BUG_ON(bfqd->in_service_queue != NULL);
12153 --
12154 -- entity = &bfqq->entity;
12155 -- /*
12156 -- * Bubble up extraction/update from the leaf to the root.
12157 -- */
12158 -- for_each_entity(entity) {
12159 -- sd = entity->sched_data;
12160 -- bfq_update_budget(entity);
12161 -- bfq_update_vtime(bfq_entity_service_tree(entity));
12162 -- bfq_active_extract(bfq_entity_service_tree(entity), entity);
12163 -- sd->active_entity = entity;
12164 -- sd->next_active = NULL;
12165 -- entity->service = 0;
12166 -- }
12167 --
12168 -- return;
12169 --}
12170 --
12171 - static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
12172 - {
12173 - if (bfqd->in_service_bic != NULL) {
12174 -diff --git a/block/bfq.h b/block/bfq.h
12175 -index 3ca8482..c278796 100644
12176 ---- a/block/bfq.h
12177 -+++ b/block/bfq.h
12178 -@@ -200,6 +200,8 @@ struct bfq_group;
12179 - * idle to backlogged
12180 - * @service_from_backlogged: cumulative service received from the @bfq_queue
12181 - * since the last transition from idle to backlogged
12182 -+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
12183 -+ * queue is shared
12184 - *
12185 - * A bfq_queue is a leaf request queue; it can be associated with an io_context
12186 - * or more, if it is async or shared between cooperating processes. @cgroup
12187 -@@ -243,6 +245,7 @@ struct bfq_queue {
12188 - sector_t last_request_pos;
12189 -
12190 - pid_t pid;
12191 -+ struct bfq_io_cq *bic;
12192 -
12193 - /* weight-raising fields */
12194 - unsigned long raising_cur_max_time;
12195 -@@ -272,12 +275,23 @@ struct bfq_ttime {
12196 - * @icq: associated io_cq structure
12197 - * @bfqq: array of two process queues, the sync and the async
12198 - * @ttime: associated @bfq_ttime struct
12199 -+ * @raising_time_left: snapshot of the time left before weight raising ends
12200 -+ * for the sync queue associated to this process; this
12201 -+ * snapshot is taken to remember this value while the weight
12202 -+ * raising is suspended because the queue is merged with a
12203 -+ * shared queue, and is used to set @raising_cur_max_time
12204 -+ * when the queue is split from the shared queue and its
12205 -+ * weight is raised again
12206 -+ * @saved_idle_window: same purpose as the previous field for the idle window
12207 - */
12208 - struct bfq_io_cq {
12209 - struct io_cq icq; /* must be the first member */
12210 - struct bfq_queue *bfqq[2];
12211 - struct bfq_ttime ttime;
12212 - int ioprio;
12213 -+
12214 -+ unsigned int raising_time_left;
12215 -+ unsigned int saved_idle_window;
12216 - };
12217 -
12218 - /**
12219 -@@ -418,8 +432,9 @@ enum bfqq_state_flags {
12220 - BFQ_BFQQ_FLAG_sync, /* synchronous queue */
12221 - BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
12222 - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
12223 -- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
12224 -- BFQ_BFQQ_FLAG_softrt_update, /* needs softrt-next-start update */
12225 -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
12226 -+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
12227 -+ BFQ_BFQQ_FLAG_softrt_update, /* may need softrt-next-start update */
12228 - };
12229 -
12230 - #define BFQ_BFQQ_FNS(name) \
12231 -@@ -446,6 +461,7 @@ BFQ_BFQQ_FNS(sync);
12232 - BFQ_BFQQ_FNS(budget_new);
12233 - BFQ_BFQQ_FNS(coop);
12234 - BFQ_BFQQ_FNS(split_coop);
12235 -+BFQ_BFQQ_FNS(just_split);
12236 - BFQ_BFQQ_FNS(softrt_update);
12237 - #undef BFQ_BFQQ_FNS
12238 -
12239 ---
12240 -1.9.0
12241 -