Gentoo Archives: gentoo-commits

From: "Tom Wijsman (tomwij)" <tomwij@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] linux-patches r2423 - genpatches-2.6/trunk/3.10
Date: Mon, 01 Jul 2013 07:02:44
Message-Id: 20130701070235.9282C2171C@flycatcher.gentoo.org
1 Author: tomwij
2 Date: 2013-07-01 07:02:35 +0000 (Mon, 01 Jul 2013)
3 New Revision: 2423
4
5 Added:
6 genpatches-2.6/trunk/3.10/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.9.patch
7 genpatches-2.6/trunk/3.10/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.9.patch1
8 genpatches-2.6/trunk/3.10/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.9.0.patch1
9 Modified:
10 genpatches-2.6/trunk/3.10/0000_README
11 Log:
12 Bring missing BFQ patches from 3.9 to the 3.10 branch.
13
14 Modified: genpatches-2.6/trunk/3.10/0000_README
15 ===================================================================
16 --- genpatches-2.6/trunk/3.10/0000_README 2013-07-01 01:14:09 UTC (rev 2422)
17 +++ genpatches-2.6/trunk/3.10/0000_README 2013-07-01 07:02:35 UTC (rev 2423)
18 @@ -51,6 +51,18 @@
19 From: https://bugs.gentoo.org/show_bug.cgi?id=462066
20 Desc: Revert memcg patches that prevent OOM with too many dirty pages.
21
22 +Patch: 1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.9.patch
23 +From: http://algo.ing.unimo.it/people/paolo/disk_sched/
24 +Desc: BFQ v6r2 patch 1 for 3.9: Build, cgroups and kconfig bits
25 +
26 +Patch: 1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.9.patch1
27 +From: http://algo.ing.unimo.it/people/paolo/disk_sched/
28 +Desc: BFQ v6r2 patch 2 for 3.9: BFQ Scheduler
29 +
30 +Patch: 1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.9.0.patch1
31 +From: http://algo.ing.unimo.it/people/paolo/disk_sched/
32 +Desc: BFQ v6r2 patch 3 for 3.9: Early Queue Merge (EQM)
33 +
34 Patch: 2400_kcopy-patch-for-infiniband-driver.patch
35 From: Alexey Shvetsov <alexxy@g.o>
36 Desc: Zero copy for infiniband psm userspace driver
37
38 Added: genpatches-2.6/trunk/3.10/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.9.patch
39 ===================================================================
40 --- genpatches-2.6/trunk/3.10/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.9.patch (rev 0)
41 +++ genpatches-2.6/trunk/3.10/1801_block-cgroups-kconfig-build-bits-for-BFQ-v6r2-3.9.patch 2013-07-01 07:02:35 UTC (rev 2423)
42 @@ -0,0 +1,97 @@
43 +From 6946e7e4e53df0836fe13a2a8a750c0d70f66f3d Mon Sep 17 00:00:00 2001
44 +From: Matteo Bernardini <matteo.bernardini@×××××.com>
45 +Date: Thu, 9 May 2013 18:58:50 +0200
46 +Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v6r2-3.9
47 +
48 +Update Kconfig.iosched and do the related Makefile changes to include
49 +kernel configuration options for BFQ. Also add the bfqio controller
50 +to the cgroups subsystem.
51 +
52 +Signed-off-by: Paolo Valente <paolo.valente@×××××××.it>
53 +Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
54 +Signed-off-by: Matteo Bernardini <matteo.bernardini@×××××.com>
55 +---
56 + block/Kconfig.iosched | 25 +++++++++++++++++++++++++
57 + block/Makefile | 1 +
58 + include/linux/cgroup_subsys.h | 6 ++++++
59 + 3 files changed, 32 insertions(+)
60 +
61 +diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
62 +index 421bef9..695e064 100644
63 +--- a/block/Kconfig.iosched
64 ++++ b/block/Kconfig.iosched
65 +@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED
66 + ---help---
67 + Enable group IO scheduling in CFQ.
68 +
69 ++config IOSCHED_BFQ
70 ++ tristate "BFQ I/O scheduler"
71 ++ default n
72 ++ ---help---
73 ++ The BFQ I/O scheduler tries to distribute bandwidth among
74 ++ all processes according to their weights.
75 ++ It aims at distributing the bandwidth as desired, independently of
76 ++ the disk parameters and with any workload. It also tries to
77 ++ guarantee low latency to interactive and soft real-time
78 ++ applications. If compiled built-in (saying Y here), BFQ can
79 ++ be configured to support hierarchical scheduling.
80 ++
81 ++config CGROUP_BFQIO
82 ++ bool "BFQ hierarchical scheduling support"
83 ++ depends on CGROUPS && IOSCHED_BFQ=y
84 ++ default n
85 ++ ---help---
86 ++ Enable hierarchical scheduling in BFQ, using the cgroups
87 ++ filesystem interface. The name of the subsystem will be
88 ++ bfqio.
89 ++
90 + choice
91 + prompt "Default I/O scheduler"
92 + default DEFAULT_CFQ
93 +@@ -52,6 +73,9 @@ choice
94 + config DEFAULT_CFQ
95 + bool "CFQ" if IOSCHED_CFQ=y
96 +
97 ++ config DEFAULT_BFQ
98 ++ bool "BFQ" if IOSCHED_BFQ=y
99 ++
100 + config DEFAULT_NOOP
101 + bool "No-op"
102 +
103 +@@ -61,6 +85,7 @@ config DEFAULT_IOSCHED
104 + string
105 + default "deadline" if DEFAULT_DEADLINE
106 + default "cfq" if DEFAULT_CFQ
107 ++ default "bfq" if DEFAULT_BFQ
108 + default "noop" if DEFAULT_NOOP
109 +
110 + endmenu
111 +diff --git a/block/Makefile b/block/Makefile
112 +index 39b76ba..c0d20fa 100644
113 +--- a/block/Makefile
114 ++++ b/block/Makefile
115 +@@ -15,6 +15,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
116 + obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
117 + obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
118 + obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
119 ++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o
120 +
121 + obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
122 + obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
123 +diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
124 +index f204a7a..b999bfa 100644
125 +--- a/include/linux/cgroup_subsys.h
126 ++++ b/include/linux/cgroup_subsys.h
127 +@@ -78,3 +78,9 @@ SUBSYS(hugetlb)
128 + #endif
129 +
130 + /* */
131 ++
132 ++#ifdef CONFIG_CGROUP_BFQIO
133 ++SUBSYS(bfqio)
134 ++#endif
135 ++
136 ++/* */
137 +--
138 +1.8.1.4
139 +
140
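For context on the cgroup_subsys.h hunk above: the kernel builds its controller IDs with an x-macro, expanding that header several times with different SUBSYS() definitions, which is how bfqio_subsys_id (used throughout bfq-cgroup.c in the next patch) comes into existence once SUBSYS(bfqio) is appended. A minimal standalone sketch of the pattern follows; the list contents and names are illustrative only, not the kernel's actual definitions.

#include <stdio.h>

/* Hypothetical list standing in for include/linux/cgroup_subsys.h. */
#define FOR_EACH_SUBSYS(X) \
	X(cpuset)          \
	X(blkio)           \
	X(bfqio)           /* present only when CONFIG_CGROUP_BFQIO=y */

/* First expansion: generate the *_subsys_id enum constants. */
#define X(name) name##_subsys_id,
enum subsys_id { FOR_EACH_SUBSYS(X) SUBSYS_COUNT };
#undef X

/* Second expansion: generate a name table indexed by those IDs. */
#define X(name) #name,
static const char *subsys_name[] = { FOR_EACH_SUBSYS(X) };
#undef X

int main(void)
{
	printf("bfqio registered as subsystem %d of %d (%s)\n",
	       bfqio_subsys_id, SUBSYS_COUNT, subsys_name[bfqio_subsys_id]);
	return 0;
}

Because every expansion site walks the same list, guarding the single SUBSYS(bfqio) line with CONFIG_CGROUP_BFQIO is enough to register or hide the whole controller.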
141 Added: genpatches-2.6/trunk/3.10/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.9.patch1
142 ===================================================================
143 --- genpatches-2.6/trunk/3.10/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.9.patch1 (rev 0)
144 +++ genpatches-2.6/trunk/3.10/1802_block-introduce-the-BFQ-v6r2-I-O-sched-for-3.9.patch1 2013-07-01 07:02:35 UTC (rev 2423)
145 @@ -0,0 +1,5748 @@
146 +From c85fc6e997b49039c6580a7257b9773777656d8a Mon Sep 17 00:00:00 2001
147 +From: Arianna Avanzini <avanzini.arianna@×××××.com>
148 +Date: Thu, 9 May 2013 19:10:02 +0200
149 +Subject: [PATCH 2/3] block: introduce the BFQ-v6r2 I/O sched for 3.9
150 +
151 +Add the BFQ-v6r2 I/O scheduler to 3.9.
152 +The general structure is borrowed from CFQ, as is much of the code. A
153 +(bfq_)queue is associated with each task doing I/O on a device, and each
154 +time a scheduling decision has to be made a queue is selected and served
155 +until it expires.
156 +
157 + - Slices are given in the service domain: tasks are assigned
158 + budgets, measured in number of sectors. Once granted the disk, a
159 + task must consume its assigned budget within a configurable
160 + maximum time (by default, the maximum possible value of the
161 + budgets is automatically computed to comply with this timeout).
162 + This allows the desired latency vs "throughput boosting" tradeoff
163 + to be set.
164 +
165 + - Budgets are scheduled according to a variant of WF2Q+, implemented
166 + using an augmented rb-tree to take eligibility into account while
167 + preserving an O(log N) overall complexity.
168 +
169 + - A low-latency tunable is provided; if enabled, both interactive
170 + and soft real-time applications are guaranteed very low latency.
171 +
172 + - Latency guarantees are also preserved in the presence of NCQ.
173 +
174 + - High throughput is also achieved with flash-based devices, while
175 + still preserving latency guarantees.
176 +
177 + - Useful features borrowed from CFQ: cooperating-queues merging (with
178 + some additional optimizations with respect to the original CFQ version),
179 + static fallback queue for OOM.
180 +
181 + - BFQ supports full hierarchical scheduling, exporting a cgroups
182 + interface. Each node has a full scheduler, so each group can
183 + be assigned its own ioprio (mapped to a weight, see next point)
184 + and an ioprio_class.
185 +
186 + - If the cgroups interface is used, weights can be explicitly
187 + assigned; otherwise ioprio values are mapped to weights using the
188 + relation weight = IOPRIO_BE_NR - ioprio (see the sketch after the diffstat).
189 +
190 + - ioprio classes are served in strict priority order, i.e., lower
191 + priority queues are not served as long as there are higher
192 + priority queues. Among queues in the same class the bandwidth is
193 + distributed in proportion to the weight of each queue. A small
194 + amount of extra bandwidth is nevertheless guaranteed to the Idle
195 + class to prevent it from starving.
196 +
197 +Signed-off-by: Paolo Valente <paolo.valente@×××××××.it>
198 +Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
199 +---
200 + block/bfq-cgroup.c | 868 +++++++++++++++
201 + block/bfq-ioc.c | 36 +
202 + block/bfq-iosched.c | 3070 +++++++++++++++++++++++++++++++++++++++++++++++++++
203 + block/bfq-sched.c | 1072 ++++++++++++++++++
204 + block/bfq.h | 603 ++++++++++
205 + 5 files changed, 5649 insertions(+)
206 + create mode 100644 block/bfq-cgroup.c
207 + create mode 100644 block/bfq-ioc.c
208 + create mode 100644 block/bfq-iosched.c
209 + create mode 100644 block/bfq-sched.c
210 + create mode 100644 block/bfq.h
211 +
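Before the code itself, the small worked sketch of the ioprio-to-weight relation quoted in the description above (the "sketch after the diffstat"). It assumes IOPRIO_BE_NR is 8, as in the mainline ioprio.h, and the helper name is purely illustrative rather than BFQ's internal one.

#include <stdio.h>

/* Default mapping described above: weight = IOPRIO_BE_NR - ioprio,
 * so best-effort priorities 0..7 map to weights 8..1. */
#define IOPRIO_BE_NR 8

static int ioprio_to_weight_example(int ioprio)
{
	return IOPRIO_BE_NR - ioprio;
}

int main(void)
{
	int ioprio;

	for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
		printf("ioprio %d -> weight %d\n",
		       ioprio, ioprio_to_weight_example(ioprio));
	/* Two queues in the same class with ioprio 4 (weight 4) and
	 * ioprio 6 (weight 2) would share bandwidth roughly 2:1. */
	return 0;
}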
212 +diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
213 +new file mode 100644
214 +index 0000000..2c569cc
215 +--- /dev/null
216 ++++ b/block/bfq-cgroup.c
217 +@@ -0,0 +1,868 @@
218 ++/*
219 ++ * BFQ: CGROUPS support.
220 ++ *
221 ++ * Based on ideas and code from CFQ:
222 ++ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
223 ++ *
224 ++ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
225 ++ * Paolo Valente <paolo.valente@×××××××.it>
226 ++ *
227 ++ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
228 ++ *
229 ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file.
230 ++ */
231 ++
232 ++#ifdef CONFIG_CGROUP_BFQIO
233 ++static struct bfqio_cgroup bfqio_root_cgroup = {
234 ++ .weight = BFQ_DEFAULT_GRP_WEIGHT,
235 ++ .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
236 ++ .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
237 ++};
238 ++
239 ++static inline void bfq_init_entity(struct bfq_entity *entity,
240 ++ struct bfq_group *bfqg)
241 ++{
242 ++ entity->weight = entity->new_weight;
243 ++ entity->orig_weight = entity->new_weight;
244 ++ entity->ioprio = entity->new_ioprio;
245 ++ entity->ioprio_class = entity->new_ioprio_class;
246 ++ entity->parent = bfqg->my_entity;
247 ++ entity->sched_data = &bfqg->sched_data;
248 ++}
249 ++
250 ++static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup)
251 ++{
252 ++ return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id),
253 ++ struct bfqio_cgroup, css);
254 ++}
255 ++
256 ++/*
257 ++ * Search for the bfq_group associated with bfqd in bgrp's hash table
258 ++ * (currently just a list). Must be called under rcu_read_lock().
259 ++ */
260 ++static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
261 ++ struct bfq_data *bfqd)
262 ++{
263 ++ struct bfq_group *bfqg;
264 ++ void *key;
265 ++
266 ++ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
267 ++ key = rcu_dereference(bfqg->bfqd);
268 ++ if (key == bfqd)
269 ++ return bfqg;
270 ++ }
271 ++
272 ++ return NULL;
273 ++}
274 ++
275 ++static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
276 ++ struct bfq_group *bfqg)
277 ++{
278 ++ struct bfq_entity *entity = &bfqg->entity;
279 ++
280 ++ /*
281 ++ * If the weight of the entity has never been set via the sysfs
282 ++ * interface, then bgrp->weight == 0. In this case we initialize
283 ++ * the weight from the current ioprio value. Otherwise, the group
284 ++ * weight, if set, has priority over the ioprio value.
285 ++ */
286 ++ if (bgrp->weight == 0) {
287 ++ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
288 ++ entity->new_ioprio = bgrp->ioprio;
289 ++ } else {
290 ++ entity->new_weight = bgrp->weight;
291 ++ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
292 ++ }
293 ++ entity->orig_weight = entity->weight = entity->new_weight;
294 ++ entity->ioprio = entity->new_ioprio;
295 ++ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
296 ++ entity->my_sched_data = &bfqg->sched_data;
297 ++}
298 ++
299 ++static inline void bfq_group_set_parent(struct bfq_group *bfqg,
300 ++ struct bfq_group *parent)
301 ++{
302 ++ struct bfq_entity *entity;
303 ++
304 ++ BUG_ON(parent == NULL);
305 ++ BUG_ON(bfqg == NULL);
306 ++
307 ++ entity = &bfqg->entity;
308 ++ entity->parent = parent->my_entity;
309 ++ entity->sched_data = &parent->sched_data;
310 ++}
311 ++
312 ++/**
313 ++ * bfq_group_chain_alloc - allocate a chain of groups.
314 ++ * @bfqd: queue descriptor.
315 ++ * @cgroup: the leaf cgroup this chain starts from.
316 ++ *
317 ++ * Allocate a chain of groups starting from the one belonging to
318 ++ * @cgroup up to the root cgroup. Stop if a cgroup on the chain
319 ++ * to the root has already an allocated group on @bfqd.
320 ++ */
321 ++static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
322 ++ struct cgroup *cgroup)
323 ++{
324 ++ struct bfqio_cgroup *bgrp;
325 ++ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
326 ++
327 ++ for (; cgroup != NULL; cgroup = cgroup->parent) {
328 ++ bgrp = cgroup_to_bfqio(cgroup);
329 ++
330 ++ bfqg = bfqio_lookup_group(bgrp, bfqd);
331 ++ if (bfqg != NULL) {
332 ++ /*
333 ++ * All the cgroups in the path from there to the
334 ++ * root must have a bfq_group for bfqd, so we don't
335 ++ * need any more allocations.
336 ++ */
337 ++ break;
338 ++ }
339 ++
340 ++ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
341 ++ if (bfqg == NULL)
342 ++ goto cleanup;
343 ++
344 ++ bfq_group_init_entity(bgrp, bfqg);
345 ++ bfqg->my_entity = &bfqg->entity;
346 ++
347 ++ if (leaf == NULL) {
348 ++ leaf = bfqg;
349 ++ prev = leaf;
350 ++ } else {
351 ++ bfq_group_set_parent(prev, bfqg);
352 ++ /*
353 ++ * Build a list of allocated nodes using the bfqd
354 ++ * field, which is still unused and will be initialized
355 ++ * only after the node has been connected.
356 ++ */
357 ++ prev->bfqd = bfqg;
358 ++ prev = bfqg;
359 ++ }
360 ++ }
361 ++
362 ++ return leaf;
363 ++
364 ++cleanup:
365 ++ while (leaf != NULL) {
366 ++ prev = leaf;
367 ++ leaf = leaf->bfqd;
368 ++ kfree(prev);
369 ++ }
370 ++
371 ++ return NULL;
372 ++}
373 ++
374 ++/**
375 ++ * bfq_group_chain_link - link an allocated group chain to a cgroup hierarchy.
376 ++ * @bfqd: the queue descriptor.
377 ++ * @cgroup: the leaf cgroup to start from.
378 ++ * @leaf: the leaf group (to be associated to @cgroup).
379 ++ *
380 ++ * Try to link a chain of groups to a cgroup hierarchy, connecting the
381 ++ * nodes bottom-up, so we can be sure that when we find a cgroup in the
382 ++ * hierarchy that already has a group associated with @bfqd, all the nodes
383 ++ * in the path to the root cgroup have one too.
384 ++ *
385 ++ * On locking: the queue lock protects the hierarchy (there is a hierarchy
386 ++ * per device) while the bfqio_cgroup lock protects the list of groups
387 ++ * belonging to the same cgroup.
388 ++ */
389 ++static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup,
390 ++ struct bfq_group *leaf)
391 ++{
392 ++ struct bfqio_cgroup *bgrp;
393 ++ struct bfq_group *bfqg, *next, *prev = NULL;
394 ++ unsigned long flags;
395 ++
396 ++ assert_spin_locked(bfqd->queue->queue_lock);
397 ++
398 ++ for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) {
399 ++ bgrp = cgroup_to_bfqio(cgroup);
400 ++ next = leaf->bfqd;
401 ++
402 ++ bfqg = bfqio_lookup_group(bgrp, bfqd);
403 ++ BUG_ON(bfqg != NULL);
404 ++
405 ++ spin_lock_irqsave(&bgrp->lock, flags);
406 ++
407 ++ rcu_assign_pointer(leaf->bfqd, bfqd);
408 ++ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
409 ++ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
410 ++
411 ++ spin_unlock_irqrestore(&bgrp->lock, flags);
412 ++
413 ++ prev = leaf;
414 ++ leaf = next;
415 ++ }
416 ++
417 ++ BUG_ON(cgroup == NULL && leaf != NULL);
418 ++ if (cgroup != NULL && prev != NULL) {
419 ++ bgrp = cgroup_to_bfqio(cgroup);
420 ++ bfqg = bfqio_lookup_group(bgrp, bfqd);
421 ++ bfq_group_set_parent(prev, bfqg);
422 ++ }
423 ++}
424 ++
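The two helpers above split group creation into an allocation phase and a linking phase, so a failed kzalloc() never leaves a half-connected chain visible to lookups. A simplified, self-contained sketch of that allocate-everything-first idea; the structure and function names here are made up for illustration and do not match BFQ's.

#include <stdlib.h>

/* Walk up a parent chain, allocate all missing nodes, and only
 * connect them once every allocation has succeeded. */
struct node {
	struct node *parent;
	struct node *tmp_next;	/* scratch link while the chain is detached */
	int attached;
};

static struct node *chain_alloc(int depth)
{
	struct node *head = NULL, *n;

	while (depth-- > 0) {
		n = calloc(1, sizeof(*n));
		if (n == NULL)
			goto cleanup;	/* nothing published yet: safe to undo */
		n->tmp_next = head;
		head = n;
	}
	return head;

cleanup:
	while (head != NULL) {
		n = head->tmp_next;
		free(head);
		head = n;
	}
	return NULL;
}

static void chain_link(struct node *head, struct node *existing_parent)
{
	struct node *n, *next;

	for (n = head; n != NULL; n = next) {	/* phase 2: publish the chain */
		next = n->tmp_next;
		n->parent = next != NULL ? next : existing_parent;
		n->attached = 1;
	}
}

int main(void)
{
	struct node existing = { 0 };
	struct node *chain = chain_alloc(3);

	if (chain != NULL)
		chain_link(chain, &existing);
	return chain != NULL ? 0 : 1;
}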
425 ++/**
426 ++ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
427 ++ * @bfqd: queue descriptor.
428 ++ * @cgroup: cgroup being searched for.
429 ++ *
430 ++ * Return a group associated to @bfqd in @cgroup, allocating one if
431 ++ * necessary. When a group is returned all the cgroups in the path
432 ++ * to the root have a group associated to @bfqd.
433 ++ *
434 ++ * If the allocation fails, return the root group: this breaks guarantees
435 ++ * but is a safe fallback. If this loss becomes a problem it can be
436 ++ * mitigated using the equivalent weight (given by the product of the
437 ++ * weights of the groups in the path from @group to the root) in the
438 ++ * root scheduler.
439 ++ *
440 ++ * We allocate all the missing nodes in the path from the leaf cgroup
441 ++ * to the root and we connect the nodes only after all the allocations
442 ++ * have been successful.
443 ++ */
444 ++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
445 ++ struct cgroup *cgroup)
446 ++{
447 ++ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
448 ++ struct bfq_group *bfqg;
449 ++
450 ++ bfqg = bfqio_lookup_group(bgrp, bfqd);
451 ++ if (bfqg != NULL)
452 ++ return bfqg;
453 ++
454 ++ bfqg = bfq_group_chain_alloc(bfqd, cgroup);
455 ++ if (bfqg != NULL)
456 ++ bfq_group_chain_link(bfqd, cgroup, bfqg);
457 ++ else
458 ++ bfqg = bfqd->root_group;
459 ++
460 ++ return bfqg;
461 ++}
462 ++
463 ++/**
464 ++ * bfq_bfqq_move - migrate @bfqq to @bfqg.
465 ++ * @bfqd: queue descriptor.
466 ++ * @bfqq: the queue to move.
467 ++ * @entity: @bfqq's entity.
468 ++ * @bfqg: the group to move to.
469 ++ *
470 ++ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
471 ++ * it on the new one. Avoid putting the entity on the old group idle tree.
472 ++ *
473 ++ * Must be called under the queue lock; the cgroup owning @bfqg must
474 ++ * not disappear (by now this just means that we are called under
475 ++ * rcu_read_lock()).
476 ++ */
477 ++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
478 ++ struct bfq_entity *entity, struct bfq_group *bfqg)
479 ++{
480 ++ int busy, resume;
481 ++
482 ++ busy = bfq_bfqq_busy(bfqq);
483 ++ resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
484 ++
485 ++ BUG_ON(resume && !entity->on_st);
486 ++ BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue);
487 ++
488 ++ if (busy) {
489 ++ BUG_ON(atomic_read(&bfqq->ref) < 2);
490 ++
491 ++ if (!resume)
492 ++ bfq_del_bfqq_busy(bfqd, bfqq, 0);
493 ++ else
494 ++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
495 ++ } else if (entity->on_st)
496 ++ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
497 ++
498 ++ /*
499 ++ * Here we use a reference to bfqg. We don't need a refcounter
500 ++ * as the cgroup reference will not be dropped, so that its
501 ++ * destroy() callback will not be invoked.
502 ++ */
503 ++ entity->parent = bfqg->my_entity;
504 ++ entity->sched_data = &bfqg->sched_data;
505 ++
506 ++ if (busy && resume)
507 ++ bfq_activate_bfqq(bfqd, bfqq);
508 ++
509 ++ if (bfqd->active_queue == NULL && !bfqd->rq_in_driver)
510 ++ bfq_schedule_dispatch(bfqd);
511 ++}
512 ++
513 ++/**
514 ++ * __bfq_bic_change_cgroup - move @bic to @cgroup.
515 ++ * @bfqd: the queue descriptor.
516 ++ * @bic: the bic to move.
517 ++ * @cgroup: the cgroup to move to.
518 ++ *
519 ++ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
520 ++ * has to make sure that the reference to cgroup is valid across the call.
521 ++ *
522 ++ * NOTE: an alternative approach might have been to store the current
523 ++ * cgroup in bfqq and getting a reference to it, reducing the lookup
524 ++ * time here, at the price of slightly more complex code.
525 ++ */
526 ++static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
527 ++ struct bfq_io_cq *bic,
528 ++ struct cgroup *cgroup)
529 ++{
530 ++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
531 ++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
532 ++ struct bfq_entity *entity;
533 ++ struct bfq_group *bfqg;
534 ++ struct bfqio_cgroup *bgrp;
535 ++
536 ++ bgrp = cgroup_to_bfqio(cgroup);
537 ++
538 ++ bfqg = bfq_find_alloc_group(bfqd, cgroup);
539 ++ if (async_bfqq != NULL) {
540 ++ entity = &async_bfqq->entity;
541 ++
542 ++ if (entity->sched_data != &bfqg->sched_data) {
543 ++ bic_set_bfqq(bic, NULL, 0);
544 ++ bfq_log_bfqq(bfqd, async_bfqq,
545 ++ "bic_change_group: %p %d",
546 ++ async_bfqq, atomic_read(&async_bfqq->ref));
547 ++ bfq_put_queue(async_bfqq);
548 ++ }
549 ++ }
550 ++
551 ++ if (sync_bfqq != NULL) {
552 ++ entity = &sync_bfqq->entity;
553 ++ if (entity->sched_data != &bfqg->sched_data)
554 ++ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
555 ++ }
556 ++
557 ++ return bfqg;
558 ++}
559 ++
560 ++/**
561 ++ * bfq_bic_change_cgroup - move @bic to @cgroup.
562 ++ * @bic: the bic being migrated.
563 ++ * @cgroup: the destination cgroup.
564 ++ *
565 ++ * When the task owning @bic is moved to @cgroup, @bic is immediately
566 ++ * moved into its new parent group.
567 ++ */
568 ++static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
569 ++ struct cgroup *cgroup)
570 ++{
571 ++ struct bfq_data *bfqd;
572 ++ unsigned long uninitialized_var(flags);
573 ++
574 ++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags);
575 ++ if (bfqd != NULL) {
576 ++ __bfq_bic_change_cgroup(bfqd, bic, cgroup);
577 ++ bfq_put_bfqd_unlock(bfqd, &flags);
578 ++ }
579 ++}
580 ++
581 ++/**
582 ++ * bfq_bic_update_cgroup - update the cgroup of @bic.
583 ++ * @bic: the @bic to update.
584 ++ *
585 ++ * Make sure that @bic is enqueued in the cgroup of the current task.
586 ++ * We need this in addition to moving bics during the cgroup attach
587 ++ * phase because the task owning @bic could be at its first disk
588 ++ * access or we may end up in the root cgroup as the result of a
589 ++ * memory allocation failure and here we try to move to the right
590 ++ * group.
591 ++ *
592 ++ * Must be called under the queue lock. It is safe to use the returned
593 ++ * value even after the rcu_read_unlock() as the migration/destruction
594 ++ * paths act under the queue lock too. IOW it is impossible to race with
595 ++ * group migration/destruction and end up with an invalid group as:
596 ++ * a) here cgroup has not yet been destroyed, nor its destroy callback
597 ++ * has started execution, as current holds a reference to it,
598 ++ * b) if it is destroyed after rcu_read_unlock() [after current is
599 ++ * migrated to a different cgroup] its attach() callback will have
600 ++ * taken care of removing all the references to the old cgroup data.
601 ++ */
602 ++static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
603 ++{
604 ++ struct bfq_data *bfqd = bic_to_bfqd(bic);
605 ++ struct bfq_group *bfqg;
606 ++ struct cgroup *cgroup;
607 ++
608 ++ BUG_ON(bfqd == NULL);
609 ++
610 ++ rcu_read_lock();
611 ++ cgroup = task_cgroup(current, bfqio_subsys_id);
612 ++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup);
613 ++ rcu_read_unlock();
614 ++
615 ++ return bfqg;
616 ++}
617 ++
618 ++/**
619 ++ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
620 ++ * @st: the service tree being flushed.
621 ++ */
622 ++static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
623 ++{
624 ++ struct bfq_entity *entity = st->first_idle;
625 ++
626 ++ for (; entity != NULL; entity = st->first_idle)
627 ++ __bfq_deactivate_entity(entity, 0);
628 ++}
629 ++
630 ++/**
631 ++ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
632 ++ * @bfqd: the device data structure with the root group.
633 ++ * @entity: the entity to move.
634 ++ */
635 ++static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
636 ++ struct bfq_entity *entity)
637 ++{
638 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
639 ++
640 ++ BUG_ON(bfqq == NULL);
641 ++ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
642 ++ return;
643 ++}
644 ++
645 ++/**
646 ++ * bfq_reparent_active_entities - move to the root group all active entities.
647 ++ * @bfqd: the device data structure with the root group.
648 ++ * @bfqg: the group to move from.
649 ++ * @st: the service tree with the entities.
650 ++ *
651 ++ * Needs queue_lock to be taken and reference to be valid over the call.
652 ++ */
653 ++static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
654 ++ struct bfq_group *bfqg,
655 ++ struct bfq_service_tree *st)
656 ++{
657 ++ struct rb_root *active = &st->active;
658 ++ struct bfq_entity *entity = NULL;
659 ++
660 ++ if (!RB_EMPTY_ROOT(&st->active))
661 ++ entity = bfq_entity_of(rb_first(active));
662 ++
663 ++ for (; entity != NULL ; entity = bfq_entity_of(rb_first(active)))
664 ++ bfq_reparent_leaf_entity(bfqd, entity);
665 ++
666 ++ if (bfqg->sched_data.active_entity != NULL)
667 ++ bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity);
668 ++
669 ++ return;
670 ++}
671 ++
672 ++/**
673 ++ * bfq_destroy_group - destroy @bfqg.
674 ++ * @bgrp: the bfqio_cgroup containing @bfqg.
675 ++ * @bfqg: the group being destroyed.
676 ++ *
677 ++ * Destroy @bfqg, making sure that it is not referenced from its parent.
678 ++ */
679 ++static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
680 ++{
681 ++ struct bfq_data *bfqd;
682 ++ struct bfq_service_tree *st;
683 ++ struct bfq_entity *entity = bfqg->my_entity;
684 ++ unsigned long uninitialized_var(flags);
685 ++ int i;
686 ++
687 ++ hlist_del(&bfqg->group_node);
688 ++
689 ++ /*
690 ++ * Empty all service_trees belonging to this group before deactivating
691 ++ * the group itself.
692 ++ */
693 ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
694 ++ st = bfqg->sched_data.service_tree + i;
695 ++
696 ++ /*
697 ++ * The idle tree may still contain bfq_queues belonging
698 ++ * to exited tasks because they never migrated to a different
699 ++ * cgroup from the one being destroyed now. No one else
700 ++ * can access them so it's safe to act without any lock.
701 ++ */
702 ++ bfq_flush_idle_tree(st);
703 ++
704 ++ /*
705 ++ * It may happen that some queues are still active
706 ++ * (busy) upon group destruction (if the corresponding
707 ++ * processes have been forced to terminate). We move
708 ++ * all the leaf entities corresponding to these queues
709 ++ * to the root_group.
710 ++ * Also, it may happen that the group has an entity
711 ++ * under service, which is disconnected from the active
712 ++ * tree: it must be moved, too.
713 ++ * There is no need to put the sync queues, as the
714 ++ * scheduler has taken no reference.
715 ++ */
716 ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
717 ++ if (bfqd != NULL) {
718 ++ bfq_reparent_active_entities(bfqd, bfqg, st);
719 ++ bfq_put_bfqd_unlock(bfqd, &flags);
720 ++ }
721 ++ BUG_ON(!RB_EMPTY_ROOT(&st->active));
722 ++ BUG_ON(!RB_EMPTY_ROOT(&st->idle));
723 ++ }
724 ++ BUG_ON(bfqg->sched_data.next_active != NULL);
725 ++ BUG_ON(bfqg->sched_data.active_entity != NULL);
726 ++
727 ++ /*
728 ++ * We may race with device destruction, take extra care when
729 ++ * dereferencing bfqg->bfqd.
730 ++ */
731 ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
732 ++ if (bfqd != NULL) {
733 ++ hlist_del(&bfqg->bfqd_node);
734 ++ __bfq_deactivate_entity(entity, 0);
735 ++ bfq_put_async_queues(bfqd, bfqg);
736 ++ bfq_put_bfqd_unlock(bfqd, &flags);
737 ++ }
738 ++ BUG_ON(entity->tree != NULL);
739 ++
740 ++ /*
741 ++ * No need to defer the kfree() to the end of the RCU grace
742 ++ * period: we are called from the destroy() callback of our
743 ++ * cgroup, so we can be sure that no one is a) still using
744 ++ * this cgroup or b) doing lookups in it.
745 ++ */
746 ++ kfree(bfqg);
747 ++}
748 ++
749 ++static void bfq_end_raising_async(struct bfq_data *bfqd)
750 ++{
751 ++ struct hlist_node *tmp;
752 ++ struct bfq_group *bfqg;
753 ++
754 ++ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node)
755 ++ bfq_end_raising_async_queues(bfqd, bfqg);
756 ++}
757 ++
758 ++/**
759 ++ * bfq_disconnect_groups - disconnect @bfqd from all its groups.
760 ++ * @bfqd: the device descriptor being exited.
761 ++ *
762 ++ * When the device exits we just make sure that no lookup can return
763 ++ * the now unused group structures. They will be deallocated on cgroup
764 ++ * destruction.
765 ++ */
766 ++static void bfq_disconnect_groups(struct bfq_data *bfqd)
767 ++{
768 ++ struct hlist_node *tmp;
769 ++ struct bfq_group *bfqg;
770 ++
771 ++ bfq_log(bfqd, "disconnect_groups beginning") ;
772 ++ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) {
773 ++ hlist_del(&bfqg->bfqd_node);
774 ++
775 ++ __bfq_deactivate_entity(bfqg->my_entity, 0);
776 ++
777 ++ /*
778 ++ * Don't remove from the group hash, just set an
779 ++ * invalid key. No lookups can race with the
780 ++ * assignment as bfqd is being destroyed; this
781 ++ * implies also that new elements cannot be added
782 ++ * to the list.
783 ++ */
784 ++ rcu_assign_pointer(bfqg->bfqd, NULL);
785 ++
786 ++ bfq_log(bfqd, "disconnect_groups: put async for group %p",
787 ++ bfqg) ;
788 ++ bfq_put_async_queues(bfqd, bfqg);
789 ++ }
790 ++}
791 ++
792 ++static inline void bfq_free_root_group(struct bfq_data *bfqd)
793 ++{
794 ++ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
795 ++ struct bfq_group *bfqg = bfqd->root_group;
796 ++
797 ++ bfq_put_async_queues(bfqd, bfqg);
798 ++
799 ++ spin_lock_irq(&bgrp->lock);
800 ++ hlist_del_rcu(&bfqg->group_node);
801 ++ spin_unlock_irq(&bgrp->lock);
802 ++
803 ++ /*
804 ++ * No need to synchronize_rcu() here: since the device is gone
805 ++ * there cannot be any read-side access to its root_group.
806 ++ */
807 ++ kfree(bfqg);
808 ++}
809 ++
810 ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
811 ++{
812 ++ struct bfq_group *bfqg;
813 ++ struct bfqio_cgroup *bgrp;
814 ++ int i;
815 ++
816 ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
817 ++ if (bfqg == NULL)
818 ++ return NULL;
819 ++
820 ++ bfqg->entity.parent = NULL;
821 ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
822 ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
823 ++
824 ++ bgrp = &bfqio_root_cgroup;
825 ++ spin_lock_irq(&bgrp->lock);
826 ++ rcu_assign_pointer(bfqg->bfqd, bfqd);
827 ++ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
828 ++ spin_unlock_irq(&bgrp->lock);
829 ++
830 ++ return bfqg;
831 ++}
832 ++
833 ++#define SHOW_FUNCTION(__VAR) \
834 ++static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \
835 ++ struct cftype *cftype) \
836 ++{ \
837 ++ struct bfqio_cgroup *bgrp; \
838 ++ u64 ret; \
839 ++ \
840 ++ if (!cgroup_lock_live_group(cgroup)) \
841 ++ return -ENODEV; \
842 ++ \
843 ++ bgrp = cgroup_to_bfqio(cgroup); \
844 ++ spin_lock_irq(&bgrp->lock); \
845 ++ ret = bgrp->__VAR; \
846 ++ spin_unlock_irq(&bgrp->lock); \
847 ++ \
848 ++ cgroup_unlock(); \
849 ++ \
850 ++ return ret; \
851 ++}
852 ++
853 ++SHOW_FUNCTION(weight);
854 ++SHOW_FUNCTION(ioprio);
855 ++SHOW_FUNCTION(ioprio_class);
856 ++#undef SHOW_FUNCTION
857 ++
858 ++#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
859 ++static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \
860 ++ struct cftype *cftype, \
861 ++ u64 val) \
862 ++{ \
863 ++ struct bfqio_cgroup *bgrp; \
864 ++ struct bfq_group *bfqg; \
865 ++ \
866 ++ if (val < (__MIN) || val > (__MAX)) \
867 ++ return -EINVAL; \
868 ++ \
869 ++ if (!cgroup_lock_live_group(cgroup)) \
870 ++ return -ENODEV; \
871 ++ \
872 ++ bgrp = cgroup_to_bfqio(cgroup); \
873 ++ \
874 ++ spin_lock_irq(&bgrp->lock); \
875 ++ bgrp->__VAR = (unsigned short)val; \
876 ++ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \
877 ++ /* \
878 ++ * Setting the ioprio_changed flag of the entity \
879 ++ * to 1 with new_##__VAR == ##__VAR would re-set \
880 ++ * the value of the weight to its ioprio mapping. \
881 ++ * Set the flag only if necessary. \
882 ++ */ \
883 ++ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
884 ++ bfqg->entity.new_##__VAR = (unsigned short)val; \
885 ++ smp_wmb(); \
886 ++ bfqg->entity.ioprio_changed = 1; \
887 ++ } \
888 ++ } \
889 ++ spin_unlock_irq(&bgrp->lock); \
890 ++ \
891 ++ cgroup_unlock(); \
892 ++ \
893 ++ return 0; \
894 ++}
895 ++
896 ++STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
897 ++STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
898 ++STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
899 ++#undef STORE_FUNCTION
900 ++
901 ++static struct cftype bfqio_files[] = {
902 ++ {
903 ++ .name = "weight",
904 ++ .read_u64 = bfqio_cgroup_weight_read,
905 ++ .write_u64 = bfqio_cgroup_weight_write,
906 ++ },
907 ++ {
908 ++ .name = "ioprio",
909 ++ .read_u64 = bfqio_cgroup_ioprio_read,
910 ++ .write_u64 = bfqio_cgroup_ioprio_write,
911 ++ },
912 ++ {
913 ++ .name = "ioprio_class",
914 ++ .read_u64 = bfqio_cgroup_ioprio_class_read,
915 ++ .write_u64 = bfqio_cgroup_ioprio_class_write,
916 ++ },
917 ++ { }, /* terminate */
918 ++};
919 ++
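The cftype table above is what makes the per-group tunables appear as bfqio.weight, bfqio.ioprio and bfqio.ioprio_class files. A minimal userspace sketch of setting a group's weight through that interface; the mount point and group name are assumptions for illustration, and the value must fall inside the BFQ_MIN_WEIGHT..BFQ_MAX_WEIGHT range enforced by the STORE_FUNCTION() handler.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical mount point and group name, for illustration only. */
	const char *path = "/sys/fs/cgroup/bfqio/slow-io/bfqio.weight";
	const char *weight = "100\n";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, weight, strlen(weight)) < 0)
		perror("write");
	close(fd);
	return 0;
}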
920 ++static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup)
921 ++{
922 ++ struct bfqio_cgroup *bgrp;
923 ++
924 ++ if (cgroup->parent != NULL) {
925 ++ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
926 ++ if (bgrp == NULL)
927 ++ return ERR_PTR(-ENOMEM);
928 ++ } else
929 ++ bgrp = &bfqio_root_cgroup;
930 ++
931 ++ spin_lock_init(&bgrp->lock);
932 ++ INIT_HLIST_HEAD(&bgrp->group_data);
933 ++ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
934 ++ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
935 ++
936 ++ return &bgrp->css;
937 ++}
938 ++
939 ++/*
940 ++ * We cannot support shared io contexts, as we have no means to support
941 ++ * two tasks with the same ioc in two different groups without major rework
943 ++ * of the main bic/bfqq data structures. For now we allow a task to change
943 ++ * its cgroup only if it's the only owner of its ioc; the drawback of this
944 ++ * behavior is that a group containing a task that forked using CLONE_IO
945 ++ * will not be destroyed until the tasks sharing the ioc die.
946 ++ */
947 ++static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
948 ++{
949 ++ struct task_struct *task;
950 ++ struct io_context *ioc;
951 ++ int ret = 0;
952 ++
953 ++ cgroup_taskset_for_each(task, cgroup, tset) {
954 ++ /* task_lock() is needed to avoid races with exit_io_context() */
955 ++ task_lock(task);
956 ++ ioc = task->io_context;
957 ++ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
958 ++ /*
959 ++ * ioc == NULL means that the task is either too young or
960 ++ * exiting: if it still has no ioc, the ioc can't be shared;
961 ++ * if the task is exiting the attach will fail anyway, no
962 ++ * matter what we return here.
963 ++ */
964 ++ ret = -EINVAL;
965 ++ task_unlock(task);
966 ++ if (ret)
967 ++ break;
968 ++ }
969 ++
970 ++ return ret;
971 ++}
972 ++
973 ++static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
974 ++{
975 ++ struct task_struct *task;
976 ++ struct io_context *ioc;
977 ++ struct io_cq *icq;
978 ++
979 ++ /*
980 ++ * IMPORTANT NOTE: The move of more than one process at a time to a
981 ++ * new group has not yet been tested.
982 ++ */
983 ++ cgroup_taskset_for_each(task, cgroup, tset) {
984 ++ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
985 ++ if (ioc) {
986 ++ /*
987 ++ * Handle cgroup change here.
988 ++ */
989 ++ rcu_read_lock();
990 ++ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
991 ++ if (!strncmp(icq->q->elevator->type->elevator_name,
992 ++ "bfq", ELV_NAME_MAX))
993 ++ bfq_bic_change_cgroup(icq_to_bic(icq),
994 ++ cgroup);
995 ++ rcu_read_unlock();
996 ++ put_io_context(ioc);
997 ++ }
998 ++ }
999 ++}
1000 ++
1001 ++static void bfqio_destroy(struct cgroup *cgroup)
1002 ++{
1003 ++ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
1004 ++ struct hlist_node *tmp;
1005 ++ struct bfq_group *bfqg;
1006 ++
1007 ++ /*
1008 ++ * Since we are destroying the cgroup, there are no more tasks
1009 ++ * referencing it, and all the RCU grace periods that may have
1010 ++ * referenced it are ended (as the destruction of the parent
1011 ++ * cgroup is RCU-safe); bgrp->group_data will not be accessed by
1012 ++ * anything else and we don't need any synchronization.
1013 ++ */
1014 ++ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
1015 ++ bfq_destroy_group(bgrp, bfqg);
1016 ++
1017 ++ BUG_ON(!hlist_empty(&bgrp->group_data));
1018 ++
1019 ++ kfree(bgrp);
1020 ++}
1021 ++
1022 ++struct cgroup_subsys bfqio_subsys = {
1023 ++ .name = "bfqio",
1024 ++ .css_alloc = bfqio_create,
1025 ++ .can_attach = bfqio_can_attach,
1026 ++ .attach = bfqio_attach,
1027 ++ .css_free = bfqio_destroy,
1028 ++ .subsys_id = bfqio_subsys_id,
1029 ++ .base_cftypes = bfqio_files,
1030 ++};
1031 ++#else
1032 ++static inline void bfq_init_entity(struct bfq_entity *entity,
1033 ++ struct bfq_group *bfqg)
1034 ++{
1035 ++ entity->weight = entity->new_weight;
1036 ++ entity->orig_weight = entity->new_weight;
1037 ++ entity->ioprio = entity->new_ioprio;
1038 ++ entity->ioprio_class = entity->new_ioprio_class;
1039 ++ entity->sched_data = &bfqg->sched_data;
1040 ++}
1041 ++
1042 ++static inline struct bfq_group *
1043 ++bfq_bic_update_cgroup(struct bfq_io_cq *bic)
1044 ++{
1045 ++ struct bfq_data *bfqd = bic_to_bfqd(bic);
1046 ++ return bfqd->root_group;
1047 ++}
1048 ++
1049 ++static inline void bfq_bfqq_move(struct bfq_data *bfqd,
1050 ++ struct bfq_queue *bfqq,
1051 ++ struct bfq_entity *entity,
1052 ++ struct bfq_group *bfqg)
1053 ++{
1054 ++}
1055 ++
1056 ++static void bfq_end_raising_async(struct bfq_data *bfqd)
1057 ++{
1058 ++ bfq_end_raising_async_queues(bfqd, bfqd->root_group);
1059 ++}
1060 ++
1061 ++static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
1062 ++{
1063 ++ bfq_put_async_queues(bfqd, bfqd->root_group);
1064 ++}
1065 ++
1066 ++static inline void bfq_free_root_group(struct bfq_data *bfqd)
1067 ++{
1068 ++ kfree(bfqd->root_group);
1069 ++}
1070 ++
1071 ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
1072 ++{
1073 ++ struct bfq_group *bfqg;
1074 ++ int i;
1075 ++
1076 ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
1077 ++ if (bfqg == NULL)
1078 ++ return NULL;
1079 ++
1080 ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
1081 ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
1082 ++
1083 ++ return bfqg;
1084 ++}
1085 ++#endif
1086 +diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
1087 +new file mode 100644
1088 +index 0000000..326e3ec
1089 +--- /dev/null
1090 ++++ b/block/bfq-ioc.c
1091 +@@ -0,0 +1,36 @@
1092 ++/*
1093 ++ * BFQ: I/O context handling.
1094 ++ *
1095 ++ * Based on ideas and code from CFQ:
1096 ++ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
1097 ++ *
1098 ++ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
1099 ++ * Paolo Valente <paolo.valente@×××××××.it>
1100 ++ *
1101 ++ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
1102 ++ */
1103 ++
1104 ++/**
1105 ++ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
1106 ++ * @icq: the iocontext queue.
1107 ++ */
1108 ++static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
1109 ++{
1110 ++ /* bic->icq is the first member, %NULL will convert to %NULL */
1111 ++ return container_of(icq, struct bfq_io_cq, icq);
1112 ++}
1113 ++
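icq_to_bic() above works only because struct bfq_io_cq embeds its struct io_cq as the first member, making the container_of() conversion a zero-offset cast (which is also why a NULL icq maps back to a NULL bic). A small standalone sketch of that layout trick, using simplified stand-in structures rather than the real kernel ones.

#include <stddef.h>
#include <stdio.h>

/* Classic container_of(): subtract the member's offset from the
 * member pointer to recover the enclosing structure. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct io_cq { int dummy; };

struct bfq_io_cq {
	struct io_cq icq;	/* must stay the first member */
	int ttime_samples;
};

static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
{
	return container_of(icq, struct bfq_io_cq, icq);
}

int main(void)
{
	struct bfq_io_cq bic = { .icq = { 0 }, .ttime_samples = 42 };

	printf("offset of icq: %zu\n", offsetof(struct bfq_io_cq, icq));
	printf("round trip ok: %d\n", icq_to_bic(&bic.icq) == &bic);
	return 0;
}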
1114 ++/**
1115 ++ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
1116 ++ * @bfqd: the lookup key.
1117 ++ * @ioc: the io_context of the process doing I/O.
1118 ++ *
1119 ++ * Queue lock must be held.
1120 ++ */
1121 ++static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
1122 ++ struct io_context *ioc)
1123 ++{
1124 ++ if(ioc)
1125 ++ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
1126 ++ return NULL;
1127 ++}
1128 +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
1129 +new file mode 100644
1130 +index 0000000..b230927
1131 +--- /dev/null
1132 ++++ b/block/bfq-iosched.c
1133 +@@ -0,0 +1,3070 @@
1134 ++/*
1135 ++ * BFQ, or Budget Fair Queueing, disk scheduler.
1136 ++ *
1137 ++ * Based on ideas and code from CFQ:
1138 ++ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
1139 ++ *
1140 ++ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
1141 ++ * Paolo Valente <paolo.valente@×××××××.it>
1142 ++ *
1143 ++ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
1144 ++ *
1145 ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file.
1146 ++ *
1147 ++ * BFQ is a proportional share disk scheduling algorithm based on the
1148 ++ * slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
1149 ++ * measured in number of sectors, to tasks instead of time slices.
1150 ++ * The disk is not granted to the active task for a given time slice,
1151 ++ * but until it has exhausted its assigned budget. This change from
1152 ++ * the time to the service domain allows BFQ to distribute the disk
1153 ++ * bandwidth among tasks as desired, without any distortion due to
1154 ++ * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc
1155 ++ * internal scheduler, called B-WF2Q+, to schedule tasks according to
1156 ++ * their budgets. Thanks to this accurate scheduler, BFQ can afford
1157 ++ * to assign high budgets to disk-bound non-seeky tasks (to boost the
1158 ++ * throughput), and yet guarantee low latencies to interactive and
1159 ++ * soft real-time applications.
1160 ++ *
1161 ++ * BFQ has been introduced in [1], where the interested reader can
1162 ++ * find an accurate description of the algorithm, the bandwidth
1163 ++ * distribution and latency guarantees it provides, plus formal proofs
1164 ++ * of all the properties. With respect to the algorithm presented in
1165 ++ * the paper, this implementation adds several little heuristics, and
1166 ++ * a hierarchical extension, based on H-WF2Q+.
1167 ++ *
1168 ++ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
1169 ++ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
1170 ++ * complexity derives from the one introduced with EEVDF in [3].
1171 ++ *
1172 ++ * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling
1173 ++ * with Deterministic Guarantees on Bandwidth Distribution,'',
1174 ++ * IEEE Transactions on Computers, May 2010.
1175 ++ *
1176 ++ * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf
1177 ++ *
1178 ++ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
1179 ++ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
1180 ++ * Oct 1997.
1181 ++ *
1182 ++ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
1183 ++ *
1184 ++ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
1185 ++ * First: A Flexible and Accurate Mechanism for Proportional Share
1186 ++ * Resource Allocation,'' technical report.
1187 ++ *
1188 ++ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
1189 ++ */
1190 ++#include <linux/module.h>
1191 ++#include <linux/slab.h>
1192 ++#include <linux/blkdev.h>
1193 ++#include <linux/cgroup.h>
1194 ++#include <linux/elevator.h>
1195 ++#include <linux/jiffies.h>
1196 ++#include <linux/rbtree.h>
1197 ++#include <linux/ioprio.h>
1198 ++#include "bfq.h"
1199 ++#include "blk.h"
1200 ++
1201 ++/* Max number of dispatches in one round of service. */
1202 ++static const int bfq_quantum = 4;
1203 ++
1204 ++/* Expiration time of sync (0) and async (1) requests, in jiffies. */
1205 ++static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
1206 ++
1207 ++/* Maximum backwards seek, in KiB. */
1208 ++static const int bfq_back_max = 16 * 1024;
1209 ++
1210 ++/* Penalty of a backwards seek, in number of sectors. */
1211 ++static const int bfq_back_penalty = 2;
1212 ++
1213 ++/* Idling period duration, in jiffies. */
1214 ++static int bfq_slice_idle = HZ / 125;
1215 ++
1216 ++/* Default maximum budget values, in sectors and number of requests. */
1217 ++static const int bfq_default_max_budget = 16 * 1024;
1218 ++static const int bfq_max_budget_async_rq = 4;
1219 ++
1220 ++/*
1221 ++ * Async to sync throughput distribution is controlled as follows:
1222 ++ * when an async request is served, the entity is charged the number
1223 ++ * of sectors of the request, multiplied by the factor below
1224 ++ */
1225 ++static const int bfq_async_charge_factor = 10;
1226 ++
1227 ++/* Default timeout values, in jiffies, approximating CFQ defaults. */
1228 ++static const int bfq_timeout_sync = HZ / 8;
1229 ++static int bfq_timeout_async = HZ / 25;
1230 ++
1231 ++struct kmem_cache *bfq_pool;
1232 ++
1233 ++/* Below this threshold (in ms), we consider thinktime immediate. */
1234 ++#define BFQ_MIN_TT 2
1235 ++
1236 ++/* hw_tag detection: parallel requests threshold and min samples needed. */
1237 ++#define BFQ_HW_QUEUE_THRESHOLD 4
1238 ++#define BFQ_HW_QUEUE_SAMPLES 32
1239 ++
1240 ++#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
1241 ++#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
1242 ++
1243 ++/* Min samples used for peak rate estimation (for autotuning). */
1244 ++#define BFQ_PEAK_RATE_SAMPLES 32
1245 ++
1246 ++/* Shift used for peak rate fixed precision calculations. */
1247 ++#define BFQ_RATE_SHIFT 16
1248 ++
1249 ++/*
1250 ++ * The duration of the weight raising for interactive applications is
1251 ++ * computed automatically (as default behaviour), using the following
1252 ++ * formula: duration = (R / r) * T, where r is the peak rate of the
1253 ++ * disk, and R and T are two reference parameters. In particular, R is
1254 ++ * the peak rate of a reference disk, and T is about the maximum time
1255 ++ * for starting popular large applications on that disk, under BFQ and
1256 ++ * while reading two files in parallel. Finally, BFQ uses two
1257 ++ * different pairs (R, T) depending on whether the disk is rotational
1258 ++ * or non-rotational.
1259 ++ */
1260 ++#define T_rot (msecs_to_jiffies(5500))
1261 ++#define T_nonrot (msecs_to_jiffies(2000))
1262 ++/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */
1263 ++#define R_rot 17415
1264 ++#define R_nonrot 34791
1265 ++
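A quick worked example of the duration = (R / r) * T rule described above, kept in milliseconds to stay independent of HZ; the helper simply mirrors RT_prod / peak_rate, and the constants are the rotational reference values from this file.

#include <stdio.h>

/* R is in the same fixed-point unit as the estimated peak rate
 * (sectors/usec shifted by BFQ_RATE_SHIFT), so the ratio is unitless. */
#define T_ROT_MS 5500ULL
#define R_ROT    17415ULL

static unsigned long long raising_duration_ms(unsigned long long peak_rate)
{
	return (R_ROT * T_ROT_MS) / peak_rate;	/* RT_prod / r */
}

int main(void)
{
	/* A rotational disk exactly as fast as the reference: full 5500 ms. */
	printf("r == R_rot   -> %llu ms\n", raising_duration_ms(R_ROT));
	/* A disk twice as fast gets half the raising period: 2750 ms. */
	printf("r == 2*R_rot -> %llu ms\n", raising_duration_ms(2 * R_ROT));
	return 0;
}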
1266 ++#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
1267 ++ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
1268 ++
1269 ++#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0])
1270 ++#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
1271 ++
1272 ++static inline void bfq_schedule_dispatch(struct bfq_data *bfqd);
1273 ++
1274 ++#include "bfq-ioc.c"
1275 ++#include "bfq-sched.c"
1276 ++#include "bfq-cgroup.c"
1277 ++
1278 ++#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\
1279 ++ IOPRIO_CLASS_IDLE)
1280 ++#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\
1281 ++ IOPRIO_CLASS_RT)
1282 ++
1283 ++#define bfq_sample_valid(samples) ((samples) > 80)
1284 ++
1285 ++/*
1286 ++ * We regard a request as SYNC, if either it's a read or has the SYNC bit
1287 ++ * set (in which case it could also be a direct WRITE).
1288 ++ */
1289 ++static inline int bfq_bio_sync(struct bio *bio)
1290 ++{
1291 ++ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
1292 ++ return 1;
1293 ++
1294 ++ return 0;
1295 ++}
1296 ++
1297 ++/*
1298 ++ * Scheduler run of queue, if there are requests pending and no one in the
1299 ++ * driver that will restart queueing.
1300 ++ */
1301 ++static inline void bfq_schedule_dispatch(struct bfq_data *bfqd)
1302 ++{
1303 ++ if (bfqd->queued != 0) {
1304 ++ bfq_log(bfqd, "schedule dispatch");
1305 ++ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work);
1306 ++ }
1307 ++}
1308 ++
1309 ++/*
1310 ++ * Lifted from AS - choose which of rq1 and rq2 is best served now.
1311 ++ * We choose the request that is closest to the head right now. Distance
1312 ++ * behind the head is penalized and only allowed to a certain extent.
1313 ++ */
1314 ++static struct request *bfq_choose_req(struct bfq_data *bfqd,
1315 ++ struct request *rq1,
1316 ++ struct request *rq2,
1317 ++ sector_t last)
1318 ++{
1319 ++ sector_t s1, s2, d1 = 0, d2 = 0;
1320 ++ unsigned long back_max;
1321 ++#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */
1322 ++#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
1323 ++ unsigned wrap = 0; /* bit mask: requests behind the disk head? */
1324 ++
1325 ++ if (rq1 == NULL || rq1 == rq2)
1326 ++ return rq2;
1327 ++ if (rq2 == NULL)
1328 ++ return rq1;
1329 ++
1330 ++ if (rq_is_sync(rq1) && !rq_is_sync(rq2))
1331 ++ return rq1;
1332 ++ else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
1333 ++ return rq2;
1334 ++ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
1335 ++ return rq1;
1336 ++ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
1337 ++ return rq2;
1338 ++
1339 ++ s1 = blk_rq_pos(rq1);
1340 ++ s2 = blk_rq_pos(rq2);
1341 ++
1342 ++ /*
1343 ++ * By definition, 1KiB is 2 sectors.
1344 ++ */
1345 ++ back_max = bfqd->bfq_back_max * 2;
1346 ++
1347 ++ /*
1348 ++ * Strict one way elevator _except_ in the case where we allow
1349 ++ * short backward seeks which are biased as twice the cost of a
1350 ++ * similar forward seek.
1351 ++ */
1352 ++ if (s1 >= last)
1353 ++ d1 = s1 - last;
1354 ++ else if (s1 + back_max >= last)
1355 ++ d1 = (last - s1) * bfqd->bfq_back_penalty;
1356 ++ else
1357 ++ wrap |= BFQ_RQ1_WRAP;
1358 ++
1359 ++ if (s2 >= last)
1360 ++ d2 = s2 - last;
1361 ++ else if (s2 + back_max >= last)
1362 ++ d2 = (last - s2) * bfqd->bfq_back_penalty;
1363 ++ else
1364 ++ wrap |= BFQ_RQ2_WRAP;
1365 ++
1366 ++ /* Found required data */
1367 ++
1368 ++ /*
1369 ++ * By doing switch() on the bit mask "wrap" we avoid having to
1370 ++ * check two variables for all permutations: --> faster!
1371 ++ */
1372 ++ switch (wrap) {
1373 ++ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
1374 ++ if (d1 < d2)
1375 ++ return rq1;
1376 ++ else if (d2 < d1)
1377 ++ return rq2;
1378 ++ else {
1379 ++ if (s1 >= s2)
1380 ++ return rq1;
1381 ++ else
1382 ++ return rq2;
1383 ++ }
1384 ++
1385 ++ case BFQ_RQ2_WRAP:
1386 ++ return rq1;
1387 ++ case BFQ_RQ1_WRAP:
1388 ++ return rq2;
1389 ++ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
1390 ++ default:
1391 ++ /*
1392 ++ * Since both rqs are wrapped,
1393 ++ * start with the one that's further behind head
1394 ++ * (--> only *one* back seek required),
1395 ++ * since back seek takes more time than forward.
1396 ++ */
1397 ++ if (s1 <= s2)
1398 ++ return rq1;
1399 ++ else
1400 ++ return rq2;
1401 ++ }
1402 ++}
1403 ++
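To make the distance computation in bfq_choose_req() above concrete, here is a small sketch using the default bfq_back_max and bfq_back_penalty values from this file; the helper is a stripped-down restatement for illustration, not the scheduler's code path.

#include <stdio.h>

typedef unsigned long long sector_t;

/* Defaults: bfq_back_max = 16 * 1024 KiB (1 KiB = 2 sectors), penalty 2. */
static const sector_t back_max = 16 * 1024 * 2;
static const sector_t back_penalty = 2;

static sector_t effective_distance(sector_t s, sector_t last, int *wraps)
{
	*wraps = 0;
	if (s >= last)
		return s - last;		  /* forward seek: plain distance */
	if (s + back_max >= last)
		return (last - s) * back_penalty; /* short backward seek: doubled */
	*wraps = 1;				  /* too far behind: treated as a wrap */
	return 0;
}

int main(void)
{
	int w1, w2;
	sector_t last = 100000;
	/* rq1 is 1000 sectors ahead, rq2 is 400 sectors behind the head. */
	sector_t d1 = effective_distance(101000, last, &w1);
	sector_t d2 = effective_distance(99600, last, &w2);

	/* d1 = 1000, d2 = 800: the backward request still wins here because
	 * its doubled distance remains smaller than the forward one. */
	printf("d1=%llu (wrap %d), d2=%llu (wrap %d)\n", d1, w1, d2, w2);
	return 0;
}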
1404 ++static struct bfq_queue *
1405 ++bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
1406 ++ sector_t sector, struct rb_node **ret_parent,
1407 ++ struct rb_node ***rb_link)
1408 ++{
1409 ++ struct rb_node **p, *parent;
1410 ++ struct bfq_queue *bfqq = NULL;
1411 ++
1412 ++ parent = NULL;
1413 ++ p = &root->rb_node;
1414 ++ while (*p) {
1415 ++ struct rb_node **n;
1416 ++
1417 ++ parent = *p;
1418 ++ bfqq = rb_entry(parent, struct bfq_queue, pos_node);
1419 ++
1420 ++ /*
1421 ++ * Sort strictly based on sector. Smallest to the left,
1422 ++ * largest to the right.
1423 ++ */
1424 ++ if (sector > blk_rq_pos(bfqq->next_rq))
1425 ++ n = &(*p)->rb_right;
1426 ++ else if (sector < blk_rq_pos(bfqq->next_rq))
1427 ++ n = &(*p)->rb_left;
1428 ++ else
1429 ++ break;
1430 ++ p = n;
1431 ++ bfqq = NULL;
1432 ++ }
1433 ++
1434 ++ *ret_parent = parent;
1435 ++ if (rb_link)
1436 ++ *rb_link = p;
1437 ++
1438 ++ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
1439 ++ (long long unsigned)sector,
1440 ++ bfqq != NULL ? bfqq->pid : 0);
1441 ++
1442 ++ return bfqq;
1443 ++}
1444 ++
1445 ++static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
1446 ++{
1447 ++ struct rb_node **p, *parent;
1448 ++ struct bfq_queue *__bfqq;
1449 ++
1450 ++ if (bfqq->pos_root != NULL) {
1451 ++ rb_erase(&bfqq->pos_node, bfqq->pos_root);
1452 ++ bfqq->pos_root = NULL;
1453 ++ }
1454 ++
1455 ++ if (bfq_class_idle(bfqq))
1456 ++ return;
1457 ++ if (!bfqq->next_rq)
1458 ++ return;
1459 ++
1460 ++ bfqq->pos_root = &bfqd->rq_pos_tree;
1461 ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
1462 ++ blk_rq_pos(bfqq->next_rq), &parent, &p);
1463 ++ if (__bfqq == NULL) {
1464 ++ rb_link_node(&bfqq->pos_node, parent, p);
1465 ++ rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
1466 ++ } else
1467 ++ bfqq->pos_root = NULL;
1468 ++}
1469 ++
1470 ++static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
1471 ++ struct bfq_queue *bfqq,
1472 ++ struct request *last)
1473 ++{
1474 ++ struct rb_node *rbnext = rb_next(&last->rb_node);
1475 ++ struct rb_node *rbprev = rb_prev(&last->rb_node);
1476 ++ struct request *next = NULL, *prev = NULL;
1477 ++
1478 ++ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
1479 ++
1480 ++ if (rbprev != NULL)
1481 ++ prev = rb_entry_rq(rbprev);
1482 ++
1483 ++ if (rbnext != NULL)
1484 ++ next = rb_entry_rq(rbnext);
1485 ++ else {
1486 ++ rbnext = rb_first(&bfqq->sort_list);
1487 ++ if (rbnext && rbnext != &last->rb_node)
1488 ++ next = rb_entry_rq(rbnext);
1489 ++ }
1490 ++
1491 ++ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
1492 ++}
1493 ++
1494 ++static void bfq_del_rq_rb(struct request *rq)
1495 ++{
1496 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
1497 ++ struct bfq_data *bfqd = bfqq->bfqd;
1498 ++ const int sync = rq_is_sync(rq);
1499 ++
1500 ++ BUG_ON(bfqq->queued[sync] == 0);
1501 ++ bfqq->queued[sync]--;
1502 ++ bfqd->queued--;
1503 ++
1504 ++ elv_rb_del(&bfqq->sort_list, rq);
1505 ++
1506 ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
1507 ++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue)
1508 ++ bfq_del_bfqq_busy(bfqd, bfqq, 1);
1509 ++ /*
1510 ++ * Remove queue from request-position tree as it is empty.
1511 ++ */
1512 ++ if (bfqq->pos_root != NULL) {
1513 ++ rb_erase(&bfqq->pos_node, bfqq->pos_root);
1514 ++ bfqq->pos_root = NULL;
1515 ++ }
1516 ++ }
1517 ++}
1518 ++
1519 ++/* see the definition of bfq_async_charge_factor for details */
1520 ++static inline unsigned long bfq_serv_to_charge(struct request *rq,
1521 ++ struct bfq_queue *bfqq)
1522 ++{
1523 ++ return blk_rq_sectors(rq) *
1524 ++ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) *
1525 ++ bfq_async_charge_factor));
1526 ++}
1527 ++
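A worked example of the charging rule implemented by bfq_serv_to_charge() above, using the default bfq_async_charge_factor of 10; the standalone helper restates the same expression outside the kernel types for illustration.

#include <stdio.h>

/* Synchronous requests are charged their real size; asynchronous requests
 * from a queue that is not being weight-raised are charged 11x their size. */
static const int async_charge_factor = 10;

static unsigned long serv_to_charge(unsigned long sectors, int sync,
				    int raising_coeff)
{
	return sectors * (1 + (!sync) * (raising_coeff == 1) *
			  async_charge_factor);
}

int main(void)
{
	/* An 8-sector (4 KiB) request: */
	printf("sync:                %lu sectors charged\n",
	       serv_to_charge(8, 1, 1));	/* 8 */
	printf("async, not raised:   %lu sectors charged\n",
	       serv_to_charge(8, 0, 1));	/* 88 */
	printf("async, while raised: %lu sectors charged\n",
	       serv_to_charge(8, 0, 3));	/* 8: raising_coeff != 1 */
	return 0;
}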
1528 ++/**
1529 ++ * bfq_updated_next_req - update the queue after a new next_rq selection.
1530 ++ * @bfqd: the device data the queue belongs to.
1531 ++ * @bfqq: the queue to update.
1532 ++ *
1533 ++ * If the first request of a queue changes we make sure that the queue
1534 ++ * has enough budget to serve at least its first request (if the
1535 ++ * request has grown). We do this because if the queue has not enough
1536 ++ * budget for its first request, it has to go through two dispatch
1537 ++ * rounds to actually get it dispatched.
1538 ++ */
1539 ++static void bfq_updated_next_req(struct bfq_data *bfqd,
1540 ++ struct bfq_queue *bfqq)
1541 ++{
1542 ++ struct bfq_entity *entity = &bfqq->entity;
1543 ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
1544 ++ struct request *next_rq = bfqq->next_rq;
1545 ++ unsigned long new_budget;
1546 ++
1547 ++ if (next_rq == NULL)
1548 ++ return;
1549 ++
1550 ++ if (bfqq == bfqd->active_queue)
1551 ++ /*
1552 ++ * In order not to break guarantees, budgets cannot be
1553 ++ * changed after an entity has been selected.
1554 ++ */
1555 ++ return;
1556 ++
1557 ++ BUG_ON(entity->tree != &st->active);
1558 ++ BUG_ON(entity == entity->sched_data->active_entity);
1559 ++
1560 ++ new_budget = max_t(unsigned long, bfqq->max_budget,
1561 ++ bfq_serv_to_charge(next_rq, bfqq));
1562 ++ entity->budget = new_budget;
1563 ++ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget);
1564 ++ bfq_activate_bfqq(bfqd, bfqq);
1565 ++}
1566 ++
1567 ++static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
1568 ++{
1569 ++ u64 dur;
1570 ++
1571 ++ if (bfqd->bfq_raising_max_time > 0)
1572 ++ return bfqd->bfq_raising_max_time;
1573 ++
1574 ++ dur = bfqd->RT_prod;
1575 ++ do_div(dur, bfqd->peak_rate);
1576 ++
1577 ++ return dur;
1578 ++}
1579 ++
1580 ++static void bfq_add_rq_rb(struct request *rq)
1581 ++{
1582 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
1583 ++ struct bfq_entity *entity = &bfqq->entity;
1584 ++ struct bfq_data *bfqd = bfqq->bfqd;
1585 ++ struct request *next_rq, *prev;
1586 ++ unsigned long old_raising_coeff = bfqq->raising_coeff;
1587 ++ int idle_for_long_time = bfqq->budget_timeout +
1588 ++ bfqd->bfq_raising_min_idle_time < jiffies;
1589 ++
1590 ++ bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq));
1591 ++ bfqq->queued[rq_is_sync(rq)]++;
1592 ++ bfqd->queued++;
1593 ++
1594 ++ elv_rb_add(&bfqq->sort_list, rq);
1595 ++
1596 ++ /*
1597 ++ * Check if this request is a better next-serve candidate.
1598 ++ */
1599 ++ prev = bfqq->next_rq;
1600 ++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
1601 ++ BUG_ON(next_rq == NULL);
1602 ++ bfqq->next_rq = next_rq;
1603 ++
1604 ++ /*
1605 ++ * Adjust priority tree position, if next_rq changes.
1606 ++ */
1607 ++ if (prev != bfqq->next_rq)
1608 ++ bfq_rq_pos_tree_add(bfqd, bfqq);
1609 ++
1610 ++ if (!bfq_bfqq_busy(bfqq)) {
1611 ++ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 &&
1612 ++ bfqq->soft_rt_next_start < jiffies;
1613 ++ entity->budget = max_t(unsigned long, bfqq->max_budget,
1614 ++ bfq_serv_to_charge(next_rq, bfqq));
1615 ++
1616 ++ if (!bfqd->low_latency)
1617 ++ goto add_bfqq_busy;
1618 ++
1619 ++ /*
1620 ++ * If the queue is not being boosted and has been idle
1621 ++ * for enough time, start a weight-raising period
1622 ++ */
1623 ++ if (old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) {
1624 ++ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
1625 ++ if (idle_for_long_time)
1626 ++ bfqq->raising_cur_max_time =
1627 ++ bfq_wrais_duration(bfqd);
1628 ++ else
1629 ++ bfqq->raising_cur_max_time =
1630 ++ bfqd->bfq_raising_rt_max_time;
1631 ++ bfq_log_bfqq(bfqd, bfqq,
1632 ++ "wrais starting at %llu msec, "
1633 ++ "rais_max_time %u",
1634 ++ bfqq->last_rais_start_finish,
1635 ++ jiffies_to_msecs(bfqq->
1636 ++ raising_cur_max_time));
1637 ++ } else if (old_raising_coeff > 1) {
1638 ++ if (idle_for_long_time)
1639 ++ bfqq->raising_cur_max_time =
1640 ++ bfq_wrais_duration(bfqd);
1641 ++ else if (bfqq->raising_cur_max_time ==
1642 ++ bfqd->bfq_raising_rt_max_time &&
1643 ++ !soft_rt) {
1644 ++ bfqq->raising_coeff = 1;
1645 ++ bfq_log_bfqq(bfqd, bfqq,
1646 ++ "wrais ending at %llu msec, "
1647 ++ "rais_max_time %u",
1648 ++ bfqq->last_rais_start_finish,
1649 ++ jiffies_to_msecs(bfqq->
1650 ++ raising_cur_max_time));
1651 ++ }
1652 ++ }
1653 ++ if (old_raising_coeff != bfqq->raising_coeff)
1654 ++ entity->ioprio_changed = 1;
1655 ++add_bfqq_busy:
1656 ++ bfq_add_bfqq_busy(bfqd, bfqq);
1657 ++ } else {
1658 ++ if (bfqd->low_latency && old_raising_coeff == 1 &&
1659 ++ !rq_is_sync(rq) &&
1660 ++ bfqq->last_rais_start_finish +
1661 ++ bfqd->bfq_raising_min_inter_arr_async < jiffies) {
1662 ++ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
1663 ++ bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd);
1664 ++
1665 ++ entity->ioprio_changed = 1;
1666 ++ bfq_log_bfqq(bfqd, bfqq,
1667 ++ "non-idle wrais starting at %llu msec, "
1668 ++ "rais_max_time %u",
1669 ++ bfqq->last_rais_start_finish,
1670 ++ jiffies_to_msecs(bfqq->
1671 ++ raising_cur_max_time));
1672 ++ }
1673 ++ bfq_updated_next_req(bfqd, bfqq);
1674 ++ }
1675 ++
1676 ++ if (bfqd->low_latency &&
1677 ++ (old_raising_coeff == 1 || bfqq->raising_coeff == 1 ||
1678 ++ idle_for_long_time))
1679 ++ bfqq->last_rais_start_finish = jiffies;
1680 ++}
1681 ++
1682 ++static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq)
1683 ++{
1684 ++ elv_rb_del(&bfqq->sort_list, rq);
1685 ++ bfqq->queued[rq_is_sync(rq)]--;
1686 ++ bfqq->bfqd->queued--;
1687 ++ bfq_add_rq_rb(rq);
1688 ++}
1689 ++
1690 ++static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
1691 ++ struct bio *bio)
1692 ++{
1693 ++ struct task_struct *tsk = current;
1694 ++ struct bfq_io_cq *bic;
1695 ++ struct bfq_queue *bfqq;
1696 ++
1697 ++ bic = bfq_bic_lookup(bfqd, tsk->io_context);
1698 ++ if (bic == NULL)
1699 ++ return NULL;
1700 ++
1701 ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
1702 ++ if (bfqq != NULL) {
1703 ++ sector_t sector = bio->bi_sector + bio_sectors(bio);
1704 ++
1705 ++ return elv_rb_find(&bfqq->sort_list, sector);
1706 ++ }
1707 ++
1708 ++ return NULL;
1709 ++}
1710 ++
1711 ++static void bfq_activate_request(struct request_queue *q, struct request *rq)
1712 ++{
1713 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
1714 ++
1715 ++ bfqd->rq_in_driver++;
1716 ++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
1717 ++ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
1718 ++ (long long unsigned)bfqd->last_position);
1719 ++}
1720 ++
1721 ++static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
1722 ++{
1723 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
1724 ++
1725 ++ WARN_ON(bfqd->rq_in_driver == 0);
1726 ++ bfqd->rq_in_driver--;
1727 ++}
1728 ++
1729 ++static void bfq_remove_request(struct request *rq)
1730 ++{
1731 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
1732 ++ struct bfq_data *bfqd = bfqq->bfqd;
1733 ++
1734 ++ if (bfqq->next_rq == rq) {
1735 ++ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
1736 ++ bfq_updated_next_req(bfqd, bfqq);
1737 ++ }
1738 ++
1739 ++ list_del_init(&rq->queuelist);
1740 ++ bfq_del_rq_rb(rq);
1741 ++
1742 ++ if (rq->cmd_flags & REQ_META) {
1743 ++ WARN_ON(bfqq->meta_pending == 0);
1744 ++ bfqq->meta_pending--;
1745 ++ }
1746 ++}
1747 ++
1748 ++static int bfq_merge(struct request_queue *q, struct request **req,
1749 ++ struct bio *bio)
1750 ++{
1751 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
1752 ++ struct request *__rq;
1753 ++
1754 ++ __rq = bfq_find_rq_fmerge(bfqd, bio);
1755 ++ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) {
1756 ++ *req = __rq;
1757 ++ return ELEVATOR_FRONT_MERGE;
1758 ++ }
1759 ++
1760 ++ return ELEVATOR_NO_MERGE;
1761 ++}
1762 ++
1763 ++static void bfq_merged_request(struct request_queue *q, struct request *req,
1764 ++ int type)
1765 ++{
1766 ++ if (type == ELEVATOR_FRONT_MERGE) {
1767 ++ struct bfq_queue *bfqq = RQ_BFQQ(req);
1768 ++
1769 ++ bfq_reposition_rq_rb(bfqq, req);
1770 ++ }
1771 ++}
1772 ++
1773 ++static void bfq_merged_requests(struct request_queue *q, struct request *rq,
1774 ++ struct request *next)
1775 ++{
1776 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
1777 ++
1778 ++ /*
1779 ++ * Reposition in fifo if next is older than rq.
1780 ++ */
1781 ++ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
1782 ++ time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
1783 ++ list_move(&rq->queuelist, &next->queuelist);
1784 ++ rq_set_fifo_time(rq, rq_fifo_time(next));
1785 ++ }
1786 ++
1787 ++ if (bfqq->next_rq == next)
1788 ++ bfqq->next_rq = rq;
1789 ++
1790 ++ bfq_remove_request(next);
1791 ++}
1792 ++
1793 ++/* Must be called with bfqq != NULL */
1794 ++static inline void bfq_bfqq_end_raising(struct bfq_queue *bfqq)
1795 ++{
1796 ++ BUG_ON(bfqq == NULL);
1797 ++ bfqq->raising_coeff = 1;
1798 ++ bfqq->raising_cur_max_time = 0;
1799 ++ /* Trigger a weight change on the next activation of the queue */
1800 ++ bfqq->entity.ioprio_changed = 1;
1801 ++}
1802 ++
1803 ++static void bfq_end_raising_async_queues(struct bfq_data *bfqd,
1804 ++ struct bfq_group *bfqg)
1805 ++{
1806 ++ int i, j;
1807 ++
1808 ++ for (i = 0; i < 2; i++)
1809 ++ for (j = 0; j < IOPRIO_BE_NR; j++)
1810 ++ if (bfqg->async_bfqq[i][j] != NULL)
1811 ++ bfq_bfqq_end_raising(bfqg->async_bfqq[i][j]);
1812 ++ if (bfqg->async_idle_bfqq != NULL)
1813 ++ bfq_bfqq_end_raising(bfqg->async_idle_bfqq);
1814 ++}
1815 ++
1816 ++static void bfq_end_raising(struct bfq_data *bfqd)
1817 ++{
1818 ++ struct bfq_queue *bfqq;
1819 ++
1820 ++ spin_lock_irq(bfqd->queue->queue_lock);
1821 ++
1822 ++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
1823 ++ bfq_bfqq_end_raising(bfqq);
1824 ++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
1825 ++ bfq_bfqq_end_raising(bfqq);
1826 ++ bfq_end_raising_async(bfqd);
1827 ++
1828 ++ spin_unlock_irq(bfqd->queue->queue_lock);
1829 ++}
1830 ++
1831 ++static int bfq_allow_merge(struct request_queue *q, struct request *rq,
1832 ++ struct bio *bio)
1833 ++{
1834 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
1835 ++ struct bfq_io_cq *bic;
1836 ++ struct bfq_queue *bfqq;
1837 ++
1838 ++ /*
1839 ++ * Disallow merge of a sync bio into an async request.
1840 ++ */
1841 ++ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
1842 ++ return 0;
1843 ++
1844 ++ /*
1845 ++ * Lookup the bfqq that this bio will be queued with. Allow
1846 ++ * merge only if rq is queued there.
1847 ++ * Queue lock is held here.
1848 ++ */
1849 ++ bic = bfq_bic_lookup(bfqd, current->io_context);
1850 ++ if (bic == NULL)
1851 ++ return 0;
1852 ++
1853 ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
1854 ++ return bfqq == RQ_BFQQ(rq);
1855 ++}
1856 ++
1857 ++static void __bfq_set_active_queue(struct bfq_data *bfqd,
1858 ++ struct bfq_queue *bfqq)
1859 ++{
1860 ++ if (bfqq != NULL) {
1861 ++ bfq_mark_bfqq_must_alloc(bfqq);
1862 ++ bfq_mark_bfqq_budget_new(bfqq);
1863 ++ bfq_clear_bfqq_fifo_expire(bfqq);
1864 ++
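++ /*
++ * budgets_assigned is a fixed-point counter that converges to its
++ * saturation value 256 with weight 7/8 for the old value; the 194
++ * threshold checked in bfq_max_budget() is therefore crossed after
++ * roughly a dozen budget assignments.
++ */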
1865 ++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
1866 ++
1867 ++ bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu",
1868 ++ bfqq->entity.budget);
1869 ++ }
1870 ++
1871 ++ bfqd->active_queue = bfqq;
1872 ++}
1873 ++
1874 ++/*
1875 ++ * Get and set a new active queue for service.
1876 ++ */
1877 ++static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd,
1878 ++ struct bfq_queue *bfqq)
1879 ++{
1880 ++ if (!bfqq)
1881 ++ bfqq = bfq_get_next_queue(bfqd);
1882 ++ else
1883 ++ bfq_get_next_queue_forced(bfqd, bfqq);
1884 ++
1885 ++ __bfq_set_active_queue(bfqd, bfqq);
1886 ++ return bfqq;
1887 ++}
1888 ++
1889 ++static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
1890 ++ struct request *rq)
1891 ++{
1892 ++ if (blk_rq_pos(rq) >= bfqd->last_position)
1893 ++ return blk_rq_pos(rq) - bfqd->last_position;
1894 ++ else
1895 ++ return bfqd->last_position - blk_rq_pos(rq);
1896 ++}
1897 ++
1898 ++/*
1899 ++ * Return true if bfqq has no request pending and rq is close enough to
1900 ++ * bfqd->last_position, or if rq is closer to bfqd->last_position than
1901 ++ * bfqq->next_rq
1902 ++ */
1903 ++static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
1904 ++{
1905 ++ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
1906 ++}
1907 ++
1908 ++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
1909 ++{
1910 ++ struct rb_root *root = &bfqd->rq_pos_tree;
1911 ++ struct rb_node *parent, *node;
1912 ++ struct bfq_queue *__bfqq;
1913 ++ sector_t sector = bfqd->last_position;
1914 ++
1915 ++ if (RB_EMPTY_ROOT(root))
1916 ++ return NULL;
1917 ++
1918 ++ /*
1919 ++ * First, if we find a request starting at the end of the last
1920 ++ * request, choose it.
1921 ++ */
1922 ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
1923 ++ if (__bfqq != NULL)
1924 ++ return __bfqq;
1925 ++
1926 ++ /*
1927 ++ * If the exact sector wasn't found, the parent of the NULL leaf
1928 ++ * will contain the closest sector (rq_pos_tree sorted by next_request
1929 ++ * position).
1930 ++ */
1931 ++ __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
1932 ++ if (bfq_rq_close(bfqd, __bfqq->next_rq))
1933 ++ return __bfqq;
1934 ++
1935 ++ if (blk_rq_pos(__bfqq->next_rq) < sector)
1936 ++ node = rb_next(&__bfqq->pos_node);
1937 ++ else
1938 ++ node = rb_prev(&__bfqq->pos_node);
1939 ++ if (node == NULL)
1940 ++ return NULL;
1941 ++
1942 ++ __bfqq = rb_entry(node, struct bfq_queue, pos_node);
1943 ++ if (bfq_rq_close(bfqd, __bfqq->next_rq))
1944 ++ return __bfqq;
1945 ++
1946 ++ return NULL;
1947 ++}
1948 ++
1949 ++/*
1950 ++ * bfqd - obvious
1951 ++ * cur_bfqq - passed in so that we don't decide that the current queue
1952 ++ * is closely cooperating with itself.
1953 ++ *
1954 ++ * We are assuming that cur_bfqq has dispatched at least one request,
1955 ++ * and that bfqd->last_position reflects a position on the disk associated
1956 ++ * with the I/O issued by cur_bfqq.
1957 ++ */
1958 ++static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
1959 ++ struct bfq_queue *cur_bfqq)
1960 ++{
1961 ++ struct bfq_queue *bfqq;
1962 ++
1963 ++ if (bfq_class_idle(cur_bfqq))
1964 ++ return NULL;
1965 ++ if (!bfq_bfqq_sync(cur_bfqq))
1966 ++ return NULL;
1967 ++ if (BFQQ_SEEKY(cur_bfqq))
1968 ++ return NULL;
1969 ++
1970 ++ /* If device has only one backlogged bfq_queue, don't search. */
1971 ++ if (bfqd->busy_queues == 1)
1972 ++ return NULL;
1973 ++
1974 ++ /*
1975 ++ * We should notice if some of the queues are cooperating, e.g.
1976 ++ * working closely on the same area of the disk. In that case,
1977 ++ * we can group them together and don't waste time idling.
1978 ++ */
1979 ++ bfqq = bfqq_close(bfqd);
1980 ++ if (bfqq == NULL || bfqq == cur_bfqq)
1981 ++ return NULL;
1982 ++
1983 ++ /*
1984 ++ * Do not merge queues from different bfq_groups.
1985 ++ */
1986 ++ if (bfqq->entity.parent != cur_bfqq->entity.parent)
1987 ++ return NULL;
1988 ++
1989 ++ /*
1990 ++ * It only makes sense to merge sync queues.
1991 ++ */
1992 ++ if (!bfq_bfqq_sync(bfqq))
1993 ++ return NULL;
1994 ++ if (BFQQ_SEEKY(bfqq))
1995 ++ return NULL;
1996 ++
1997 ++ /*
1998 ++ * Do not merge queues of different priority classes.
1999 ++ */
2000 ++ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
2001 ++ return NULL;
2002 ++
2003 ++ return bfqq;
2004 ++}
2005 ++
2006 ++/*
2007 ++ * If enough samples have been computed, return the current max budget
2008 ++ * stored in bfqd, which is dynamically updated according to the
2009 ++ * estimated disk peak rate; otherwise return the default max budget
2010 ++ */
2011 ++static inline unsigned long bfq_max_budget(struct bfq_data *bfqd)
2012 ++{
2013 ++ if (bfqd->budgets_assigned < 194)
2014 ++ return bfq_default_max_budget;
2015 ++ else
2016 ++ return bfqd->bfq_max_budget;
2017 ++}
2018 ++
2019 ++/*
2020 ++ * Return min budget, which is a fraction of the current or default
2021 ++ * max budget (trying with 1/32)
2022 ++ */
2023 ++static inline unsigned long bfq_min_budget(struct bfq_data *bfqd)
2024 ++{
2025 ++ if (bfqd->budgets_assigned < 194)
2026 ++ return bfq_default_max_budget / 32;
2027 ++ else
2028 ++ return bfqd->bfq_max_budget / 32;
2029 ++}
2030 ++
2031 ++/*
2032 ++ * Decides whether idling should be done for given device and
2033 ++ * given active queue.
2034 ++ */
2035 ++static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd,
2036 ++ struct bfq_queue *active_bfqq)
2037 ++{
2038 ++ if (active_bfqq == NULL)
2039 ++ return false;
2040 ++ /*
2041 ++ * If device is SSD it has no seek penalty, disable idling; but
2042 ++ * do so only if:
2043 ++ * - device does not support queuing, otherwise we still have
2044 ++ * a problem with sync vs async workloads;
2045 ++ * - the queue is not weight-raised, to preserve guarantees.
2046 ++ */
2047 ++ return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag &&
2048 ++ active_bfqq->raising_coeff == 1);
2049 ++}
2050 ++
2051 ++static void bfq_arm_slice_timer(struct bfq_data *bfqd)
2052 ++{
2053 ++ struct bfq_queue *bfqq = bfqd->active_queue;
2054 ++ struct bfq_io_cq *bic;
2055 ++ unsigned long sl;
2056 ++
2057 ++ WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
2058 ++
2059 ++ /* Tasks have exited, don't wait. */
2060 ++ bic = bfqd->active_bic;
2061 ++ if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0)
2062 ++ return;
2063 ++
2064 ++ bfq_mark_bfqq_wait_request(bfqq);
2065 ++
2066 ++ /*
2067 ++ * We don't want to idle for seeks, but we do want to allow
2068 ++ * fair distribution of slice time for a process doing back-to-back
2069 ++ * seeks. So allow a little bit of time for it to submit a new rq.
2070 ++ *
2071 ++ * To prevent processes with (partly) seeky workloads from
2072 ++ * being too ill-treated, grant them a small fraction of the
2073 ++ * assigned budget before reducing the waiting time to
2074 ++ * BFQ_MIN_TT. In practice this helps reduce latency.
2075 ++ */
2076 ++ sl = bfqd->bfq_slice_idle;
2077 ++ if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) &&
2078 ++ bfqq->entity.service > bfq_max_budget(bfqd) / 8 &&
2079 ++ bfqq->raising_coeff == 1)
2080 ++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
2081 ++ else if (bfqq->raising_coeff > 1)
2082 ++ sl = sl * 3;
2083 ++ bfqd->last_idling_start = ktime_get();
2084 ++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
2085 ++ bfq_log(bfqd, "arm idle: %u/%u ms",
2086 ++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
2087 ++}
2088 ++
2089 ++/*
2090 ++ * Set the maximum time for the active queue to consume its
2091 ++ * budget. This prevents seeky processes from lowering the disk
2092 ++ * throughput (always guaranteed with a time slice scheme as in CFQ).
2093 ++ */
2094 ++static void bfq_set_budget_timeout(struct bfq_data *bfqd)
2095 ++{
2096 ++ struct bfq_queue *bfqq = bfqd->active_queue;
2097 ++ unsigned int timeout_coeff;
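++ /*
++ * The budget timeout is stretched in proportion to the queue weight
++ * (weight / orig_weight, i.e., the raising coefficient for a
++ * weight-raised queue), except for soft real-time raising, where it
++ * is left unchanged.
++ */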
2098 ++ if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time)
2099 ++ timeout_coeff = 1;
2100 ++ else
2101 ++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
2102 ++
2103 ++ bfqd->last_budget_start = ktime_get();
2104 ++
2105 ++ bfq_clear_bfqq_budget_new(bfqq);
2106 ++ bfqq->budget_timeout = jiffies +
2107 ++ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
2108 ++
2109 ++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
2110 ++ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
2111 ++ timeout_coeff));
2112 ++}
2113 ++
2114 ++/*
2115 ++ * Move request from internal lists to the request queue dispatch list.
2116 ++ */
2117 ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
2118 ++{
2119 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
2120 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
2121 ++
2122 ++ bfq_remove_request(rq);
2123 ++ bfqq->dispatched++;
2124 ++ elv_dispatch_sort(q, rq);
2125 ++
2126 ++ if (bfq_bfqq_sync(bfqq))
2127 ++ bfqd->sync_flight++;
2128 ++}
2129 ++
2130 ++/*
2131 ++ * Return expired entry, or NULL to just start from scratch in rbtree.
2132 ++ */
2133 ++static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
2134 ++{
2135 ++ struct request *rq = NULL;
2136 ++
2137 ++ if (bfq_bfqq_fifo_expire(bfqq))
2138 ++ return NULL;
2139 ++
2140 ++ bfq_mark_bfqq_fifo_expire(bfqq);
2141 ++
2142 ++ if (list_empty(&bfqq->fifo))
2143 ++ return NULL;
2144 ++
2145 ++ rq = rq_entry_fifo(bfqq->fifo.next);
2146 ++
2147 ++ if (time_before(jiffies, rq_fifo_time(rq)))
2148 ++ return NULL;
2149 ++
2150 ++ return rq;
2151 ++}
2152 ++
2153 ++/*
2154 ++ * Must be called with the queue_lock held.
2155 ++ */
2156 ++static int bfqq_process_refs(struct bfq_queue *bfqq)
2157 ++{
2158 ++ int process_refs, io_refs;
2159 ++
2160 ++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
2161 ++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
2162 ++ BUG_ON(process_refs < 0);
2163 ++ return process_refs;
2164 ++}
2165 ++
2166 ++static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
2167 ++{
2168 ++ int process_refs, new_process_refs;
2169 ++ struct bfq_queue *__bfqq;
2170 ++
2171 ++ /*
2172 ++ * If there are no process references on the new_bfqq, then it is
2173 ++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
2174 ++ * may have dropped their last reference (not just their last process
2175 ++ * reference).
2176 ++ */
2177 ++ if (!bfqq_process_refs(new_bfqq))
2178 ++ return;
2179 ++
2180 ++ /* Avoid a circular list and skip interim queue merges. */
2181 ++ while ((__bfqq = new_bfqq->new_bfqq)) {
2182 ++ if (__bfqq == bfqq)
2183 ++ return;
2184 ++ new_bfqq = __bfqq;
2185 ++ }
2186 ++
2187 ++ process_refs = bfqq_process_refs(bfqq);
2188 ++ new_process_refs = bfqq_process_refs(new_bfqq);
2189 ++ /*
2190 ++ * If the process for the bfqq has gone away, there is no
2191 ++ * sense in merging the queues.
2192 ++ */
2193 ++ if (process_refs == 0 || new_process_refs == 0)
2194 ++ return;
2195 ++
2196 ++ /*
2197 ++ * Merge in the direction of the lesser amount of work.
2198 ++ */
2199 ++ if (new_process_refs >= process_refs) {
2200 ++ bfqq->new_bfqq = new_bfqq;
2201 ++ atomic_add(process_refs, &new_bfqq->ref);
2202 ++ } else {
2203 ++ new_bfqq->new_bfqq = bfqq;
2204 ++ atomic_add(new_process_refs, &bfqq->ref);
2205 ++ }
2206 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
2207 ++ new_bfqq->pid);
2208 ++}
2209 ++
2210 ++static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
2211 ++{
2212 ++ struct bfq_entity *entity = &bfqq->entity;
2213 ++ return entity->budget - entity->service;
2214 ++}
2215 ++
2216 ++static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
2217 ++{
2218 ++ BUG_ON(bfqq != bfqd->active_queue);
2219 ++
2220 ++ __bfq_bfqd_reset_active(bfqd);
2221 ++
2222 ++ /*
2223 ++ * If this bfqq is shared between multiple processes, check
2224 ++ * to make sure that those processes are still issuing I/Os
2225 ++ * within the mean seek distance. If not, it may be time to
2226 ++ * break the queues apart again.
2227 ++ */
2228 ++ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
2229 ++ bfq_mark_bfqq_split_coop(bfqq);
2230 ++
2231 ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
2232 ++ /*
2233 ++ * Overload the budget_timeout field to record the time at
2234 ++ * which the queue became empty (no backlog); this timestamp
2235 ++ * is used by the weight-raising mechanism.
2236 ++ */
2237 ++ bfqq->budget_timeout = jiffies;
2238 ++ bfq_del_bfqq_busy(bfqd, bfqq, 1);
2239 ++ } else {
2240 ++ bfq_activate_bfqq(bfqd, bfqq);
2241 ++ /*
2242 ++ * Resort priority tree of potential close cooperators.
2243 ++ */
2244 ++ bfq_rq_pos_tree_add(bfqd, bfqq);
2245 ++ }
2246 ++}
2247 ++
2248 ++/**
2249 ++ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
2250 ++ * @bfqd: device data.
2251 ++ * @bfqq: queue to update.
2252 ++ * @reason: reason for expiration.
2253 ++ *
2254 ++ * Handle the feedback on @bfqq budget. See the body for detailed
2255 ++ * comments.
2256 ++ */
2257 ++static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
2258 ++ struct bfq_queue *bfqq,
2259 ++ enum bfqq_expiration reason)
2260 ++{
2261 ++ struct request *next_rq;
2262 ++ unsigned long budget, min_budget;
2263 ++
2264 ++ budget = bfqq->max_budget;
2265 ++ min_budget = bfq_min_budget(bfqd);
2266 ++
2267 ++ BUG_ON(bfqq != bfqd->active_queue);
2268 ++
2269 ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu",
2270 ++ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
2271 ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu",
2272 ++ budget, bfq_min_budget(bfqd));
2273 ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
2274 ++ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue));
2275 ++
2276 ++ if (bfq_bfqq_sync(bfqq)) {
2277 ++ switch (reason) {
2278 ++ /*
2279 ++ * Caveat: in all the following cases we trade latency
2280 ++ * for throughput.
2281 ++ */
2282 ++ case BFQ_BFQQ_TOO_IDLE:
2283 ++ /*
2284 ++ * This is the only case where we may reduce
2285 ++ * the budget: if there is no request of the
2286 ++ * process still waiting for completion, then
2287 ++ * we assume (tentatively) that the timer has
2288 ++ * expired because the batch of requests of
2289 ++ * the process could have been served with a
2290 ++ * smaller budget. Hence, betting that the
2291 ++ * process will behave in the same way when it
2292 ++ * becomes backlogged again, we reduce its
2293 ++ * next budget. As long as we guess right,
2294 ++ * this budget cut reduces the latency
2295 ++ * experienced by the process.
2296 ++ *
2297 ++ * However, if there are still outstanding
2298 ++ * requests, then the process may have not yet
2299 ++ * issued its next request just because it is
2300 ++ * still waiting for the completion of some of
2301 ++ * the still outstanding ones. So in this
2302 ++ * subcase we do not reduce its budget, on the
2303 ++ * contrary we increase it to possibly boost
2304 ++ * the throughput, as discussed in the
2305 ++ * comments to the BUDGET_TIMEOUT case.
2306 ++ */
2307 ++ if (bfqq->dispatched > 0) /* still outstanding reqs */
2308 ++ budget = min(budget * 2, bfqd->bfq_max_budget);
2309 ++ else {
2310 ++ if (budget > 5 * min_budget)
2311 ++ budget -= 4 * min_budget;
2312 ++ else
2313 ++ budget = min_budget;
2314 ++ }
2315 ++ break;
2316 ++ case BFQ_BFQQ_BUDGET_TIMEOUT:
2317 ++ /*
2318 ++ * We double the budget here because: 1) it
2319 ++ * gives the chance to boost the throughput if
2320 ++ * this is not a seeky process (which may have
2321 ++ * bumped into this timeout because of, e.g.,
2322 ++ * ZBR), 2) together with charge_full_budget
2323 ++ * it helps give seeky processes higher
2324 ++ * timestamps, and hence be served less
2325 ++ * frequently.
2326 ++ */
2327 ++ budget = min(budget * 2, bfqd->bfq_max_budget);
2328 ++ break;
2329 ++ case BFQ_BFQQ_BUDGET_EXHAUSTED:
2330 ++ /*
2331 ++ * The process still has backlog, and did not
2332 ++ * let either the budget timeout or the disk
2333 ++ * idling timeout expire. Hence it is not
2334 ++ * seeky, has a short thinktime and may be
2335 ++ * happy with a higher budget too. So
2336 ++ * definitely increase the budget of this good
2337 ++ * candidate to boost the disk throughput.
2338 ++ */
2339 ++ budget = min(budget * 4, bfqd->bfq_max_budget);
2340 ++ break;
2341 ++ case BFQ_BFQQ_NO_MORE_REQUESTS:
2342 ++ /*
2343 ++ * Leave the budget unchanged.
2344 ++ */
2345 ++ default:
2346 ++ return;
2347 ++ }
2348 ++ } else /* async queue */
2349 ++ /* async queues get always the maximum possible budget
2350 ++ * (their ability to dispatch is limited by
2351 ++ * @bfqd->bfq_max_budget_async_rq).
2352 ++ */
2353 ++ budget = bfqd->bfq_max_budget;
2354 ++
2355 ++ bfqq->max_budget = budget;
2356 ++
2357 ++ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
2358 ++ bfqq->max_budget > bfqd->bfq_max_budget)
2359 ++ bfqq->max_budget = bfqd->bfq_max_budget;
2360 ++
2361 ++ /*
2362 ++ * Make sure that we have enough budget for the next request.
2363 ++ * Since the finish time of the bfqq must be kept in sync with
2364 ++ * the budget, be sure to call __bfq_bfqq_expire() after the
2365 ++ * update.
2366 ++ */
2367 ++ next_rq = bfqq->next_rq;
2368 ++ if (next_rq != NULL)
2369 ++ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
2370 ++ bfq_serv_to_charge(next_rq, bfqq));
2371 ++ else
2372 ++ bfqq->entity.budget = bfqq->max_budget;
2373 ++
2374 ++ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
2375 ++ next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
2376 ++ bfqq->entity.budget);
2377 ++}
2378 ++
2379 ++static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
2380 ++{
2381 ++ unsigned long max_budget;
2382 ++
2383 ++ /*
2384 ++ * The max_budget calculated when autotuning is equal to the
2385 ++ * number of sectors transferred in timeout_sync at the
2386 ++ * estimated peak rate.
2387 ++ */
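++ /*
++ * peak_rate is in sectors/usec, left-shifted by BFQ_RATE_SHIFT for
++ * fixed-point precision, and timeout is in ms: multiplying by 1000
++ * converts ms to usec, and the final right shift removes the
++ * fixed-point scaling, leaving max_budget in sectors.
++ */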
2388 ++ max_budget = (unsigned long)(peak_rate * 1000 *
2389 ++ timeout >> BFQ_RATE_SHIFT);
2390 ++
2391 ++ return max_budget;
2392 ++}
2393 ++
2394 ++/*
2395 ++ * In addition to updating the peak rate, checks whether the process
2396 ++ * is "slow", and returns 1 if so. This slow flag is used, in addition
2397 ++ * to the budget timeout, to reduce the amount of service provided to
2398 ++ * seeky processes, and hence reduce their chances of lowering the
2399 ++ * throughput. See the code for more details.
2400 ++ */
2401 ++static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
2402 ++ int compensate, enum bfqq_expiration reason)
2403 ++{
2404 ++ u64 bw, usecs, expected, timeout;
2405 ++ ktime_t delta;
2406 ++ int update = 0;
2407 ++
2408 ++ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
2409 ++ return 0;
2410 ++
2411 ++ if (compensate)
2412 ++ delta = bfqd->last_idling_start;
2413 ++ else
2414 ++ delta = ktime_get();
2415 ++ delta = ktime_sub(delta, bfqd->last_budget_start);
2416 ++ usecs = ktime_to_us(delta);
2417 ++
2418 ++ /* Don't trust short/unrealistic values. */
2419 ++ if (usecs < 100 || usecs >= LONG_MAX)
2420 ++ return 0;
2421 ++
2422 ++ /*
2423 ++ * Calculate the bandwidth for the last slice. We use a 64 bit
2424 ++ * value to store the peak rate, in sectors per usec in fixed
2425 ++ * point math. We do so to have enough precision in the estimate
2426 ++ * and to avoid overflows.
2427 ++ */
2428 ++ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
2429 ++ do_div(bw, (unsigned long)usecs);
2430 ++
2431 ++ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
2432 ++
2433 ++ /*
2434 ++ * Use only long (> 20ms) intervals to filter out spikes for
2435 ++ * the peak rate estimation.
2436 ++ */
2437 ++ if (usecs > 20000) {
2438 ++ if (bw > bfqd->peak_rate ||
2439 ++ (!BFQQ_SEEKY(bfqq) &&
2440 ++ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
2441 ++ bfq_log(bfqd, "measured bw =%llu", bw);
2442 ++ /*
2443 ++ * To smooth oscillations use a low-pass filter with
2444 ++ * alpha=7/8, i.e.,
2445 ++ * new_rate = (7/8) * old_rate + (1/8) * bw
2446 ++ */
2447 ++ do_div(bw, 8);
2448 ++ if (bw == 0)
2449 ++ return 0;
2450 ++ bfqd->peak_rate *= 7;
2451 ++ do_div(bfqd->peak_rate, 8);
2452 ++ bfqd->peak_rate += bw;
2453 ++ update = 1;
2454 ++ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
2455 ++ }
2456 ++
2457 ++ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
2458 ++
2459 ++ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
2460 ++ bfqd->peak_rate_samples++;
2461 ++
2462 ++ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
2463 ++ update && bfqd->bfq_user_max_budget == 0) {
2464 ++ bfqd->bfq_max_budget =
2465 ++ bfq_calc_max_budget(bfqd->peak_rate, timeout);
2466 ++ bfq_log(bfqd, "new max_budget=%lu",
2467 ++ bfqd->bfq_max_budget);
2468 ++ }
2469 ++ }
2470 ++
2471 ++ /*
2472 ++ * If the process has been served for a too short time
2473 ++ * interval to let its possible sequential accesses prevail on
2474 ++ * the initial seek time needed to move the disk head on the
2475 ++ * first sector it requested, then give the process a chance
2476 ++ * and for the moment return false.
2477 ++ */
2478 ++ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
2479 ++ return 0;
2480 ++
2481 ++ /*
2482 ++ * A process is considered ``slow'' (i.e., seeky, so that we
2483 ++ * cannot treat it fairly in the service domain, as it would
2484 ++ * slow down the other processes too much) if, when a slice
2485 ++ * ends for whatever reason, it has received service at a
2486 ++ * rate that would not be high enough to complete the budget
2487 ++ * before the budget timeout expiration.
2488 ++ */
2489 ++ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
2490 ++
2491 ++ /*
2492 ++ * Caveat: processes doing IO in the slower disk zones will
2493 ++ * tend to be slow(er) even if not seeky. And the estimated
2494 ++ * peak rate will actually be an average over the disk
2495 ++ * surface. Hence, to not be too harsh with unlucky processes,
2496 ++ * we keep a budget/3 margin of safety before declaring a
2497 ++ * process slow.
2498 ++ */
2499 ++ return expected > (4 * bfqq->entity.budget) / 3;
2500 ++}
2501 ++
2502 ++/**
2503 ++ * bfq_bfqq_expire - expire a queue.
2504 ++ * @bfqd: device owning the queue.
2505 ++ * @bfqq: the queue to expire.
2506 ++ * @compensate: if true, compensate for the time spent idling.
2507 ++ * @reason: the reason causing the expiration.
2508 ++ *
2510 ++ * If the process associated to the queue is slow (i.e., seeky), or in
2511 ++ * case of budget timeout, or, finally, if it is async, we
2512 ++ * artificially charge it an entire budget (independently of the
2513 ++ * actual service it received). As a consequence, the queue will get
2514 ++ * higher timestamps than the correct ones upon reactivation, and
2515 ++ * hence it will be rescheduled as if it had received more service
2516 ++ * than what it actually received. In the end, this class of processes
2517 ++ * will receive less service in proportion to how slowly they consume
2518 ++ * their budgets (and hence how seriously they tend to lower the
2519 ++ * throughput).
2520 ++ *
2521 ++ * In contrast, when a queue expires because it has been idling for
2522 ++ * too long or because it exhausted its budget, we do not touch the
2523 ++ * amount of service it has received. Hence when the queue will be
2524 ++ * reactivated and its timestamps updated, the latter will be in sync
2525 ++ * with the actual service received by the queue until expiration.
2526 ++ *
2527 ++ * Charging a full budget to the first type of queues and the exact
2528 ++ * service to the others has the effect of using the WF2Q+ policy to
2529 ++ * schedule the former on a timeslice basis, without violating the
2530 ++ * service domain guarantees of the latter.
2531 ++ */
2532 ++static void bfq_bfqq_expire(struct bfq_data *bfqd,
2533 ++ struct bfq_queue *bfqq,
2534 ++ int compensate,
2535 ++ enum bfqq_expiration reason)
2536 ++{
2537 ++ int slow;
2538 ++ BUG_ON(bfqq != bfqd->active_queue);
2539 ++
2540 ++ /* Update disk peak rate for autotuning and check whether the
2541 ++ * process is slow (see bfq_update_peak_rate).
2542 ++ */
2543 ++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
2544 ++
2545 ++ /*
2546 ++ * As above explained, 'punish' slow (i.e., seeky), timed-out
2547 ++ * and async queues, to favor sequential sync workloads.
2548 ++ *
2549 ++ * Processes doing IO in the slower disk zones will tend to be
2550 ++ * slow(er) even if not seeky. Hence, since the estimated peak
2551 ++ * rate is actually an average over the disk surface, these
2552 ++ * processes may timeout just for bad luck. To avoid punishing
2553 ++ * them we do not charge a full budget to a process that
2554 ++ * succeeded in consuming at least 2/3 of its budget.
2555 ++ */
2556 ++ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
2557 ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
2558 ++ bfq_bfqq_charge_full_budget(bfqq);
2559 ++
2560 ++ if (bfqd->low_latency && bfqq->raising_coeff == 1)
2561 ++ bfqq->last_rais_start_finish = jiffies;
2562 ++
2563 ++ if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) {
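++ /*
++ * Having just received entity.service sectors, a queue that is to
++ * stay below the bfq_raising_max_softrt_rate limit (sectors/sec)
++ * cannot qualify as soft real-time again before service / rate
++ * seconds, i.e., HZ * service / rate jiffies from now.
++ */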
2564 ++ if (reason != BFQ_BFQQ_BUDGET_TIMEOUT)
2565 ++ bfqq->soft_rt_next_start =
2566 ++ jiffies +
2567 ++ HZ * bfqq->entity.service /
2568 ++ bfqd->bfq_raising_max_softrt_rate;
2569 ++ else
2570 ++ bfqq->soft_rt_next_start = -1; /* infinity */
2571 ++ }
2572 ++ bfq_log_bfqq(bfqd, bfqq,
2573 ++ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow,
2574 ++ bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
2575 ++
2576 ++ /* Increase, decrease or leave budget unchanged according to reason */
2577 ++ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
2578 ++ __bfq_bfqq_expire(bfqd, bfqq);
2579 ++}
2580 ++
2581 ++/*
2582 ++ * Budget timeout is not implemented through a dedicated timer, but
2583 ++ * just checked on request arrivals and completions, as well as on
2584 ++ * idle timer expirations.
2585 ++ */
2586 ++static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
2587 ++{
2588 ++ if (bfq_bfqq_budget_new(bfqq))
2589 ++ return 0;
2590 ++
2591 ++ if (time_before(jiffies, bfqq->budget_timeout))
2592 ++ return 0;
2593 ++
2594 ++ return 1;
2595 ++}
2596 ++
2597 ++/*
2598 ++ * If we expire a queue that is waiting for the arrival of a new
2599 ++ * request, we may prevent the fictitious timestamp backshifting that
2600 ++ * allows the guarantees of the queue to be preserved (see [1] for
2601 ++ * this tricky aspect). Hence we return true only if this condition
2602 ++ * does not hold, or if the queue is slow enough to deserve only to be
2603 ++ * kicked off for preserving a high throughput.
2604 ++*/
2605 ++static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
2606 ++{
2607 ++ bfq_log_bfqq(bfqq->bfqd, bfqq,
2608 ++ "may_budget_timeout: wr %d left %d timeout %d",
2609 ++ bfq_bfqq_wait_request(bfqq),
2610 ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3,
2611 ++ bfq_bfqq_budget_timeout(bfqq));
2612 ++
2613 ++ return (!bfq_bfqq_wait_request(bfqq) ||
2614 ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)
2615 ++ &&
2616 ++ bfq_bfqq_budget_timeout(bfqq);
2617 ++}
2618 ++
2619 ++/*
2620 ++ * If the active queue is empty, but it is sync and either of the following
2621 ++ * conditions holds, then: 1) the queue must remain active and cannot be
2622 ++ * expired, and 2) the disk must be idled to wait for the possible arrival
2623 ++ * of a new request for the queue. The conditions are:
2624 ++ * - the device is rotational and not performing NCQ, and the queue has its
2625 ++ * idle window set (in this case, waiting for a new request for the queue
2626 ++ * is likely to boost the disk throughput);
2627 ++ * - the queue is weight-raised (waiting for the request is necessary for
2628 ++ * providing the queue with fairness and latency guarantees).
2629 ++ */
2630 ++static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq,
2631 ++ int budg_timeout)
2632 ++{
2633 ++ struct bfq_data *bfqd = bfqq->bfqd;
2634 ++
2635 ++ return (bfq_bfqq_sync(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
2636 ++ bfqd->bfq_slice_idle != 0 &&
2637 ++ ((bfq_bfqq_idle_window(bfqq) && !bfqd->hw_tag &&
2638 ++ !blk_queue_nonrot(bfqd->queue))
2639 ++ || bfqq->raising_coeff > 1) &&
2640 ++ (bfqd->rq_in_driver == 0 ||
2641 ++ budg_timeout ||
2642 ++ bfqq->raising_coeff > 1) &&
2643 ++ !bfq_close_cooperator(bfqd, bfqq) &&
2644 ++ (!bfq_bfqq_coop(bfqq) ||
2645 ++ !bfq_bfqq_some_coop_idle(bfqq)) &&
2646 ++ !bfq_queue_nonrot_noidle(bfqd, bfqq));
2647 ++}
2648 ++
2649 ++/*
2650 ++ * Select a queue for service. If we have a current active queue,
2651 ++ * check whether to continue servicing it, or retrieve and set a new one.
2652 ++ */
2653 ++static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
2654 ++{
2655 ++ struct bfq_queue *bfqq, *new_bfqq = NULL;
2656 ++ struct request *next_rq;
2657 ++ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
2658 ++ int budg_timeout;
2659 ++
2660 ++ bfqq = bfqd->active_queue;
2661 ++ if (bfqq == NULL)
2662 ++ goto new_queue;
2663 ++
2664 ++ bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue");
2665 ++
2666 ++ /*
2667 ++ * If another queue has a request waiting within our mean seek
2668 ++ * distance, let it run. The expire code will check for close
2669 ++ * cooperators and put the close queue at the front of the
2670 ++ * service tree. If possible, merge the expiring queue with the
2671 ++ * new bfqq.
2672 ++ */
2673 ++ new_bfqq = bfq_close_cooperator(bfqd, bfqq);
2674 ++ if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
2675 ++ bfq_setup_merge(bfqq, new_bfqq);
2676 ++
2677 ++ budg_timeout = bfq_may_expire_for_budg_timeout(bfqq);
2678 ++ if (budg_timeout &&
2679 ++ !bfq_bfqq_must_idle(bfqq, budg_timeout))
2680 ++ goto expire;
2681 ++
2682 ++ next_rq = bfqq->next_rq;
2683 ++ /*
2684 ++ * If bfqq has requests queued and it has enough budget left to
2685 ++ * serve them, keep the queue, otherwise expire it.
2686 ++ */
2687 ++ if (next_rq != NULL) {
2688 ++ if (bfq_serv_to_charge(next_rq, bfqq) >
2689 ++ bfq_bfqq_budget_left(bfqq)) {
2690 ++ reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
2691 ++ goto expire;
2692 ++ } else {
2693 ++ /*
2694 ++ * The idle timer may be pending because we may not
2695 ++ * disable disk idling even when a new request arrives
2696 ++ */
2697 ++ if (timer_pending(&bfqd->idle_slice_timer)) {
2698 ++ /*
2699 ++ * If we get here: 1) at least a new request
2700 ++ * has arrived but we have not disabled the
2701 ++ * timer because the request was too small,
2702 ++ * 2) then the block layer has unplugged the
2703 ++ * device, causing the dispatch to be invoked.
2704 ++ *
2705 ++ * Since the device is unplugged, now the
2706 ++ * requests are probably large enough to
2707 ++ * provide a reasonable throughput.
2708 ++ * So we disable idling.
2709 ++ */
2710 ++ bfq_clear_bfqq_wait_request(bfqq);
2711 ++ del_timer(&bfqd->idle_slice_timer);
2712 ++ }
2713 ++ if (new_bfqq == NULL)
2714 ++ goto keep_queue;
2715 ++ else
2716 ++ goto expire;
2717 ++ }
2718 ++ }
2719 ++
2720 ++ /*
2721 ++ * No requests pending. If there is no cooperator, and the active
2722 ++ * queue still has requests in flight or is idling for a new request,
2723 ++ * then keep it.
2724 ++ */
2725 ++ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
2726 ++ (bfqq->dispatched != 0 &&
2727 ++ (bfq_bfqq_idle_window(bfqq) || bfqq->raising_coeff > 1) &&
2728 ++ !bfq_queue_nonrot_noidle(bfqd, bfqq)))) {
2729 ++ bfqq = NULL;
2730 ++ goto keep_queue;
2731 ++ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
2732 ++ /*
2733 ++ * Expiring the queue because there is a close cooperator,
2734 ++ * cancel timer.
2735 ++ */
2736 ++ bfq_clear_bfqq_wait_request(bfqq);
2737 ++ del_timer(&bfqd->idle_slice_timer);
2738 ++ }
2739 ++
2740 ++ reason = BFQ_BFQQ_NO_MORE_REQUESTS;
2741 ++expire:
2742 ++ bfq_bfqq_expire(bfqd, bfqq, 0, reason);
2743 ++new_queue:
2744 ++ bfqq = bfq_set_active_queue(bfqd, new_bfqq);
2745 ++ bfq_log(bfqd, "select_queue: new queue %d returned",
2746 ++ bfqq != NULL ? bfqq->pid : 0);
2747 ++keep_queue:
2748 ++ return bfqq;
2749 ++}
2750 ++
2751 ++static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
2752 ++{
2753 ++ if (bfqq->raising_coeff > 1) { /* queue is being boosted */
2754 ++ struct bfq_entity *entity = &bfqq->entity;
2755 ++
2756 ++ bfq_log_bfqq(bfqd, bfqq,
2757 ++ "raising period dur %u/%u msec, "
2758 ++ "old raising coeff %u, w %d(%d)",
2759 ++ jiffies_to_msecs(jiffies -
2760 ++ bfqq->last_rais_start_finish),
2761 ++ jiffies_to_msecs(bfqq->raising_cur_max_time),
2762 ++ bfqq->raising_coeff,
2763 ++ bfqq->entity.weight, bfqq->entity.orig_weight);
2764 ++
2765 ++ BUG_ON(bfqq != bfqd->active_queue && entity->weight !=
2766 ++ entity->orig_weight * bfqq->raising_coeff);
2767 ++ if (entity->ioprio_changed)
2768 ++ bfq_log_bfqq(bfqd, bfqq,
2769 ++ "WARN: pending prio change");
2770 ++ /*
2771 ++ * If too much time has elapsed from the beginning
2772 ++ * of this weight-raising period and process is not soft
2773 ++ * of this weight-raising period and the process is not soft
2774 ++ * real-time, stop it.
2775 ++ if (jiffies - bfqq->last_rais_start_finish >
2776 ++ bfqq->raising_cur_max_time) {
2777 ++ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 &&
2778 ++ bfqq->soft_rt_next_start < jiffies;
2779 ++
2780 ++ bfqq->last_rais_start_finish = jiffies;
2781 ++ if (soft_rt)
2782 ++ bfqq->raising_cur_max_time =
2783 ++ bfqd->bfq_raising_rt_max_time;
2784 ++ else {
2785 ++ bfq_log_bfqq(bfqd, bfqq,
2786 ++ "wrais ending at %llu msec, "
2787 ++ "rais_max_time %u",
2788 ++ bfqq->last_rais_start_finish,
2789 ++ jiffies_to_msecs(bfqq->
2790 ++ raising_cur_max_time));
2791 ++ bfq_bfqq_end_raising(bfqq);
2792 ++ __bfq_entity_update_weight_prio(
2793 ++ bfq_entity_service_tree(entity),
2794 ++ entity);
2795 ++ }
2796 ++ }
2797 ++ }
2798 ++}
2799 ++
2800 ++/*
2801 ++ * Dispatch one request from bfqq, moving it to the request queue
2802 ++ * dispatch list.
2803 ++ */
2804 ++static int bfq_dispatch_request(struct bfq_data *bfqd,
2805 ++ struct bfq_queue *bfqq)
2806 ++{
2807 ++ int dispatched = 0;
2808 ++ struct request *rq;
2809 ++ unsigned long service_to_charge;
2810 ++
2811 ++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
2812 ++
2813 ++ /* Follow expired path, else get first next available. */
2814 ++ rq = bfq_check_fifo(bfqq);
2815 ++ if (rq == NULL)
2816 ++ rq = bfqq->next_rq;
2817 ++ service_to_charge = bfq_serv_to_charge(rq, bfqq);
2818 ++
2819 ++ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
2820 ++ /*
2821 ++ * This may happen if the next rq is chosen
2822 ++ * in fifo order instead of sector order.
2823 ++ * The budget is properly dimensioned
2824 ++ * to be always sufficient to serve the next request
2825 ++ * only if it is chosen in sector order. The reason is
2826 ++ * that it would be quite inefficient and of little use
2827 ++ * to always make sure that the budget is large enough
2828 ++ * to serve even the possible next rq in fifo order.
2829 ++ * In fact, requests are seldom served in fifo order.
2830 ++ *
2831 ++ * Expire the queue for budget exhaustion, and
2832 ++ * make sure that the next act_budget is enough
2833 ++ * to serve the next request, even if it comes
2834 ++ * from the fifo expired path.
2835 ++ */
2836 ++ bfqq->next_rq = rq;
2837 ++ /*
2838 ++ * Since this dispatch failed, make sure that
2839 ++ * a new one will be performed
2840 ++ */
2841 ++ if (!bfqd->rq_in_driver)
2842 ++ bfq_schedule_dispatch(bfqd);
2843 ++ goto expire;
2844 ++ }
2845 ++
2846 ++ /* Finally, insert request into driver dispatch list. */
2847 ++ bfq_bfqq_served(bfqq, service_to_charge);
2848 ++ bfq_dispatch_insert(bfqd->queue, rq);
2849 ++
2850 ++ update_raising_data(bfqd, bfqq);
2851 ++
2852 ++ bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), "
2853 ++ "budg left %lu",
2854 ++ blk_rq_sectors(rq),
2855 ++ (long long unsigned)blk_rq_pos(rq),
2856 ++ bfq_bfqq_budget_left(bfqq));
2857 ++
2858 ++ dispatched++;
2859 ++
2860 ++ if (bfqd->active_bic == NULL) {
2861 ++ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
2862 ++ bfqd->active_bic = RQ_BIC(rq);
2863 ++ }
2864 ++
2865 ++ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
2866 ++ dispatched >= bfqd->bfq_max_budget_async_rq) ||
2867 ++ bfq_class_idle(bfqq)))
2868 ++ goto expire;
2869 ++
2870 ++ return dispatched;
2871 ++
2872 ++expire:
2873 ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED);
2874 ++ return dispatched;
2875 ++}
2876 ++
2877 ++static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
2878 ++{
2879 ++ int dispatched = 0;
2880 ++
2881 ++ while (bfqq->next_rq != NULL) {
2882 ++ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
2883 ++ dispatched++;
2884 ++ }
2885 ++
2886 ++ BUG_ON(!list_empty(&bfqq->fifo));
2887 ++ return dispatched;
2888 ++}
2889 ++
2890 ++/*
2891 ++ * Drain our current requests. Used for barriers and when switching
2892 ++ * io schedulers on-the-fly.
2893 ++ */
2894 ++static int bfq_forced_dispatch(struct bfq_data *bfqd)
2895 ++{
2896 ++ struct bfq_queue *bfqq, *n;
2897 ++ struct bfq_service_tree *st;
2898 ++ int dispatched = 0;
2899 ++
2900 ++ bfqq = bfqd->active_queue;
2901 ++ if (bfqq != NULL)
2902 ++ __bfq_bfqq_expire(bfqd, bfqq);
2903 ++
2904 ++ /*
2905 ++ * Loop through classes, and be careful to leave the scheduler
2906 ++ * in a consistent state, as feedback mechanisms and vtime
2907 ++ * updates cannot be disabled during the process.
2908 ++ */
2909 ++ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
2910 ++ st = bfq_entity_service_tree(&bfqq->entity);
2911 ++
2912 ++ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
2913 ++ bfqq->max_budget = bfq_max_budget(bfqd);
2914 ++
2915 ++ bfq_forget_idle(st);
2916 ++ }
2917 ++
2918 ++ BUG_ON(bfqd->busy_queues != 0);
2919 ++
2920 ++ return dispatched;
2921 ++}
2922 ++
2923 ++static int bfq_dispatch_requests(struct request_queue *q, int force)
2924 ++{
2925 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
2926 ++ struct bfq_queue *bfqq;
2927 ++ int max_dispatch;
2928 ++
2929 ++ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
2930 ++ if (bfqd->busy_queues == 0)
2931 ++ return 0;
2932 ++
2933 ++ if (unlikely(force))
2934 ++ return bfq_forced_dispatch(bfqd);
2935 ++
2936 ++ if ((bfqq = bfq_select_queue(bfqd)) == NULL)
2937 ++ return 0;
2938 ++
2939 ++ max_dispatch = bfqd->bfq_quantum;
2940 ++ if (bfq_class_idle(bfqq))
2941 ++ max_dispatch = 1;
2942 ++
2943 ++ if (!bfq_bfqq_sync(bfqq))
2944 ++ max_dispatch = bfqd->bfq_max_budget_async_rq;
2945 ++
2946 ++ if (bfqq->dispatched >= max_dispatch) {
2947 ++ if (bfqd->busy_queues > 1)
2948 ++ return 0;
2949 ++ if (bfqq->dispatched >= 4 * max_dispatch)
2950 ++ return 0;
2951 ++ }
2952 ++
2953 ++ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
2954 ++ return 0;
2955 ++
2956 ++ bfq_clear_bfqq_wait_request(bfqq);
2957 ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
2958 ++
2959 ++ if (!bfq_dispatch_request(bfqd, bfqq))
2960 ++ return 0;
2961 ++
2962 ++ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d "
2963 ++ "(max_disp %d)", bfqq->pid, max_dispatch);
2964 ++
2965 ++ return 1;
2966 ++}
2967 ++
2968 ++/*
2969 ++ * Task holds one reference to the queue, dropped when task exits. Each rq
2970 ++ * in-flight on this queue also holds a reference, dropped when rq is freed.
2971 ++ *
2972 ++ * Queue lock must be held here.
2973 ++ */
2974 ++static void bfq_put_queue(struct bfq_queue *bfqq)
2975 ++{
2976 ++ struct bfq_data *bfqd = bfqq->bfqd;
2977 ++
2978 ++ BUG_ON(atomic_read(&bfqq->ref) <= 0);
2979 ++
2980 ++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
2981 ++ atomic_read(&bfqq->ref));
2982 ++ if (!atomic_dec_and_test(&bfqq->ref))
2983 ++ return;
2984 ++
2985 ++ BUG_ON(rb_first(&bfqq->sort_list) != NULL);
2986 ++ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
2987 ++ BUG_ON(bfqq->entity.tree != NULL);
2988 ++ BUG_ON(bfq_bfqq_busy(bfqq));
2989 ++ BUG_ON(bfqd->active_queue == bfqq);
2990 ++
2991 ++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
2992 ++
2993 ++ kmem_cache_free(bfq_pool, bfqq);
2994 ++}
2995 ++
2996 ++static void bfq_put_cooperator(struct bfq_queue *bfqq)
2997 ++{
2998 ++ struct bfq_queue *__bfqq, *next;
2999 ++
3000 ++ /*
3001 ++ * If this queue was scheduled to merge with another queue, be
3002 ++ * sure to drop the reference taken on that queue (and others in
3003 ++ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
3004 ++ */
3005 ++ __bfqq = bfqq->new_bfqq;
3006 ++ while (__bfqq) {
3007 ++ if (__bfqq == bfqq) {
3008 ++ WARN(1, "bfqq->new_bfqq loop detected.\n");
3009 ++ break;
3010 ++ }
3011 ++ next = __bfqq->new_bfqq;
3012 ++ bfq_put_queue(__bfqq);
3013 ++ __bfqq = next;
3014 ++ }
3015 ++}
3016 ++
3017 ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
3018 ++{
3019 ++ if (bfqq == bfqd->active_queue) {
3020 ++ __bfq_bfqq_expire(bfqd, bfqq);
3021 ++ bfq_schedule_dispatch(bfqd);
3022 ++ }
3023 ++
3024 ++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
3025 ++ atomic_read(&bfqq->ref));
3026 ++
3027 ++ bfq_put_cooperator(bfqq);
3028 ++
3029 ++ bfq_put_queue(bfqq);
3030 ++}
3031 ++
3032 ++static void bfq_init_icq(struct io_cq *icq)
3033 ++{
3034 ++ struct bfq_io_cq *bic = icq_to_bic(icq);
3035 ++
3036 ++ bic->ttime.last_end_request = jiffies;
3037 ++}
3038 ++
3039 ++static void bfq_exit_icq(struct io_cq *icq)
3040 ++{
3041 ++ struct bfq_io_cq *bic = icq_to_bic(icq);
3042 ++ struct bfq_data *bfqd = bic_to_bfqd(bic);
3043 ++
3044 ++ if (bic->bfqq[BLK_RW_ASYNC]) {
3045 ++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
3046 ++ bic->bfqq[BLK_RW_ASYNC] = NULL;
3047 ++ }
3048 ++
3049 ++ if (bic->bfqq[BLK_RW_SYNC]) {
3050 ++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
3051 ++ bic->bfqq[BLK_RW_SYNC] = NULL;
3052 ++ }
3053 ++}
3054 ++
3055 ++/*
3056 ++ * Update the entity prio values; note that the new values will not
3057 ++ * be used until the next (re)activation.
3058 ++ */
3059 ++static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
3060 ++{
3061 ++ struct task_struct *tsk = current;
3062 ++ int ioprio_class;
3063 ++
3064 ++ if (!bfq_bfqq_prio_changed(bfqq))
3065 ++ return;
3066 ++
3067 ++ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
3068 ++ switch (ioprio_class) {
3069 ++ default:
3070 ++ printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class);
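++ /* no break: fall through to IOPRIO_CLASS_NONE */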
3071 ++ case IOPRIO_CLASS_NONE:
3072 ++ /*
3073 ++ * No prio set, inherit CPU scheduling settings.
3074 ++ */
3075 ++ bfqq->entity.new_ioprio = task_nice_ioprio(tsk);
3076 ++ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk);
3077 ++ break;
3078 ++ case IOPRIO_CLASS_RT:
3079 ++ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
3080 ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT;
3081 ++ break;
3082 ++ case IOPRIO_CLASS_BE:
3083 ++ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
3084 ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE;
3085 ++ break;
3086 ++ case IOPRIO_CLASS_IDLE:
3087 ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE;
3088 ++ bfqq->entity.new_ioprio = 7;
3089 ++ bfq_clear_bfqq_idle_window(bfqq);
3090 ++ break;
3091 ++ }
3092 ++
3093 ++ bfqq->entity.ioprio_changed = 1;
3094 ++
3095 ++ /*
3096 ++ * Keep track of original prio settings in case we have to temporarily
3097 ++ * elevate the priority of this queue.
3098 ++ */
3099 ++ bfqq->org_ioprio = bfqq->entity.new_ioprio;
3100 ++ bfq_clear_bfqq_prio_changed(bfqq);
3101 ++}
3102 ++
3103 ++static void bfq_changed_ioprio(struct bfq_io_cq *bic)
3104 ++{
3105 ++ struct bfq_data *bfqd;
3106 ++ struct bfq_queue *bfqq, *new_bfqq;
3107 ++ struct bfq_group *bfqg;
3108 ++ unsigned long uninitialized_var(flags);
3109 ++ int ioprio = bic->icq.ioc->ioprio;
3110 ++
3111 ++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags);
3112 ++ /*
3113 ++ * This condition may trigger on a newly created bic; be sure to drop the
3114 ++ * lock before returning.
3115 ++ */
3116 ++ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio))
3117 ++ goto out;
3118 ++
3119 ++ bfqq = bic->bfqq[BLK_RW_ASYNC];
3120 ++ if (bfqq != NULL) {
3121 ++ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group,
3122 ++ sched_data);
3123 ++ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic,
3124 ++ GFP_ATOMIC);
3125 ++ if (new_bfqq != NULL) {
3126 ++ bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
3127 ++ bfq_log_bfqq(bfqd, bfqq,
3128 ++ "changed_ioprio: bfqq %p %d",
3129 ++ bfqq, atomic_read(&bfqq->ref));
3130 ++ bfq_put_queue(bfqq);
3131 ++ }
3132 ++ }
3133 ++
3134 ++ bfqq = bic->bfqq[BLK_RW_SYNC];
3135 ++ if (bfqq != NULL)
3136 ++ bfq_mark_bfqq_prio_changed(bfqq);
3137 ++
3138 ++ bic->ioprio = ioprio;
3139 ++
3140 ++out:
3141 ++ bfq_put_bfqd_unlock(bfqd, &flags);
3142 ++}
3143 ++
3144 ++static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
3145 ++ pid_t pid, int is_sync)
3146 ++{
3147 ++ RB_CLEAR_NODE(&bfqq->entity.rb_node);
3148 ++ INIT_LIST_HEAD(&bfqq->fifo);
3149 ++
3150 ++ atomic_set(&bfqq->ref, 0);
3151 ++ bfqq->bfqd = bfqd;
3152 ++
3153 ++ bfq_mark_bfqq_prio_changed(bfqq);
3154 ++
3155 ++ if (is_sync) {
3156 ++ if (!bfq_class_idle(bfqq))
3157 ++ bfq_mark_bfqq_idle_window(bfqq);
3158 ++ bfq_mark_bfqq_sync(bfqq);
3159 ++ }
3160 ++
3161 ++ /* Tentative initial value to trade off between throughput and latency */
3162 ++ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
3163 ++ bfqq->pid = pid;
3164 ++
3165 ++ bfqq->raising_coeff = 1;
3166 ++ bfqq->last_rais_start_finish = 0;
3167 ++ bfqq->soft_rt_next_start = -1;
3168 ++}
3169 ++
3170 ++static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
3171 ++ struct bfq_group *bfqg,
3172 ++ int is_sync,
3173 ++ struct bfq_io_cq *bic,
3174 ++ gfp_t gfp_mask)
3175 ++{
3176 ++ struct bfq_queue *bfqq, *new_bfqq = NULL;
3177 ++
3178 ++retry:
3179 ++ /* bic always exists here */
3180 ++ bfqq = bic_to_bfqq(bic, is_sync);
3181 ++
3182 ++ /*
3183 ++ * Always try a new alloc if we fall back to the OOM bfqq
3184 ++ * originally, since it should just be a temporary situation.
3185 ++ */
3186 ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
3187 ++ bfqq = NULL;
3188 ++ if (new_bfqq != NULL) {
3189 ++ bfqq = new_bfqq;
3190 ++ new_bfqq = NULL;
3191 ++ } else if (gfp_mask & __GFP_WAIT) {
3192 ++ spin_unlock_irq(bfqd->queue->queue_lock);
3193 ++ new_bfqq = kmem_cache_alloc_node(bfq_pool,
3194 ++ gfp_mask | __GFP_ZERO,
3195 ++ bfqd->queue->node);
3196 ++ spin_lock_irq(bfqd->queue->queue_lock);
3197 ++ if (new_bfqq != NULL)
3198 ++ goto retry;
3199 ++ } else {
3200 ++ bfqq = kmem_cache_alloc_node(bfq_pool,
3201 ++ gfp_mask | __GFP_ZERO,
3202 ++ bfqd->queue->node);
3203 ++ }
3204 ++
3205 ++ if (bfqq != NULL) {
3206 ++ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync);
3207 ++ bfq_log_bfqq(bfqd, bfqq, "allocated");
3208 ++ } else {
3209 ++ bfqq = &bfqd->oom_bfqq;
3210 ++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
3211 ++ }
3212 ++
3213 ++ bfq_init_prio_data(bfqq, bic);
3214 ++ bfq_init_entity(&bfqq->entity, bfqg);
3215 ++ }
3216 ++
3217 ++ if (new_bfqq != NULL)
3218 ++ kmem_cache_free(bfq_pool, new_bfqq);
3219 ++
3220 ++ return bfqq;
3221 ++}
3222 ++
3223 ++static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
3224 ++ struct bfq_group *bfqg,
3225 ++ int ioprio_class, int ioprio)
3226 ++{
3227 ++ switch (ioprio_class) {
3228 ++ case IOPRIO_CLASS_RT:
3229 ++ return &bfqg->async_bfqq[0][ioprio];
3230 ++ case IOPRIO_CLASS_NONE:
3231 ++ ioprio = IOPRIO_NORM;
3232 ++ /* fall through */
3233 ++ case IOPRIO_CLASS_BE:
3234 ++ return &bfqg->async_bfqq[1][ioprio];
3235 ++ case IOPRIO_CLASS_IDLE:
3236 ++ return &bfqg->async_idle_bfqq;
3237 ++ default:
3238 ++ BUG();
3239 ++ }
3240 ++}
3241 ++
3242 ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
3243 ++ struct bfq_group *bfqg, int is_sync,
3244 ++ struct bfq_io_cq *bic, gfp_t gfp_mask)
3245 ++{
3246 ++ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
3247 ++ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
3248 ++ struct bfq_queue **async_bfqq = NULL;
3249 ++ struct bfq_queue *bfqq = NULL;
3250 ++
3251 ++ if (!is_sync) {
3252 ++ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
3253 ++ ioprio);
3254 ++ bfqq = *async_bfqq;
3255 ++ }
3256 ++
3257 ++ if (bfqq == NULL)
3258 ++ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
3259 ++
3260 ++ /*
3261 ++ * Pin the queue now that it's allocated, scheduler exit will prune it.
3262 ++ */
3263 ++ if (!is_sync && *async_bfqq == NULL) {
3264 ++ atomic_inc(&bfqq->ref);
3265 ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
3266 ++ bfqq, atomic_read(&bfqq->ref));
3267 ++ *async_bfqq = bfqq;
3268 ++ }
3269 ++
3270 ++ atomic_inc(&bfqq->ref);
3271 ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
3272 ++ atomic_read(&bfqq->ref));
3273 ++ return bfqq;
3274 ++}
3275 ++
3276 ++static void bfq_update_io_thinktime(struct bfq_data *bfqd,
3277 ++ struct bfq_io_cq *bic)
3278 ++{
3279 ++ unsigned long elapsed = jiffies - bic->ttime.last_end_request;
3280 ++ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
3281 ++
3282 ++ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
3283 ++ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
3284 ++ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / bic->ttime.ttime_samples;
3285 ++}
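/*
 * Illustrative aside: bfq_update_io_thinktime() above keeps a fixed-point
 * exponential moving average in which each new sample carries 1/8 of the
 * weight and values are scaled by 256 to retain fractional precision in
 * integer arithmetic. A minimal standalone sketch of the same update rule
 * (the helper name and parameters are hypothetical, not BFQ symbols):
 */
static inline void ewma_update(unsigned long *samples, unsigned long *total,
			       unsigned long *mean, unsigned long sample)
{
	*samples = (7 * *samples + 256) / 8;	  /* converges towards 256 */
	*total = (7 * *total + 256 * sample) / 8; /* scaled running total */
	*mean = (*total + 128) / *samples;	  /* rounded fixed-point mean */
}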
3286 ++
3287 ++static void bfq_update_io_seektime(struct bfq_data *bfqd,
3288 ++ struct bfq_queue *bfqq,
3289 ++ struct request *rq)
3290 ++{
3291 ++ sector_t sdist;
3292 ++ u64 total;
3293 ++
3294 ++ if (bfqq->last_request_pos < blk_rq_pos(rq))
3295 ++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
3296 ++ else
3297 ++ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
3298 ++
3299 ++ /*
3300 ++ * Don't allow the seek distance to get too large from the
3301 ++ * odd fragment, pagein, etc.
3302 ++ */
3303 ++ if (bfqq->seek_samples == 0) /* first request, not really a seek */
3304 ++ sdist = 0;
3305 ++ else if (bfqq->seek_samples <= 60) /* second & third seek */
3306 ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
3307 ++ else
3308 ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
3309 ++
3310 ++ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
3311 ++ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
3312 ++ total = bfqq->seek_total + (bfqq->seek_samples/2);
3313 ++ do_div(total, bfqq->seek_samples);
3314 ++ if (bfq_bfqq_coop(bfqq)) {
3315 ++ /*
3316 ++ * If the mean seektime increases for a (non-seeky) shared
3317 ++ * queue, some cooperator is likely to be idling too much.
3318 ++		 * Conversely, if it decreases, some cooperator has
3319 ++		 * probably woken up.
3320 ++ *
3321 ++ */
3322 ++ if ((sector_t)total < bfqq->seek_mean)
3323 ++			bfq_mark_bfqq_some_coop_idle(bfqq);
3324 ++ else if ((sector_t)total > bfqq->seek_mean)
3325 ++			bfq_clear_bfqq_some_coop_idle(bfqq);
3326 ++ }
3327 ++ bfqq->seek_mean = (sector_t)total;
3328 ++
3329 ++ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
3330 ++ (u64)bfqq->seek_mean);
3331 ++}
3332 ++
3333 ++/*
3334 ++ * Disable idle window if the process thinks too long or seeks so much that
3335 ++ * it doesn't matter.
3336 ++ */
3337 ++static void bfq_update_idle_window(struct bfq_data *bfqd,
3338 ++ struct bfq_queue *bfqq,
3339 ++ struct bfq_io_cq *bic)
3340 ++{
3341 ++ int enable_idle;
3342 ++
3343 ++ /* Don't idle for async or idle io prio class. */
3344 ++ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
3345 ++ return;
3346 ++
3347 ++ enable_idle = bfq_bfqq_idle_window(bfqq);
3348 ++
3349 ++ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
3350 ++ bfqd->bfq_slice_idle == 0 ||
3351 ++ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
3352 ++ bfqq->raising_coeff == 1))
3353 ++ enable_idle = 0;
3354 ++ else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
3355 ++ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
3356 ++ bfqq->raising_coeff == 1)
3357 ++ enable_idle = 0;
3358 ++ else
3359 ++ enable_idle = 1;
3360 ++ }
3361 ++ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
3362 ++ enable_idle);
3363 ++
3364 ++ if (enable_idle)
3365 ++ bfq_mark_bfqq_idle_window(bfqq);
3366 ++ else
3367 ++ bfq_clear_bfqq_idle_window(bfqq);
3368 ++}
3369 ++
3370 ++/*
3371 ++ * Called when a new fs request (rq) is added to bfqq. Check if there's
3372 ++ * something we should do about it.
3373 ++ */
3374 ++static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
3375 ++ struct request *rq)
3376 ++{
3377 ++ struct bfq_io_cq *bic = RQ_BIC(rq);
3378 ++
3379 ++ if (rq->cmd_flags & REQ_META)
3380 ++ bfqq->meta_pending++;
3381 ++
3382 ++ bfq_update_io_thinktime(bfqd, bic);
3383 ++ bfq_update_io_seektime(bfqd, bfqq, rq);
3384 ++ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
3385 ++ !BFQQ_SEEKY(bfqq))
3386 ++ bfq_update_idle_window(bfqd, bfqq, bic);
3387 ++
3388 ++ bfq_log_bfqq(bfqd, bfqq,
3389 ++ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
3390 ++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
3391 ++ (long long unsigned)bfqq->seek_mean);
3392 ++
3393 ++ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
3394 ++
3395 ++ if (bfqq == bfqd->active_queue) {
3396 ++ /*
3397 ++ * If there is just this request queued and the request
3398 ++ * is small, just exit.
3399 ++ * In this way, if the disk is being idled to wait for a new
3400 ++ * request from the active queue, we avoid unplugging the
3401 ++ * device now.
3402 ++ *
3403 ++		 * By doing so, we spare the disk from being committed
3404 ++		 * to serving just a small request. Instead, we wait for
3405 ++ * the block layer to decide when to unplug the device:
3406 ++ * hopefully, new requests will be merged to this
3407 ++ * one quickly, then the device will be unplugged
3408 ++ * and larger requests will be dispatched.
3409 ++ */
3410 ++ if (bfqq->queued[rq_is_sync(rq)] == 1 &&
3411 ++ blk_rq_sectors(rq) < 32) {
3412 ++ return;
3413 ++ }
3414 ++ if (bfq_bfqq_wait_request(bfqq)) {
3415 ++ /*
3416 ++ * If we are waiting for a request for this queue, let
3417 ++ * it rip immediately and flag that we must not expire
3418 ++ * this queue just now.
3419 ++ */
3420 ++ bfq_clear_bfqq_wait_request(bfqq);
3421 ++ del_timer(&bfqd->idle_slice_timer);
3422 ++ /*
3423 ++ * Here we can safely expire the queue, in
3424 ++ * case of budget timeout, without wasting
3425 ++ * guarantees
3426 ++ */
3427 ++ if (bfq_bfqq_budget_timeout(bfqq))
3428 ++ bfq_bfqq_expire(bfqd, bfqq, 0,
3429 ++ BFQ_BFQQ_BUDGET_TIMEOUT);
3430 ++ __blk_run_queue(bfqd->queue);
3431 ++ }
3432 ++ }
3433 ++}
3434 ++
3435 ++static void bfq_insert_request(struct request_queue *q, struct request *rq)
3436 ++{
3437 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
3438 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
3439 ++
3440 ++ assert_spin_locked(bfqd->queue->queue_lock);
3441 ++ bfq_init_prio_data(bfqq, RQ_BIC(rq));
3442 ++
3443 ++ bfq_add_rq_rb(rq);
3444 ++
3445 ++ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
3446 ++ list_add_tail(&rq->queuelist, &bfqq->fifo);
3447 ++
3448 ++ bfq_rq_enqueued(bfqd, bfqq, rq);
3449 ++}
3450 ++
3451 ++static void bfq_update_hw_tag(struct bfq_data *bfqd)
3452 ++{
3453 ++ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
3454 ++ bfqd->rq_in_driver);
3455 ++
3456 ++ if (bfqd->hw_tag == 1)
3457 ++ return;
3458 ++
3459 ++ /*
3460 ++ * This sample is valid if the number of outstanding requests
3461 ++ * is large enough to allow a queueing behavior. Note that the
3462 ++ * sum is not exact, as it's not taking into account deactivated
3463 ++ * requests.
3464 ++ */
3465 ++ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
3466 ++ return;
3467 ++
3468 ++ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
3469 ++ return;
3470 ++
3471 ++ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
3472 ++ bfqd->max_rq_in_driver = 0;
3473 ++ bfqd->hw_tag_samples = 0;
3474 ++}
3475 ++
3476 ++static void bfq_completed_request(struct request_queue *q, struct request *rq)
3477 ++{
3478 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
3479 ++ struct bfq_data *bfqd = bfqq->bfqd;
3480 ++ const int sync = rq_is_sync(rq);
3481 ++
3482 ++ bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)",
3483 ++ blk_rq_sectors(rq), sync);
3484 ++
3485 ++ bfq_update_hw_tag(bfqd);
3486 ++
3487 ++ WARN_ON(!bfqd->rq_in_driver);
3488 ++ WARN_ON(!bfqq->dispatched);
3489 ++ bfqd->rq_in_driver--;
3490 ++ bfqq->dispatched--;
3491 ++
3492 ++ if (bfq_bfqq_sync(bfqq))
3493 ++ bfqd->sync_flight--;
3494 ++
3495 ++ if (sync)
3496 ++ RQ_BIC(rq)->ttime.last_end_request = jiffies;
3497 ++
3498 ++ /*
3499 ++ * If this is the active queue, check if it needs to be expired,
3500 ++ * or if we want to idle in case it has no pending requests.
3501 ++ */
3502 ++ if (bfqd->active_queue == bfqq) {
3503 ++ int budg_timeout = bfq_may_expire_for_budg_timeout(bfqq);
3504 ++ if (bfq_bfqq_budget_new(bfqq))
3505 ++ bfq_set_budget_timeout(bfqd);
3506 ++
3507 ++		/* Idling is also disabled for cooperation reasons:
3508 ++		 * 1) there is a close cooperator for the queue, or
3509 ++		 * 2) the queue is shared and some cooperator is likely
3510 ++		 * to be idle (in this case, by not arming the idle timer,
3511 ++		 * we try to slow down the queue, to prevent the zones
3512 ++		 * of the disk accessed by the active cooperators from
3513 ++		 * becoming too distant from the zone that will be accessed
3514 ++		 * by the currently idle cooperators).
3515 ++ */
3516 ++ if (bfq_bfqq_must_idle(bfqq, budg_timeout))
3517 ++ bfq_arm_slice_timer(bfqd);
3518 ++ else if (budg_timeout)
3519 ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
3520 ++ }
3521 ++
3522 ++ if (!bfqd->rq_in_driver)
3523 ++ bfq_schedule_dispatch(bfqd);
3524 ++}
3525 ++
3526 ++static inline int __bfq_may_queue(struct bfq_queue *bfqq)
3527 ++{
3528 ++ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
3529 ++ bfq_clear_bfqq_must_alloc(bfqq);
3530 ++ return ELV_MQUEUE_MUST;
3531 ++ }
3532 ++
3533 ++ return ELV_MQUEUE_MAY;
3534 ++}
3535 ++
3536 ++static int bfq_may_queue(struct request_queue *q, int rw)
3537 ++{
3538 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
3539 ++ struct task_struct *tsk = current;
3540 ++ struct bfq_io_cq *bic;
3541 ++ struct bfq_queue *bfqq;
3542 ++
3543 ++ /*
3544 ++ * Don't force setup of a queue from here, as a call to may_queue
3545 ++ * does not necessarily imply that a request actually will be queued.
3546 ++	 * So just look up a possibly existing queue, or return 'may queue'
3547 ++ * if that fails.
3548 ++ */
3549 ++ bic = bfq_bic_lookup(bfqd, tsk->io_context);
3550 ++ if (bic == NULL)
3551 ++ return ELV_MQUEUE_MAY;
3552 ++
3553 ++ bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
3554 ++ if (bfqq != NULL) {
3555 ++ bfq_init_prio_data(bfqq, bic);
3556 ++
3557 ++ return __bfq_may_queue(bfqq);
3558 ++ }
3559 ++
3560 ++ return ELV_MQUEUE_MAY;
3561 ++}
3562 ++
3563 ++/*
3564 ++ * Queue lock held here.
3565 ++ */
3566 ++static void bfq_put_request(struct request *rq)
3567 ++{
3568 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
3569 ++
3570 ++ if (bfqq != NULL) {
3571 ++ const int rw = rq_data_dir(rq);
3572 ++
3573 ++ BUG_ON(!bfqq->allocated[rw]);
3574 ++ bfqq->allocated[rw]--;
3575 ++
3576 ++ rq->elv.priv[0] = NULL;
3577 ++ rq->elv.priv[1] = NULL;
3578 ++
3579 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
3580 ++ bfqq, atomic_read(&bfqq->ref));
3581 ++ bfq_put_queue(bfqq);
3582 ++ }
3583 ++}
3584 ++
3585 ++static struct bfq_queue *
3586 ++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
3587 ++ struct bfq_queue *bfqq)
3588 ++{
3589 ++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
3590 ++ (long unsigned)bfqq->new_bfqq->pid);
3591 ++ bic_set_bfqq(bic, bfqq->new_bfqq, 1);
3592 ++ bfq_mark_bfqq_coop(bfqq->new_bfqq);
3593 ++ bfq_put_queue(bfqq);
3594 ++ return bic_to_bfqq(bic, 1);
3595 ++}
3596 ++
3597 ++/*
3598 ++ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
3599 ++ * was the last process referring to said bfqq.
3600 ++ */
3601 ++static struct bfq_queue *
3602 ++bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
3603 ++{
3604 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
3605 ++ if (bfqq_process_refs(bfqq) == 1) {
3606 ++ bfqq->pid = current->pid;
3607 ++ bfq_clear_bfqq_some_coop_idle(bfqq);
3608 ++ bfq_clear_bfqq_coop(bfqq);
3609 ++ bfq_clear_bfqq_split_coop(bfqq);
3610 ++ return bfqq;
3611 ++ }
3612 ++
3613 ++ bic_set_bfqq(bic, NULL, 1);
3614 ++
3615 ++ bfq_put_cooperator(bfqq);
3616 ++
3617 ++ bfq_put_queue(bfqq);
3618 ++ return NULL;
3619 ++}
3620 ++
3621 ++/*
3622 ++ * Allocate bfq data structures associated with this request.
3623 ++ */
3624 ++static int bfq_set_request(struct request_queue *q, struct request *rq,
3625 ++ struct bio *bio, gfp_t gfp_mask)
3626 ++{
3627 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
3628 ++ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
3629 ++ const int rw = rq_data_dir(rq);
3630 ++ const int is_sync = rq_is_sync(rq);
3631 ++ struct bfq_queue *bfqq;
3632 ++ struct bfq_group *bfqg;
3633 ++ unsigned long flags;
3634 ++
3635 ++ might_sleep_if(gfp_mask & __GFP_WAIT);
3636 ++
3637 ++ bfq_changed_ioprio(bic);
3638 ++
3639 ++ spin_lock_irqsave(q->queue_lock, flags);
3640 ++
3641 ++ if (bic == NULL)
3642 ++ goto queue_fail;
3643 ++
3644 ++ bfqg = bfq_bic_update_cgroup(bic);
3645 ++
3646 ++new_queue:
3647 ++ bfqq = bic_to_bfqq(bic, is_sync);
3648 ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
3649 ++ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
3650 ++ bic_set_bfqq(bic, bfqq, is_sync);
3651 ++ } else {
3652 ++ /*
3653 ++ * If the queue was seeky for too long, break it apart.
3654 ++ */
3655 ++ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
3656 ++ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
3657 ++ bfqq = bfq_split_bfqq(bic, bfqq);
3658 ++ if (!bfqq)
3659 ++ goto new_queue;
3660 ++ }
3661 ++
3662 ++ /*
3663 ++ * Check to see if this queue is scheduled to merge with
3664 ++ * another closely cooperating queue. The merging of queues
3665 ++ * happens here as it must be done in process context.
3666 ++ * The reference on new_bfqq was taken in merge_bfqqs.
3667 ++ */
3668 ++ if (bfqq->new_bfqq != NULL)
3669 ++ bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
3670 ++ }
3671 ++
3672 ++ bfqq->allocated[rw]++;
3673 ++ atomic_inc(&bfqq->ref);
3674 ++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
3675 ++ atomic_read(&bfqq->ref));
3676 ++
3677 ++ rq->elv.priv[0] = bic;
3678 ++ rq->elv.priv[1] = bfqq;
3679 ++
3680 ++ spin_unlock_irqrestore(q->queue_lock, flags);
3681 ++
3682 ++ return 0;
3683 ++
3684 ++queue_fail:
3685 ++ bfq_schedule_dispatch(bfqd);
3686 ++ spin_unlock_irqrestore(q->queue_lock, flags);
3687 ++
3688 ++ return 1;
3689 ++}
3690 ++
3691 ++static void bfq_kick_queue(struct work_struct *work)
3692 ++{
3693 ++ struct bfq_data *bfqd =
3694 ++ container_of(work, struct bfq_data, unplug_work);
3695 ++ struct request_queue *q = bfqd->queue;
3696 ++
3697 ++ spin_lock_irq(q->queue_lock);
3698 ++ __blk_run_queue(q);
3699 ++ spin_unlock_irq(q->queue_lock);
3700 ++}
3701 ++
3702 ++/*
3703 ++ * Handler of the expiration of the timer running if the active_queue
3704 ++ * is idling inside its time slice.
3705 ++ */
3706 ++static void bfq_idle_slice_timer(unsigned long data)
3707 ++{
3708 ++ struct bfq_data *bfqd = (struct bfq_data *)data;
3709 ++ struct bfq_queue *bfqq;
3710 ++ unsigned long flags;
3711 ++ enum bfqq_expiration reason;
3712 ++
3713 ++ spin_lock_irqsave(bfqd->queue->queue_lock, flags);
3714 ++
3715 ++ bfqq = bfqd->active_queue;
3716 ++ /*
3717 ++ * Theoretical race here: active_queue can be NULL or different
3718 ++ * from the queue that was idling if the timer handler spins on
3719 ++ * the queue_lock and a new request arrives for the current
3720 ++ * queue and there is a full dispatch cycle that changes the
3721 ++ * active_queue. This can hardly happen, but in the worst case
3722 ++ * we just expire a queue too early.
3723 ++ */
3724 ++ if (bfqq != NULL) {
3725 ++ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
3726 ++ if (bfq_bfqq_budget_timeout(bfqq))
3727 ++ /*
3728 ++ * Also here the queue can be safely expired
3729 ++ * for budget timeout without wasting
3730 ++ * guarantees
3731 ++ */
3732 ++ reason = BFQ_BFQQ_BUDGET_TIMEOUT;
3733 ++ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
3734 ++ /*
3735 ++ * The queue may not be empty upon timer expiration,
3736 ++ * because we may not disable the timer when the first
3737 ++ * request of the active queue arrives during
3738 ++ * disk idling
3739 ++ */
3740 ++ reason = BFQ_BFQQ_TOO_IDLE;
3741 ++ else
3742 ++ goto schedule_dispatch;
3743 ++
3744 ++ bfq_bfqq_expire(bfqd, bfqq, 1, reason);
3745 ++ }
3746 ++
3747 ++schedule_dispatch:
3748 ++ bfq_schedule_dispatch(bfqd);
3749 ++
3750 ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
3751 ++}
3752 ++
3753 ++static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
3754 ++{
3755 ++ del_timer_sync(&bfqd->idle_slice_timer);
3756 ++ cancel_work_sync(&bfqd->unplug_work);
3757 ++}
3758 ++
3759 ++static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd,
3760 ++ struct bfq_queue **bfqq_ptr)
3761 ++{
3762 ++ struct bfq_group *root_group = bfqd->root_group;
3763 ++ struct bfq_queue *bfqq = *bfqq_ptr;
3764 ++
3765 ++ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
3766 ++ if (bfqq != NULL) {
3767 ++ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
3768 ++ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
3769 ++ bfqq, atomic_read(&bfqq->ref));
3770 ++ bfq_put_queue(bfqq);
3771 ++ *bfqq_ptr = NULL;
3772 ++ }
3773 ++}
3774 ++
3775 ++/*
3776 ++ * Release all the bfqg references to its async queues. If we are
3777 ++ * deallocating the group these queues may still contain requests, so
3778 ++ * we reparent them to the root cgroup (i.e., the only one that will
3779 ++ * exist for sure until all the requests on a device are gone).
3780 ++ */
3781 ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
3782 ++{
3783 ++ int i, j;
3784 ++
3785 ++ for (i = 0; i < 2; i++)
3786 ++ for (j = 0; j < IOPRIO_BE_NR; j++)
3787 ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
3788 ++
3789 ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
3790 ++}
3791 ++
3792 ++static void bfq_exit_queue(struct elevator_queue *e)
3793 ++{
3794 ++ struct bfq_data *bfqd = e->elevator_data;
3795 ++ struct request_queue *q = bfqd->queue;
3796 ++ struct bfq_queue *bfqq, *n;
3797 ++
3798 ++ bfq_shutdown_timer_wq(bfqd);
3799 ++
3800 ++ spin_lock_irq(q->queue_lock);
3801 ++
3802 ++ BUG_ON(bfqd->active_queue != NULL);
3803 ++ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
3804 ++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
3805 ++
3806 ++ bfq_disconnect_groups(bfqd);
3807 ++ spin_unlock_irq(q->queue_lock);
3808 ++
3809 ++ bfq_shutdown_timer_wq(bfqd);
3810 ++
3811 ++ synchronize_rcu();
3812 ++
3813 ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
3814 ++
3815 ++ bfq_free_root_group(bfqd);
3816 ++ kfree(bfqd);
3817 ++}
3818 ++
3819 ++static int bfq_init_queue(struct request_queue *q)
3820 ++{
3821 ++ struct bfq_group *bfqg;
3822 ++ struct bfq_data *bfqd;
3823 ++
3824 ++ bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node);
3825 ++ if (bfqd == NULL)
3826 ++ return -ENOMEM;
3827 ++
3828 ++ /*
3829 ++ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
3830 ++ * Grab a permanent reference to it, so that the normal code flow
3831 ++ * will not attempt to free it.
3832 ++ */
3833 ++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0);
3834 ++ atomic_inc(&bfqd->oom_bfqq.ref);
3835 ++
3836 ++ bfqd->queue = q;
3837 ++ q->elevator->elevator_data = bfqd;
3838 ++
3839 ++ bfqg = bfq_alloc_root_group(bfqd, q->node);
3840 ++ if (bfqg == NULL) {
3841 ++ kfree(bfqd);
3842 ++ return -ENOMEM;
3843 ++ }
3844 ++
3845 ++ bfqd->root_group = bfqg;
3846 ++
3847 ++ init_timer(&bfqd->idle_slice_timer);
3848 ++ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
3849 ++ bfqd->idle_slice_timer.data = (unsigned long)bfqd;
3850 ++
3851 ++ bfqd->rq_pos_tree = RB_ROOT;
3852 ++
3853 ++ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
3854 ++
3855 ++ INIT_LIST_HEAD(&bfqd->active_list);
3856 ++ INIT_LIST_HEAD(&bfqd->idle_list);
3857 ++
3858 ++ bfqd->hw_tag = -1;
3859 ++
3860 ++ bfqd->bfq_max_budget = bfq_default_max_budget;
3861 ++
3862 ++ bfqd->bfq_quantum = bfq_quantum;
3863 ++ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
3864 ++ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
3865 ++ bfqd->bfq_back_max = bfq_back_max;
3866 ++ bfqd->bfq_back_penalty = bfq_back_penalty;
3867 ++ bfqd->bfq_slice_idle = bfq_slice_idle;
3868 ++ bfqd->bfq_class_idle_last_service = 0;
3869 ++ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
3870 ++ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
3871 ++ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
3872 ++
3873 ++ bfqd->low_latency = true;
3874 ++
3875 ++ bfqd->bfq_raising_coeff = 20;
3876 ++ bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300);
3877 ++ bfqd->bfq_raising_max_time = 0;
3878 ++ bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000);
3879 ++ bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500);
3880 ++ bfqd->bfq_raising_max_softrt_rate = 7000;
3881 ++
3882 ++ /* Initially estimate the device's peak rate as the reference rate */
3883 ++ if (blk_queue_nonrot(bfqd->queue)) {
3884 ++ bfqd->RT_prod = R_nonrot * T_nonrot;
3885 ++ bfqd->peak_rate = R_nonrot;
3886 ++ } else {
3887 ++ bfqd->RT_prod = R_rot * T_rot;
3888 ++ bfqd->peak_rate = R_rot;
3889 ++ }
3890 ++
3891 ++ return 0;
3892 ++}
3893 ++
3894 ++static void bfq_slab_kill(void)
3895 ++{
3896 ++ if (bfq_pool != NULL)
3897 ++ kmem_cache_destroy(bfq_pool);
3898 ++}
3899 ++
3900 ++static int __init bfq_slab_setup(void)
3901 ++{
3902 ++ bfq_pool = KMEM_CACHE(bfq_queue, 0);
3903 ++ if (bfq_pool == NULL)
3904 ++ return -ENOMEM;
3905 ++ return 0;
3906 ++}
3907 ++
3908 ++static ssize_t bfq_var_show(unsigned int var, char *page)
3909 ++{
3910 ++ return sprintf(page, "%d\n", var);
3911 ++}
3912 ++
3913 ++static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count)
3914 ++{
3915 ++ unsigned long new_val;
3916 ++ int ret = strict_strtoul(page, 10, &new_val);
3917 ++
3918 ++ if (ret == 0)
3919 ++ *var = new_val;
3920 ++
3921 ++ return count;
3922 ++}
3923 ++
3924 ++static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page)
3925 ++{
3926 ++ struct bfq_data *bfqd = e->elevator_data;
3927 ++ return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ?
3928 ++ jiffies_to_msecs(bfqd->bfq_raising_max_time) :
3929 ++ jiffies_to_msecs(bfq_wrais_duration(bfqd)));
3930 ++}
3931 ++
3932 ++static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
3933 ++{
3934 ++ struct bfq_queue *bfqq;
3935 ++ struct bfq_data *bfqd = e->elevator_data;
3936 ++ ssize_t num_char = 0;
3937 ++
3938 ++ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
3939 ++ bfqd->queued);
3940 ++
3941 ++ spin_lock_irq(bfqd->queue->queue_lock);
3942 ++
3943 ++ num_char += sprintf(page + num_char, "Active:\n");
3944 ++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
3945 ++ num_char += sprintf(page + num_char,
3946 ++ "pid%d: weight %hu, nr_queued %d %d,"
3947 ++ " dur %d/%u\n",
3948 ++ bfqq->pid,
3949 ++ bfqq->entity.weight,
3950 ++ bfqq->queued[0],
3951 ++ bfqq->queued[1],
3952 ++ jiffies_to_msecs(jiffies -
3953 ++ bfqq->last_rais_start_finish),
3954 ++ jiffies_to_msecs(bfqq->raising_cur_max_time));
3955 ++ }
3956 ++
3957 ++ num_char += sprintf(page + num_char, "Idle:\n");
3958 ++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
3959 ++ num_char += sprintf(page + num_char,
3960 ++ "pid%d: weight %hu, dur %d/%u\n",
3961 ++ bfqq->pid,
3962 ++ bfqq->entity.weight,
3963 ++ jiffies_to_msecs(jiffies -
3964 ++ bfqq->last_rais_start_finish),
3965 ++ jiffies_to_msecs(bfqq->raising_cur_max_time));
3966 ++ }
3967 ++
3968 ++ spin_unlock_irq(bfqd->queue->queue_lock);
3969 ++
3970 ++ return num_char;
3971 ++}
3972 ++
3973 ++#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
3974 ++static ssize_t __FUNC(struct elevator_queue *e, char *page) \
3975 ++{ \
3976 ++ struct bfq_data *bfqd = e->elevator_data; \
3977 ++ unsigned int __data = __VAR; \
3978 ++ if (__CONV) \
3979 ++ __data = jiffies_to_msecs(__data); \
3980 ++ return bfq_var_show(__data, (page)); \
3981 ++}
3982 ++SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0);
3983 ++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
3984 ++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
3985 ++SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
3986 ++SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
3987 ++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
3988 ++SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
3989 ++SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0);
3990 ++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
3991 ++SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
3992 ++SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
3993 ++SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0);
3994 ++SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1);
3995 ++SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time,
3996 ++ 1);
3997 ++SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show,
3998 ++ bfqd->bfq_raising_min_inter_arr_async,
3999 ++ 1);
4000 ++SHOW_FUNCTION(bfq_raising_max_softrt_rate_show,
4001 ++ bfqd->bfq_raising_max_softrt_rate, 0);
4002 ++#undef SHOW_FUNCTION
4003 ++
4004 ++#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
4005 ++static ssize_t \
4006 ++__FUNC(struct elevator_queue *e, const char *page, size_t count) \
4007 ++{ \
4008 ++ struct bfq_data *bfqd = e->elevator_data; \
4009 ++ unsigned long uninitialized_var(__data); \
4010 ++ int ret = bfq_var_store(&__data, (page), count); \
4011 ++ if (__data < (MIN)) \
4012 ++ __data = (MIN); \
4013 ++ else if (__data > (MAX)) \
4014 ++ __data = (MAX); \
4015 ++ if (__CONV) \
4016 ++ *(__PTR) = msecs_to_jiffies(__data); \
4017 ++ else \
4018 ++ *(__PTR) = __data; \
4019 ++ return ret; \
4020 ++}
4021 ++STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0);
4022 ++STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
4023 ++ INT_MAX, 1);
4024 ++STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
4025 ++ INT_MAX, 1);
4026 ++STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
4027 ++STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
4028 ++ INT_MAX, 0);
4029 ++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
4030 ++STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
4031 ++ 1, INT_MAX, 0);
4032 ++STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
4033 ++ INT_MAX, 1);
4034 ++STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1,
4035 ++ INT_MAX, 0);
4036 ++STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0,
4037 ++ INT_MAX, 1);
4038 ++STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0,
4039 ++ INT_MAX, 1);
4040 ++STORE_FUNCTION(bfq_raising_min_idle_time_store,
4041 ++ &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1);
4042 ++STORE_FUNCTION(bfq_raising_min_inter_arr_async_store,
4043 ++ &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1);
4044 ++STORE_FUNCTION(bfq_raising_max_softrt_rate_store,
4045 ++ &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0);
4046 ++#undef STORE_FUNCTION
4047 ++
4048 ++/* do nothing for the moment */
4049 ++static ssize_t bfq_weights_store(struct elevator_queue *e,
4050 ++ const char *page, size_t count)
4051 ++{
4052 ++ return count;
4053 ++}
4054 ++
4055 ++static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
4056 ++{
4057 ++ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
4058 ++
4059 ++ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
4060 ++ return bfq_calc_max_budget(bfqd->peak_rate, timeout);
4061 ++ else
4062 ++ return bfq_default_max_budget;
4063 ++}
4064 ++
4065 ++static ssize_t bfq_max_budget_store(struct elevator_queue *e,
4066 ++ const char *page, size_t count)
4067 ++{
4068 ++ struct bfq_data *bfqd = e->elevator_data;
4069 ++ unsigned long uninitialized_var(__data);
4070 ++ int ret = bfq_var_store(&__data, (page), count);
4071 ++
4072 ++ if (__data == 0)
4073 ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
4074 ++ else {
4075 ++ if (__data > INT_MAX)
4076 ++ __data = INT_MAX;
4077 ++ bfqd->bfq_max_budget = __data;
4078 ++ }
4079 ++
4080 ++ bfqd->bfq_user_max_budget = __data;
4081 ++
4082 ++ return ret;
4083 ++}
4084 ++
4085 ++static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
4086 ++ const char *page, size_t count)
4087 ++{
4088 ++ struct bfq_data *bfqd = e->elevator_data;
4089 ++ unsigned long uninitialized_var(__data);
4090 ++ int ret = bfq_var_store(&__data, (page), count);
4091 ++
4092 ++ if (__data < 1)
4093 ++ __data = 1;
4094 ++ else if (__data > INT_MAX)
4095 ++ __data = INT_MAX;
4096 ++
4097 ++ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
4098 ++ if (bfqd->bfq_user_max_budget == 0)
4099 ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
4100 ++
4101 ++ return ret;
4102 ++}
4103 ++
4104 ++static ssize_t bfq_low_latency_store(struct elevator_queue *e,
4105 ++ const char *page, size_t count)
4106 ++{
4107 ++ struct bfq_data *bfqd = e->elevator_data;
4108 ++ unsigned long uninitialized_var(__data);
4109 ++ int ret = bfq_var_store(&__data, (page), count);
4110 ++
4111 ++ if (__data > 1)
4112 ++ __data = 1;
4113 ++ if (__data == 0 && bfqd->low_latency != 0)
4114 ++ bfq_end_raising(bfqd);
4115 ++ bfqd->low_latency = __data;
4116 ++
4117 ++ return ret;
4118 ++}
4119 ++
4120 ++#define BFQ_ATTR(name) \
4121 ++ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
4122 ++
4123 ++static struct elv_fs_entry bfq_attrs[] = {
4124 ++ BFQ_ATTR(quantum),
4125 ++ BFQ_ATTR(fifo_expire_sync),
4126 ++ BFQ_ATTR(fifo_expire_async),
4127 ++ BFQ_ATTR(back_seek_max),
4128 ++ BFQ_ATTR(back_seek_penalty),
4129 ++ BFQ_ATTR(slice_idle),
4130 ++ BFQ_ATTR(max_budget),
4131 ++ BFQ_ATTR(max_budget_async_rq),
4132 ++ BFQ_ATTR(timeout_sync),
4133 ++ BFQ_ATTR(timeout_async),
4134 ++ BFQ_ATTR(low_latency),
4135 ++ BFQ_ATTR(raising_coeff),
4136 ++ BFQ_ATTR(raising_max_time),
4137 ++ BFQ_ATTR(raising_rt_max_time),
4138 ++ BFQ_ATTR(raising_min_idle_time),
4139 ++ BFQ_ATTR(raising_min_inter_arr_async),
4140 ++ BFQ_ATTR(raising_max_softrt_rate),
4141 ++ BFQ_ATTR(weights),
4142 ++ __ATTR_NULL
4143 ++};
4144 ++
4145 ++static struct elevator_type iosched_bfq = {
4146 ++ .ops = {
4147 ++ .elevator_merge_fn = bfq_merge,
4148 ++ .elevator_merged_fn = bfq_merged_request,
4149 ++ .elevator_merge_req_fn = bfq_merged_requests,
4150 ++ .elevator_allow_merge_fn = bfq_allow_merge,
4151 ++ .elevator_dispatch_fn = bfq_dispatch_requests,
4152 ++ .elevator_add_req_fn = bfq_insert_request,
4153 ++ .elevator_activate_req_fn = bfq_activate_request,
4154 ++ .elevator_deactivate_req_fn = bfq_deactivate_request,
4155 ++ .elevator_completed_req_fn = bfq_completed_request,
4156 ++ .elevator_former_req_fn = elv_rb_former_request,
4157 ++ .elevator_latter_req_fn = elv_rb_latter_request,
4158 ++ .elevator_init_icq_fn = bfq_init_icq,
4159 ++ .elevator_exit_icq_fn = bfq_exit_icq,
4160 ++ .elevator_set_req_fn = bfq_set_request,
4161 ++ .elevator_put_req_fn = bfq_put_request,
4162 ++ .elevator_may_queue_fn = bfq_may_queue,
4163 ++ .elevator_init_fn = bfq_init_queue,
4164 ++ .elevator_exit_fn = bfq_exit_queue,
4165 ++ },
4166 ++ .icq_size = sizeof(struct bfq_io_cq),
4167 ++ .icq_align = __alignof__(struct bfq_io_cq),
4168 ++ .elevator_attrs = bfq_attrs,
4169 ++ .elevator_name = "bfq",
4170 ++ .elevator_owner = THIS_MODULE,
4171 ++};
4172 ++
4173 ++static int __init bfq_init(void)
4174 ++{
4175 ++ /*
4176 ++ * Can be 0 on HZ < 1000 setups.
4177 ++ */
4178 ++ if (bfq_slice_idle == 0)
4179 ++ bfq_slice_idle = 1;
4180 ++
4181 ++ if (bfq_timeout_async == 0)
4182 ++ bfq_timeout_async = 1;
4183 ++
4184 ++ if (bfq_slab_setup())
4185 ++ return -ENOMEM;
4186 ++
4187 ++ elv_register(&iosched_bfq);
4188 ++
4189 ++ return 0;
4190 ++}
4191 ++
4192 ++static void __exit bfq_exit(void)
4193 ++{
4194 ++ elv_unregister(&iosched_bfq);
4195 ++ bfq_slab_kill();
4196 ++}
4197 ++
4198 ++module_init(bfq_init);
4199 ++module_exit(bfq_exit);
4200 ++
4201 ++MODULE_AUTHOR("Fabio Checconi, Paolo Valente");
4202 ++MODULE_LICENSE("GPL");
4203 ++MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler");
4204 +diff --git a/block/bfq-sched.c b/block/bfq-sched.c
4205 +new file mode 100644
4206 +index 0000000..03f8061
4207 +--- /dev/null
4208 ++++ b/block/bfq-sched.c
4209 +@@ -0,0 +1,1072 @@
4210 ++/*
4211 ++ * BFQ: Hierarchical B-WF2Q+ scheduler.
4212 ++ *
4213 ++ * Based on ideas and code from CFQ:
4214 ++ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
4215 ++ *
4216 ++ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
4217 ++ * Paolo Valente <paolo.valente@×××××××.it>
4218 ++ *
4219 ++ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
4220 ++ */
4221 ++
4222 ++#ifdef CONFIG_CGROUP_BFQIO
4223 ++#define for_each_entity(entity) \
4224 ++ for (; entity != NULL; entity = entity->parent)
4225 ++
4226 ++#define for_each_entity_safe(entity, parent) \
4227 ++ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
4228 ++
4229 ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
4230 ++ int extract,
4231 ++ struct bfq_data *bfqd);
4232 ++
4233 ++static inline void bfq_update_budget(struct bfq_entity *next_active)
4234 ++{
4235 ++ struct bfq_entity *bfqg_entity;
4236 ++ struct bfq_group *bfqg;
4237 ++ struct bfq_sched_data *group_sd;
4238 ++
4239 ++ BUG_ON(next_active == NULL);
4240 ++
4241 ++ group_sd = next_active->sched_data;
4242 ++
4243 ++ bfqg = container_of(group_sd, struct bfq_group, sched_data);
4244 ++ /*
4245 ++ * bfq_group's my_entity field is not NULL only if the group
4246 ++ * is not the root group. We must not touch the root entity
4247 ++ * as it must never become an active entity.
4248 ++ */
4249 ++ bfqg_entity = bfqg->my_entity;
4250 ++ if (bfqg_entity != NULL)
4251 ++ bfqg_entity->budget = next_active->budget;
4252 ++}
4253 ++
4254 ++static int bfq_update_next_active(struct bfq_sched_data *sd)
4255 ++{
4256 ++ struct bfq_entity *next_active;
4257 ++
4258 ++ if (sd->active_entity != NULL)
4259 ++ /* will update/requeue at the end of service */
4260 ++ return 0;
4261 ++
4262 ++ /*
4263 ++ * NOTE: this can be improved in many ways, such as returning
4264 ++ * 1 (and thus propagating upwards the update) only when the
4265 ++ * budget changes, or caching the bfqq that will be scheduled
4266 ++	 * next from this subtree. For now we worry more about
4267 ++ * correctness than about performance...
4268 ++ */
4269 ++ next_active = bfq_lookup_next_entity(sd, 0, NULL);
4270 ++ sd->next_active = next_active;
4271 ++
4272 ++ if (next_active != NULL)
4273 ++ bfq_update_budget(next_active);
4274 ++
4275 ++ return 1;
4276 ++}
4277 ++
4278 ++static inline void bfq_check_next_active(struct bfq_sched_data *sd,
4279 ++ struct bfq_entity *entity)
4280 ++{
4281 ++ BUG_ON(sd->next_active != entity);
4282 ++}
4283 ++#else
4284 ++#define for_each_entity(entity) \
4285 ++ for (; entity != NULL; entity = NULL)
4286 ++
4287 ++#define for_each_entity_safe(entity, parent) \
4288 ++ for (parent = NULL; entity != NULL; entity = parent)
4289 ++
4290 ++static inline int bfq_update_next_active(struct bfq_sched_data *sd)
4291 ++{
4292 ++ return 0;
4293 ++}
4294 ++
4295 ++static inline void bfq_check_next_active(struct bfq_sched_data *sd,
4296 ++ struct bfq_entity *entity)
4297 ++{
4298 ++}
4299 ++
4300 ++static inline void bfq_update_budget(struct bfq_entity *next_active)
4301 ++{
4302 ++}
4303 ++#endif
4304 ++
4305 ++/*
4306 ++ * Shift for timestamp calculations. This actually limits the maximum
4307 ++ * service allowed in one timestamp delta (small shift values increase it),
4308 ++ * the maximum total weight that can be used for the queues in the system
4309 ++ * (big shift values increase it), and the period of virtual time wraparounds.
4310 ++ */
4311 ++#define WFQ_SERVICE_SHIFT 22
4312 ++
4313 ++/**
4314 ++ * bfq_gt - compare two timestamps.
4315 ++ * @a: first ts.
4316 ++ * @b: second ts.
4317 ++ *
4318 ++ * Return @a > @b, dealing with wrapping correctly.
4319 ++ */
4320 ++static inline int bfq_gt(u64 a, u64 b)
4321 ++{
4322 ++ return (s64)(a - b) > 0;
4323 ++}
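/*
 * Illustrative aside: bfq_gt() compares possibly wrapping 64-bit
 * timestamps through the sign of their unsigned difference, the same
 * idiom the kernel uses for jiffies in time_after(). With values chosen
 * only for illustration:
 *
 *	a = 0x0000000000000001ULL   (just after a wraparound)
 *	b = 0xffffffffffffffffULL   (just before the wraparound)
 *
 * a - b is 2 modulo 2^64, so (s64)(a - b) > 0 and bfq_gt(a, b) holds,
 * even though a < b as plain unsigned values. The comparison stays
 * correct as long as the two timestamps are less than 2^63 apart.
 */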
4324 ++
4325 ++static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
4326 ++{
4327 ++ struct bfq_queue *bfqq = NULL;
4328 ++
4329 ++ BUG_ON(entity == NULL);
4330 ++
4331 ++ if (entity->my_sched_data == NULL)
4332 ++ bfqq = container_of(entity, struct bfq_queue, entity);
4333 ++
4334 ++ return bfqq;
4335 ++}
4336 ++
4337 ++
4338 ++/**
4339 ++ * bfq_delta - map service into the virtual time domain.
4340 ++ * @service: amount of service.
4341 ++ * @weight: scale factor (weight of an entity or weight sum).
4342 ++ */
4343 ++static inline u64 bfq_delta(unsigned long service,
4344 ++ unsigned long weight)
4345 ++{
4346 ++ u64 d = (u64)service << WFQ_SERVICE_SHIFT;
4347 ++
4348 ++ do_div(d, weight);
4349 ++ return d;
4350 ++}
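/*
 * Illustrative aside: bfq_delta() maps service into virtual time as
 * (service << WFQ_SERVICE_SHIFT) / weight. With WFQ_SERVICE_SHIFT == 22,
 * charging 8 sectors to an entity of weight 4 advances its timestamps by
 * (8 << 22) / 4 = 2^23 units, while the same 8 sectors cost an entity of
 * weight 8 only 2^22 units: a larger weight consumes virtual time more
 * slowly and therefore receives a proportionally larger share.
 */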
4351 ++
4352 ++/**
4353 ++ * bfq_calc_finish - assign the finish time to an entity.
4354 ++ * @entity: the entity to act upon.
4355 ++ * @service: the service to be charged to the entity.
4356 ++ */
4357 ++static inline void bfq_calc_finish(struct bfq_entity *entity,
4358 ++ unsigned long service)
4359 ++{
4360 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4361 ++
4362 ++ BUG_ON(entity->weight == 0);
4363 ++
4364 ++ entity->finish = entity->start +
4365 ++ bfq_delta(service, entity->weight);
4366 ++
4367 ++ if (bfqq != NULL) {
4368 ++ bfq_log_bfqq(bfqq->bfqd, bfqq,
4369 ++ "calc_finish: serv %lu, w %d",
4370 ++ service, entity->weight);
4371 ++ bfq_log_bfqq(bfqq->bfqd, bfqq,
4372 ++ "calc_finish: start %llu, finish %llu, delta %llu",
4373 ++ entity->start, entity->finish,
4374 ++ bfq_delta(service, entity->weight));
4375 ++ }
4376 ++}
4377 ++
4378 ++/**
4379 ++ * bfq_entity_of - get an entity from a node.
4380 ++ * @node: the node field of the entity.
4381 ++ *
4382 ++ * Convert a node pointer to the corresponding entity. This is used only
4383 ++ * to simplify the logic of some functions and not as the generic
4384 ++ * conversion mechanism because, e.g., in the tree walking functions,
4385 ++ * the check for a %NULL value would be redundant.
4386 ++ */
4387 ++static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
4388 ++{
4389 ++ struct bfq_entity *entity = NULL;
4390 ++
4391 ++ if (node != NULL)
4392 ++ entity = rb_entry(node, struct bfq_entity, rb_node);
4393 ++
4394 ++ return entity;
4395 ++}
4396 ++
4397 ++/**
4398 ++ * bfq_extract - remove an entity from a tree.
4399 ++ * @root: the tree root.
4400 ++ * @entity: the entity to remove.
4401 ++ */
4402 ++static inline void bfq_extract(struct rb_root *root,
4403 ++ struct bfq_entity *entity)
4404 ++{
4405 ++ BUG_ON(entity->tree != root);
4406 ++
4407 ++ entity->tree = NULL;
4408 ++ rb_erase(&entity->rb_node, root);
4409 ++}
4410 ++
4411 ++/**
4412 ++ * bfq_idle_extract - extract an entity from the idle tree.
4413 ++ * @st: the service tree of the owning @entity.
4414 ++ * @entity: the entity being removed.
4415 ++ */
4416 ++static void bfq_idle_extract(struct bfq_service_tree *st,
4417 ++ struct bfq_entity *entity)
4418 ++{
4419 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4420 ++ struct rb_node *next;
4421 ++
4422 ++ BUG_ON(entity->tree != &st->idle);
4423 ++
4424 ++ if (entity == st->first_idle) {
4425 ++ next = rb_next(&entity->rb_node);
4426 ++ st->first_idle = bfq_entity_of(next);
4427 ++ }
4428 ++
4429 ++ if (entity == st->last_idle) {
4430 ++ next = rb_prev(&entity->rb_node);
4431 ++ st->last_idle = bfq_entity_of(next);
4432 ++ }
4433 ++
4434 ++ bfq_extract(&st->idle, entity);
4435 ++
4436 ++ if (bfqq != NULL)
4437 ++ list_del(&bfqq->bfqq_list);
4438 ++}
4439 ++
4440 ++/**
4441 ++ * bfq_insert - generic tree insertion.
4442 ++ * @root: tree root.
4443 ++ * @entity: entity to insert.
4444 ++ *
4445 ++ * This is used for the idle and the active tree, since they are both
4446 ++ * ordered by finish time.
4447 ++ */
4448 ++static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
4449 ++{
4450 ++ struct bfq_entity *entry;
4451 ++ struct rb_node **node = &root->rb_node;
4452 ++ struct rb_node *parent = NULL;
4453 ++
4454 ++ BUG_ON(entity->tree != NULL);
4455 ++
4456 ++ while (*node != NULL) {
4457 ++ parent = *node;
4458 ++ entry = rb_entry(parent, struct bfq_entity, rb_node);
4459 ++
4460 ++ if (bfq_gt(entry->finish, entity->finish))
4461 ++ node = &parent->rb_left;
4462 ++ else
4463 ++ node = &parent->rb_right;
4464 ++ }
4465 ++
4466 ++ rb_link_node(&entity->rb_node, parent, node);
4467 ++ rb_insert_color(&entity->rb_node, root);
4468 ++
4469 ++ entity->tree = root;
4470 ++}
4471 ++
4472 ++/**
4473 ++ * bfq_update_min - update the min_start field of an entity.
4474 ++ * @entity: the entity to update.
4475 ++ * @node: one of its children.
4476 ++ *
4477 ++ * This function is called when @entity may store an invalid value for
4478 ++ * min_start due to updates to the active tree. The function assumes
4479 ++ * that the subtree rooted at @node (which may be its left or its right
4480 ++ * child) has a valid min_start value.
4481 ++ */
4482 ++static inline void bfq_update_min(struct bfq_entity *entity,
4483 ++ struct rb_node *node)
4484 ++{
4485 ++ struct bfq_entity *child;
4486 ++
4487 ++ if (node != NULL) {
4488 ++ child = rb_entry(node, struct bfq_entity, rb_node);
4489 ++ if (bfq_gt(entity->min_start, child->min_start))
4490 ++ entity->min_start = child->min_start;
4491 ++ }
4492 ++}
4493 ++
4494 ++/**
4495 ++ * bfq_update_active_node - recalculate min_start.
4496 ++ * @node: the node to update.
4497 ++ *
4498 ++ * @node may have changed position or one of its children may have moved;
4499 ++ * this function updates its min_start value. The left and right subtrees
4500 ++ * are assumed to hold a correct min_start value.
4501 ++ */
4502 ++static inline void bfq_update_active_node(struct rb_node *node)
4503 ++{
4504 ++ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
4505 ++
4506 ++ entity->min_start = entity->start;
4507 ++ bfq_update_min(entity, node->rb_right);
4508 ++ bfq_update_min(entity, node->rb_left);
4509 ++}
4510 ++
4511 ++/**
4512 ++ * bfq_update_active_tree - update min_start for the whole active tree.
4513 ++ * @node: the starting node.
4514 ++ *
4515 ++ * @node must be the deepest modified node after an update. This function
4516 ++ * updates its min_start using the values held by its children, assuming
4517 ++ * that they did not change, and then updates all the nodes that may have
4518 ++ * changed in the path to the root. The only nodes that may have changed
4519 ++ * are the ones in the path or their siblings.
4520 ++ */
4521 ++static void bfq_update_active_tree(struct rb_node *node)
4522 ++{
4523 ++ struct rb_node *parent;
4524 ++
4525 ++up:
4526 ++ bfq_update_active_node(node);
4527 ++
4528 ++ parent = rb_parent(node);
4529 ++ if (parent == NULL)
4530 ++ return;
4531 ++
4532 ++ if (node == parent->rb_left && parent->rb_right != NULL)
4533 ++ bfq_update_active_node(parent->rb_right);
4534 ++ else if (parent->rb_left != NULL)
4535 ++ bfq_update_active_node(parent->rb_left);
4536 ++
4537 ++ node = parent;
4538 ++ goto up;
4539 ++}
4540 ++
4541 ++/**
4542 ++ * bfq_active_insert - insert an entity in the active tree of its group/device.
4543 ++ * @st: the service tree of the entity.
4544 ++ * @entity: the entity being inserted.
4545 ++ *
4546 ++ * The active tree is ordered by finish time, but an extra key is kept
4547 ++ * in each node, containing the minimum value for the start times of
4548 ++ * its children (and the node itself), so it's possible to search for
4549 ++ * the eligible node with the lowest finish time in logarithmic time.
4550 ++ */
4551 ++static void bfq_active_insert(struct bfq_service_tree *st,
4552 ++ struct bfq_entity *entity)
4553 ++{
4554 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4555 ++ struct rb_node *node = &entity->rb_node;
4556 ++
4557 ++ bfq_insert(&st->active, entity);
4558 ++
4559 ++ if (node->rb_left != NULL)
4560 ++ node = node->rb_left;
4561 ++ else if (node->rb_right != NULL)
4562 ++ node = node->rb_right;
4563 ++
4564 ++ bfq_update_active_tree(node);
4565 ++
4566 ++ if (bfqq != NULL)
4567 ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
4568 ++}
4569 ++
4570 ++/**
4571 ++ * bfq_ioprio_to_weight - calc a weight from an ioprio.
4572 ++ * @ioprio: the ioprio value to convert.
4573 ++ */
4574 ++static unsigned short bfq_ioprio_to_weight(int ioprio)
4575 ++{
4576 ++ WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
4577 ++ return IOPRIO_BE_NR - ioprio;
4578 ++}
4579 ++
4580 ++/**
4581 ++ * bfq_weight_to_ioprio - calc an ioprio from a weight.
4582 ++ * @weight: the weight value to convert.
4583 ++ *
4584 ++ * To preserve the old ioprio-only user interface as much as possible,
4585 ++ * 0 is used as an escape ioprio value for weights (numerically) equal
4586 ++ * to or larger than IOPRIO_BE_NR.
4587 ++ */
4588 ++static unsigned short bfq_weight_to_ioprio(int weight)
4589 ++{
4590 ++ WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
4591 ++ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
4592 ++}
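/*
 * Illustrative aside: with IOPRIO_BE_NR == 8, the two helpers above map
 * ioprio 0 (highest best-effort priority) to weight 8 and ioprio 7
 * (lowest) to weight 1, and back again:
 *
 *	bfq_ioprio_to_weight(0) == 8	bfq_weight_to_ioprio(8) == 0
 *	bfq_ioprio_to_weight(7) == 1	bfq_weight_to_ioprio(1) == 7
 *
 * Weights numerically equal to or larger than IOPRIO_BE_NR all map to
 * the escape ioprio value 0.
 */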
4593 ++
4594 ++static inline void bfq_get_entity(struct bfq_entity *entity)
4595 ++{
4596 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4597 ++ struct bfq_sched_data *sd;
4598 ++
4599 ++ if (bfqq != NULL) {
4600 ++ sd = entity->sched_data;
4601 ++ atomic_inc(&bfqq->ref);
4602 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
4603 ++ bfqq, atomic_read(&bfqq->ref));
4604 ++ }
4605 ++}
4606 ++
4607 ++/**
4608 ++ * bfq_find_deepest - find the deepest node that an extraction can modify.
4609 ++ * @node: the node being removed.
4610 ++ *
4611 ++ * Do the first step of an extraction in an rb tree, looking for the
4612 ++ * node that will replace @node, and returning the deepest node that
4613 ++ * the following modifications to the tree can touch. If @node is the
4614 ++ * last node in the tree return %NULL.
4615 ++ */
4616 ++static struct rb_node *bfq_find_deepest(struct rb_node *node)
4617 ++{
4618 ++ struct rb_node *deepest;
4619 ++
4620 ++ if (node->rb_right == NULL && node->rb_left == NULL)
4621 ++ deepest = rb_parent(node);
4622 ++ else if (node->rb_right == NULL)
4623 ++ deepest = node->rb_left;
4624 ++ else if (node->rb_left == NULL)
4625 ++ deepest = node->rb_right;
4626 ++ else {
4627 ++ deepest = rb_next(node);
4628 ++ if (deepest->rb_right != NULL)
4629 ++ deepest = deepest->rb_right;
4630 ++ else if (rb_parent(deepest) != node)
4631 ++ deepest = rb_parent(deepest);
4632 ++ }
4633 ++
4634 ++ return deepest;
4635 ++}
4636 ++
4637 ++/**
4638 ++ * bfq_active_extract - remove an entity from the active tree.
4639 ++ * @st: the service_tree containing the tree.
4640 ++ * @entity: the entity being removed.
4641 ++ */
4642 ++static void bfq_active_extract(struct bfq_service_tree *st,
4643 ++ struct bfq_entity *entity)
4644 ++{
4645 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4646 ++ struct rb_node *node;
4647 ++
4648 ++ node = bfq_find_deepest(&entity->rb_node);
4649 ++ bfq_extract(&st->active, entity);
4650 ++
4651 ++ if (node != NULL)
4652 ++ bfq_update_active_tree(node);
4653 ++
4654 ++ if (bfqq != NULL)
4655 ++ list_del(&bfqq->bfqq_list);
4656 ++}
4657 ++
4658 ++/**
4659 ++ * bfq_idle_insert - insert an entity into the idle tree.
4660 ++ * @st: the service tree containing the tree.
4661 ++ * @entity: the entity to insert.
4662 ++ */
4663 ++static void bfq_idle_insert(struct bfq_service_tree *st,
4664 ++ struct bfq_entity *entity)
4665 ++{
4666 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4667 ++ struct bfq_entity *first_idle = st->first_idle;
4668 ++ struct bfq_entity *last_idle = st->last_idle;
4669 ++
4670 ++ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish))
4671 ++ st->first_idle = entity;
4672 ++ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish))
4673 ++ st->last_idle = entity;
4674 ++
4675 ++ bfq_insert(&st->idle, entity);
4676 ++
4677 ++ if (bfqq != NULL)
4678 ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
4679 ++}
4680 ++
4681 ++/**
4682 ++ * bfq_forget_entity - remove an entity from the wfq trees.
4683 ++ * @st: the service tree.
4684 ++ * @entity: the entity being removed.
4685 ++ *
4686 ++ * Update the device status and forget everything about @entity, putting
4687 ++ * the device reference to it, if it is a queue. Entities belonging to
4688 ++ * groups are not refcounted.
4689 ++ */
4690 ++static void bfq_forget_entity(struct bfq_service_tree *st,
4691 ++ struct bfq_entity *entity)
4692 ++{
4693 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4694 ++ struct bfq_sched_data *sd;
4695 ++
4696 ++ BUG_ON(!entity->on_st);
4697 ++
4698 ++ entity->on_st = 0;
4699 ++ st->wsum -= entity->weight;
4700 ++ if (bfqq != NULL) {
4701 ++ sd = entity->sched_data;
4702 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
4703 ++ bfqq, atomic_read(&bfqq->ref));
4704 ++ bfq_put_queue(bfqq);
4705 ++ }
4706 ++}
4707 ++
4708 ++/**
4709 ++ * bfq_put_idle_entity - release the idle tree ref of an entity.
4710 ++ * @st: service tree for the entity.
4711 ++ * @entity: the entity being released.
4712 ++ */
4713 ++static void bfq_put_idle_entity(struct bfq_service_tree *st,
4714 ++ struct bfq_entity *entity)
4715 ++{
4716 ++ bfq_idle_extract(st, entity);
4717 ++ bfq_forget_entity(st, entity);
4718 ++}
4719 ++
4720 ++/**
4721 ++ * bfq_forget_idle - update the idle tree if necessary.
4722 ++ * @st: the service tree to act upon.
4723 ++ *
4724 ++ * To preserve the global O(log N) complexity we only remove one entry here;
4725 ++ * as the idle tree will not grow indefinitely this can be done safely.
4726 ++ */
4727 ++static void bfq_forget_idle(struct bfq_service_tree *st)
4728 ++{
4729 ++ struct bfq_entity *first_idle = st->first_idle;
4730 ++ struct bfq_entity *last_idle = st->last_idle;
4731 ++
4732 ++ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL &&
4733 ++ !bfq_gt(last_idle->finish, st->vtime)) {
4734 ++ /*
4735 ++ * Forget the whole idle tree, increasing the vtime past
4736 ++ * the last finish time of idle entities.
4737 ++ */
4738 ++ st->vtime = last_idle->finish;
4739 ++ }
4740 ++
4741 ++ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime))
4742 ++ bfq_put_idle_entity(st, first_idle);
4743 ++}
4744 ++
4745 ++static struct bfq_service_tree *
4746 ++__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
4747 ++ struct bfq_entity *entity)
4748 ++{
4749 ++ struct bfq_service_tree *new_st = old_st;
4750 ++
4751 ++ if (entity->ioprio_changed) {
4752 ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
4753 ++
4754 ++ BUG_ON(old_st->wsum < entity->weight);
4755 ++ old_st->wsum -= entity->weight;
4756 ++
4757 ++ if (entity->new_weight != entity->orig_weight) {
4758 ++ entity->orig_weight = entity->new_weight;
4759 ++ entity->ioprio =
4760 ++ bfq_weight_to_ioprio(entity->orig_weight);
4761 ++ } else if (entity->new_ioprio != entity->ioprio) {
4762 ++ entity->ioprio = entity->new_ioprio;
4763 ++ entity->orig_weight =
4764 ++ bfq_ioprio_to_weight(entity->ioprio);
4765 ++ } else
4766 ++ entity->new_weight = entity->orig_weight =
4767 ++ bfq_ioprio_to_weight(entity->ioprio);
4768 ++
4769 ++ entity->ioprio_class = entity->new_ioprio_class;
4770 ++ entity->ioprio_changed = 0;
4771 ++
4772 ++ /*
4773 ++		 * NOTE: here we may be changing the weight too early;
4774 ++ * this will cause unfairness. The correct approach
4775 ++ * would have required additional complexity to defer
4776 ++ * weight changes to the proper time instants (i.e.,
4777 ++ * when entity->finish <= old_st->vtime).
4778 ++ */
4779 ++ new_st = bfq_entity_service_tree(entity);
4780 ++ entity->weight = entity->orig_weight *
4781 ++ (bfqq != NULL ? bfqq->raising_coeff : 1);
4782 ++ new_st->wsum += entity->weight;
4783 ++
4784 ++ if (new_st != old_st)
4785 ++ entity->start = new_st->vtime;
4786 ++ }
4787 ++
4788 ++ return new_st;
4789 ++}
4790 ++
4791 ++/**
4792 ++ * bfq_bfqq_served - update the scheduler status after selection for service.
4793 ++ * @bfqq: the queue being served.
4794 ++ * @served: bytes to transfer.
4795 ++ *
4796 ++ * NOTE: this can be optimized, as the timestamps of upper level entities
4797 ++ * are synchronized every time a new bfqq is selected for service. For now,
4798 ++ * we keep it to better check consistency.
4799 ++ */
4800 ++static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
4801 ++{
4802 ++ struct bfq_entity *entity = &bfqq->entity;
4803 ++ struct bfq_service_tree *st;
4804 ++
4805 ++ for_each_entity(entity) {
4806 ++ st = bfq_entity_service_tree(entity);
4807 ++
4808 ++ entity->service += served;
4809 ++ BUG_ON(entity->service > entity->budget);
4810 ++ BUG_ON(st->wsum == 0);
4811 ++
4812 ++ st->vtime += bfq_delta(served, st->wsum);
4813 ++ bfq_forget_idle(st);
4814 ++ }
4815 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served);
4816 ++}
4817 ++
4818 ++/**
4819 ++ * bfq_bfqq_charge_full_budget - set the service to the entity budget.
4820 ++ * @bfqq: the queue that needs a service update.
4821 ++ *
4822 ++ * When it's not possible to be fair in the service domain, because
4823 ++ * a queue is not consuming its budget fast enough (the meaning of
4824 ++ * fast depends on the timeout parameter), we charge it a full
4825 ++ * budget. In this way we should obtain a sort of time-domain
4826 ++ * fairness among all the seeky/slow queues.
4827 ++ */
4828 ++static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
4829 ++{
4830 ++ struct bfq_entity *entity = &bfqq->entity;
4831 ++
4832 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
4833 ++
4834 ++ bfq_bfqq_served(bfqq, entity->budget - entity->service);
4835 ++}
4836 ++
4837 ++/**
4838 ++ * __bfq_activate_entity - activate an entity.
4839 ++ * @entity: the entity being activated.
4840 ++ *
4841 ++ * Called whenever an entity is activated, i.e., it is not active and one
4842 ++ * of its children receives a new request, or has to be reactivated due to
4843 ++ * budget exhaustion. It uses the current budget of the entity (and the
4844 ++ * service received if @entity is active) of the queue to calculate its
4845 ++ * timestamps.
4846 ++ */
4847 ++static void __bfq_activate_entity(struct bfq_entity *entity)
4848 ++{
4849 ++ struct bfq_sched_data *sd = entity->sched_data;
4850 ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
4851 ++
4852 ++ if (entity == sd->active_entity) {
4853 ++ BUG_ON(entity->tree != NULL);
4854 ++ /*
4855 ++		 * If we are requeueing the current entity, we have
4856 ++		 * to take care not to charge it for service it has
4857 ++ * not received.
4858 ++ */
4859 ++ bfq_calc_finish(entity, entity->service);
4860 ++ entity->start = entity->finish;
4861 ++ sd->active_entity = NULL;
4862 ++ } else if (entity->tree == &st->active) {
4863 ++ /*
4864 ++ * Requeueing an entity due to a change of some
4865 ++ * next_active entity below it. We reuse the old
4866 ++ * start time.
4867 ++ */
4868 ++ bfq_active_extract(st, entity);
4869 ++ } else if (entity->tree == &st->idle) {
4870 ++ /*
4871 ++ * Must be on the idle tree, bfq_idle_extract() will
4872 ++ * check for that.
4873 ++ */
4874 ++ bfq_idle_extract(st, entity);
4875 ++ entity->start = bfq_gt(st->vtime, entity->finish) ?
4876 ++ st->vtime : entity->finish;
4877 ++ } else {
4878 ++ /*
4879 ++ * The finish time of the entity may be invalid, and
4880 ++		 * it is certainly in the past; otherwise the queue
4881 ++ * would have been on the idle tree.
4882 ++ */
4883 ++ entity->start = st->vtime;
4884 ++ st->wsum += entity->weight;
4885 ++ bfq_get_entity(entity);
4886 ++
4887 ++ BUG_ON(entity->on_st);
4888 ++ entity->on_st = 1;
4889 ++ }
4890 ++
4891 ++ st = __bfq_entity_update_weight_prio(st, entity);
4892 ++ bfq_calc_finish(entity, entity->budget);
4893 ++ bfq_active_insert(st, entity);
4894 ++}
4895 ++
4896 ++/**
4897 ++ * bfq_activate_entity - activate an entity and its ancestors if necessary.
4898 ++ * @entity: the entity to activate.
4899 ++ *
4900 ++ * Activate @entity and all the entities on the path from it to the root.
4901 ++ */
4902 ++static void bfq_activate_entity(struct bfq_entity *entity)
4903 ++{
4904 ++ struct bfq_sched_data *sd;
4905 ++
4906 ++ for_each_entity(entity) {
4907 ++ __bfq_activate_entity(entity);
4908 ++
4909 ++ sd = entity->sched_data;
4910 ++ if (!bfq_update_next_active(sd))
4911 ++ /*
4912 ++ * No need to propagate the activation to the
4913 ++ * upper entities, as they will be updated when
4914 ++ * the active entity is rescheduled.
4915 ++ */
4916 ++ break;
4917 ++ }
4918 ++}
4919 ++
4920 ++/**
4921 ++ * __bfq_deactivate_entity - deactivate an entity from its service tree.
4922 ++ * @entity: the entity to deactivate.
4923 ++ * @requeue: if false, the entity will not be put into the idle tree.
4924 ++ *
4925 ++ * Deactivate an entity, independently from its previous state. If the
4926 ++ * entity was not on a service tree just return, otherwise if it is on
4927 ++ * any scheduler tree, extract it from that tree, and if necessary
4928 ++ * and if the caller did not specify @requeue, put it on the idle tree.
4929 ++ *
4930 ++ * Return %1 if the caller should update the entity hierarchy, i.e.,
4931 ++ * if the entity was under service or if it was the next_active for
4932 ++ * its sched_data; return %0 otherwise.
4933 ++ */
4934 ++static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
4935 ++{
4936 ++ struct bfq_sched_data *sd = entity->sched_data;
4937 ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
4938 ++ int was_active = entity == sd->active_entity;
4939 ++ int ret = 0;
4940 ++
4941 ++ if (!entity->on_st)
4942 ++ return 0;
4943 ++
4944 ++ BUG_ON(was_active && entity->tree != NULL);
4945 ++
4946 ++ if (was_active) {
4947 ++ bfq_calc_finish(entity, entity->service);
4948 ++ sd->active_entity = NULL;
4949 ++ } else if (entity->tree == &st->active)
4950 ++ bfq_active_extract(st, entity);
4951 ++ else if (entity->tree == &st->idle)
4952 ++ bfq_idle_extract(st, entity);
4953 ++ else if (entity->tree != NULL)
4954 ++ BUG();
4955 ++
4956 ++ if (was_active || sd->next_active == entity)
4957 ++ ret = bfq_update_next_active(sd);
4958 ++
4959 ++ if (!requeue || !bfq_gt(entity->finish, st->vtime))
4960 ++ bfq_forget_entity(st, entity);
4961 ++ else
4962 ++ bfq_idle_insert(st, entity);
4963 ++
4964 ++ BUG_ON(sd->active_entity == entity);
4965 ++ BUG_ON(sd->next_active == entity);
4966 ++
4967 ++ return ret;
4968 ++}
4969 ++
4970 ++/**
4971 ++ * bfq_deactivate_entity - deactivate an entity.
4972 ++ * @entity: the entity to deactivate.
4973 ++ * @requeue: true if the entity can be put on the idle tree
4974 ++ */
4975 ++static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
4976 ++{
4977 ++ struct bfq_sched_data *sd;
4978 ++ struct bfq_entity *parent;
4979 ++
4980 ++ for_each_entity_safe(entity, parent) {
4981 ++ sd = entity->sched_data;
4982 ++
4983 ++ if (!__bfq_deactivate_entity(entity, requeue))
4984 ++ /*
4985 ++ * The parent entity is still backlogged, and
4986 ++ * we don't need to update it as it is still
4987 ++ * under service.
4988 ++ */
4989 ++ break;
4990 ++
4991 ++ if (sd->next_active != NULL)
4992 ++ /*
4993 ++ * The parent entity is still backlogged and
4994 ++ * the budgets on the path towards the root
4995 ++ * need to be updated.
4996 ++ */
4997 ++ goto update;
4998 ++
4999 ++ /*
5000 ++	 * If we reach this point, the parent is no longer backlogged and
5001 ++ * we want to propagate the dequeue upwards.
5002 ++ */
5003 ++ requeue = 1;
5004 ++ }
5005 ++
5006 ++ return;
5007 ++
5008 ++update:
5009 ++ entity = parent;
5010 ++ for_each_entity(entity) {
5011 ++ __bfq_activate_entity(entity);
5012 ++
5013 ++ sd = entity->sched_data;
5014 ++ if (!bfq_update_next_active(sd))
5015 ++ break;
5016 ++ }
5017 ++}
5018 ++
5019 ++/**
5020 ++ * bfq_update_vtime - update vtime if necessary.
5021 ++ * @st: the service tree to act upon.
5022 ++ *
5023 ++ * If necessary update the service tree vtime to have at least one
5024 ++ * eligible entity, skipping to its start time. Assumes that the
5025 ++ * active tree of the device is not empty.
5026 ++ *
5027 ++ * NOTE: this hierarchical implementation updates vtimes quite often,
5028 ++ * so we may end up with reactivated tasks getting timestamps after a
5029 ++ * vtime skip done because we needed a ->first_active entity on some
5030 ++ * intermediate node.
5031 ++ */
5032 ++static void bfq_update_vtime(struct bfq_service_tree *st)
5033 ++{
5034 ++ struct bfq_entity *entry;
5035 ++ struct rb_node *node = st->active.rb_node;
5036 ++
5037 ++ entry = rb_entry(node, struct bfq_entity, rb_node);
5038 ++ if (bfq_gt(entry->min_start, st->vtime)) {
5039 ++ st->vtime = entry->min_start;
5040 ++ bfq_forget_idle(st);
5041 ++ }
5042 ++}
5043 ++
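A concrete example of the vtime skip described above, with made-up numbers:

	/* Hypothetical values:
	 *   st->vtime        = 100
	 *   entry->min_start = 140  (smallest start among active entities)
	 * bfq_gt(140, 100) is true, so vtime jumps to 140 and the earliest
	 * entity becomes eligible; had min_start been 90, vtime would be
	 * left untouched because an eligible entity already exists.
	 */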
5044 ++/**
5045 ++ * bfq_first_active - find the eligible entity with the smallest finish time
5046 ++ * @st: the service tree to select from.
5047 ++ *
5048 ++ * This function searches for the first schedulable entity, starting from
5049 ++ * the root of the tree and going left whenever the left subtree contains
5050 ++ * at least one eligible (start <= vtime) entity. The path
5051 ++ * on the right is followed only if a) the left subtree contains no eligible
5052 ++ * entities and b) no eligible entity has been found yet.
5053 ++ */
5054 ++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
5055 ++{
5056 ++ struct bfq_entity *entry, *first = NULL;
5057 ++ struct rb_node *node = st->active.rb_node;
5058 ++
5059 ++ while (node != NULL) {
5060 ++ entry = rb_entry(node, struct bfq_entity, rb_node);
5061 ++left:
5062 ++ if (!bfq_gt(entry->start, st->vtime))
5063 ++ first = entry;
5064 ++
5065 ++ BUG_ON(bfq_gt(entry->min_start, st->vtime));
5066 ++
5067 ++ if (node->rb_left != NULL) {
5068 ++ entry = rb_entry(node->rb_left,
5069 ++ struct bfq_entity, rb_node);
5070 ++ if (!bfq_gt(entry->min_start, st->vtime)) {
5071 ++ node = node->rb_left;
5072 ++ goto left;
5073 ++ }
5074 ++ }
5075 ++ if (first != NULL)
5076 ++ break;
5077 ++ node = node->rb_right;
5078 ++ }
5079 ++
5080 ++ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active));
5081 ++ return first;
5082 ++}
5083 ++
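The walk above leans on the augmented-rbtree invariant behind @min_start, documented for struct bfq_entity in bfq.h below; roughly:

	/* Invariant assumed by the search (sketch, not literal patch code):
	 *   node->min_start == min(node->start,
	 *                          node->rb_left->min_start,
	 *                          node->rb_right->min_start)
	 * Hence "left child's min_start <= vtime" already proves that the
	 * left subtree holds at least one eligible entity and is worth
	 * descending into.
	 */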
5084 ++/**
5085 ++ * __bfq_lookup_next_entity - return the first eligible entity in @st.
5086 ++ * @st: the service tree.
5087 ++ *
5088 ++ * Update the virtual time in @st and return the first eligible entity
5089 ++ * it contains.
5090 ++ */
5091 ++static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
5092 ++ bool force)
5093 ++{
5094 ++ struct bfq_entity *entity, *new_next_active = NULL;
5095 ++
5096 ++ if (RB_EMPTY_ROOT(&st->active))
5097 ++ return NULL;
5098 ++
5099 ++ bfq_update_vtime(st);
5100 ++ entity = bfq_first_active_entity(st);
5101 ++ BUG_ON(bfq_gt(entity->start, st->vtime));
5102 ++
5103 ++ /*
5104 ++ * If the chosen entity does not match with the sched_data's
5105 ++	 * next_active and we are forcibly serving the IDLE priority
5106 ++ * class tree, bubble up budget update.
5107 ++ */
5108 ++ if (unlikely(force && entity != entity->sched_data->next_active)) {
5109 ++ new_next_active = entity;
5110 ++ for_each_entity(new_next_active)
5111 ++ bfq_update_budget(new_next_active);
5112 ++ }
5113 ++
5114 ++ return entity;
5115 ++}
5116 ++
5117 ++/**
5118 ++ * bfq_lookup_next_entity - return the first eligible entity in @sd.
5119 ++ * @sd: the sched_data.
5120 ++ * @extract: if true the returned entity will be also extracted from @sd.
5121 ++ *
5122 ++ * NOTE: since we cache the next_active entity at each level of the
5123 ++ * hierarchy, the complexity of the lookup can be decreased with
5124 ++ * absolutely no effort just returning the cached next_active value;
5125 ++ * we prefer to do full lookups to test the consistency of the data
5126 ++ * structures.
5127 ++ */
5128 ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
5129 ++ int extract,
5130 ++ struct bfq_data *bfqd)
5131 ++{
5132 ++ struct bfq_service_tree *st = sd->service_tree;
5133 ++ struct bfq_entity *entity;
5134 ++	int i = 0;
5135 ++
5136 ++ BUG_ON(sd->active_entity != NULL);
5137 ++
5138 ++ if (bfqd != NULL &&
5139 ++ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
5140 ++ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true);
5141 ++ if (entity != NULL) {
5142 ++ i = BFQ_IOPRIO_CLASSES - 1;
5143 ++ bfqd->bfq_class_idle_last_service = jiffies;
5144 ++ sd->next_active = entity;
5145 ++ }
5146 ++ }
5147 ++ for (; i < BFQ_IOPRIO_CLASSES; i++) {
5148 ++ entity = __bfq_lookup_next_entity(st + i, false);
5149 ++ if (entity != NULL) {
5150 ++ if (extract) {
5151 ++ bfq_check_next_active(sd, entity);
5152 ++ bfq_active_extract(st + i, entity);
5153 ++ sd->active_entity = entity;
5154 ++ sd->next_active = NULL;
5155 ++ }
5156 ++ break;
5157 ++ }
5158 ++ }
5159 ++
5160 ++ return entity;
5161 ++}
5162 ++
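As a concrete reading of the first branch above: BFQ_CL_IDLE_TIMEOUT is defined as HZ/5 in bfq.h below, so the IDLE class is force-served at most roughly once every 200 ms, which keeps it from being starved forever by RT and BE queues.

	/* Sketch of the anti-starvation check (same logic as above):
	 *   if (jiffies - bfqd->bfq_class_idle_last_service > HZ / 5)
	 *           force-serve one entity from the IDLE service tree;
	 * i.e. about once per 200 ms, independently of the HZ value.
	 */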
5163 ++/*
5164 ++ * Get next queue for service.
5165 ++ */
5166 ++static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
5167 ++{
5168 ++ struct bfq_entity *entity = NULL;
5169 ++ struct bfq_sched_data *sd;
5170 ++ struct bfq_queue *bfqq;
5171 ++
5172 ++ BUG_ON(bfqd->active_queue != NULL);
5173 ++
5174 ++ if (bfqd->busy_queues == 0)
5175 ++ return NULL;
5176 ++
5177 ++ sd = &bfqd->root_group->sched_data;
5178 ++ for (; sd != NULL; sd = entity->my_sched_data) {
5179 ++ entity = bfq_lookup_next_entity(sd, 1, bfqd);
5180 ++ BUG_ON(entity == NULL);
5181 ++ entity->service = 0;
5182 ++ }
5183 ++
5184 ++ bfqq = bfq_entity_to_bfqq(entity);
5185 ++ BUG_ON(bfqq == NULL);
5186 ++
5187 ++ return bfqq;
5188 ++}
5189 ++
5190 ++/*
5191 ++ * Forced extraction of the given queue.
5192 ++ */
5193 ++static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
5194 ++ struct bfq_queue *bfqq)
5195 ++{
5196 ++ struct bfq_entity *entity;
5197 ++ struct bfq_sched_data *sd;
5198 ++
5199 ++ BUG_ON(bfqd->active_queue != NULL);
5200 ++
5201 ++ entity = &bfqq->entity;
5202 ++ /*
5203 ++ * Bubble up extraction/update from the leaf to the root.
5204 ++ */
5205 ++ for_each_entity(entity) {
5206 ++ sd = entity->sched_data;
5207 ++ bfq_update_budget(entity);
5208 ++ bfq_update_vtime(bfq_entity_service_tree(entity));
5209 ++ bfq_active_extract(bfq_entity_service_tree(entity), entity);
5210 ++ sd->active_entity = entity;
5211 ++ sd->next_active = NULL;
5212 ++ entity->service = 0;
5213 ++ }
5214 ++
5215 ++ return;
5216 ++}
5217 ++
5218 ++static void __bfq_bfqd_reset_active(struct bfq_data *bfqd)
5219 ++{
5220 ++ if (bfqd->active_bic != NULL) {
5221 ++ put_io_context(bfqd->active_bic->icq.ioc);
5222 ++ bfqd->active_bic = NULL;
5223 ++ }
5224 ++
5225 ++ bfqd->active_queue = NULL;
5226 ++ del_timer(&bfqd->idle_slice_timer);
5227 ++}
5228 ++
5229 ++static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
5230 ++ int requeue)
5231 ++{
5232 ++ struct bfq_entity *entity = &bfqq->entity;
5233 ++
5234 ++ if (bfqq == bfqd->active_queue)
5235 ++ __bfq_bfqd_reset_active(bfqd);
5236 ++
5237 ++ bfq_deactivate_entity(entity, requeue);
5238 ++}
5239 ++
5240 ++static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
5241 ++{
5242 ++ struct bfq_entity *entity = &bfqq->entity;
5243 ++
5244 ++ bfq_activate_entity(entity);
5245 ++}
5246 ++
5247 ++/*
5248 ++ * Called when the bfqq no longer has requests pending, remove it from
5249 ++ * the service tree.
5250 ++ */
5251 ++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
5252 ++ int requeue)
5253 ++{
5254 ++ BUG_ON(!bfq_bfqq_busy(bfqq));
5255 ++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
5256 ++
5257 ++ bfq_log_bfqq(bfqd, bfqq, "del from busy");
5258 ++
5259 ++ bfq_clear_bfqq_busy(bfqq);
5260 ++
5261 ++ BUG_ON(bfqd->busy_queues == 0);
5262 ++ bfqd->busy_queues--;
5263 ++
5264 ++ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
5265 ++}
5266 ++
5267 ++/*
5268 ++ * Called when an inactive queue receives a new request.
5269 ++ */
5270 ++static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
5271 ++{
5272 ++ BUG_ON(bfq_bfqq_busy(bfqq));
5273 ++ BUG_ON(bfqq == bfqd->active_queue);
5274 ++
5275 ++ bfq_log_bfqq(bfqd, bfqq, "add to busy");
5276 ++
5277 ++ bfq_activate_bfqq(bfqd, bfqq);
5278 ++
5279 ++ bfq_mark_bfqq_busy(bfqq);
5280 ++ bfqd->busy_queues++;
5281 ++}
5282 +diff --git a/block/bfq.h b/block/bfq.h
5283 +new file mode 100644
5284 +index 0000000..b4e9ab0
5285 +--- /dev/null
5286 ++++ b/block/bfq.h
5287 +@@ -0,0 +1,603 @@
5288 ++/*
5289 ++ * BFQ-v6r2 for 3.9.0: data structures and common functions prototypes.
5290 ++ *
5291 ++ * Based on ideas and code from CFQ:
5292 ++ * Copyright (C) 2003 Jens Axboe <axboe@××××××.dk>
5293 ++ *
5294 ++ * Copyright (C) 2008 Fabio Checconi <fabio@×××××××××××××.it>
5295 ++ * Paolo Valente <paolo.valente@×××××××.it>
5296 ++ *
5297 ++ * Copyright (C) 2010 Paolo Valente <paolo.valente@×××××××.it>
5298 ++ */
5299 ++
5300 ++#ifndef _BFQ_H
5301 ++#define _BFQ_H
5302 ++
5303 ++#include <linux/blktrace_api.h>
5304 ++#include <linux/hrtimer.h>
5305 ++#include <linux/ioprio.h>
5306 ++#include <linux/rbtree.h>
5307 ++
5308 ++#define BFQ_IOPRIO_CLASSES 3
5309 ++#define BFQ_CL_IDLE_TIMEOUT HZ/5
5310 ++
5311 ++#define BFQ_MIN_WEIGHT 1
5312 ++#define BFQ_MAX_WEIGHT 1000
5313 ++
5314 ++#define BFQ_DEFAULT_GRP_WEIGHT 10
5315 ++#define BFQ_DEFAULT_GRP_IOPRIO 0
5316 ++#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
5317 ++
5318 ++struct bfq_entity;
5319 ++
5320 ++/**
5321 ++ * struct bfq_service_tree - per ioprio_class service tree.
5322 ++ * @active: tree for active entities (i.e., those backlogged).
5323 ++ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
5324 ++ * @first_idle: idle entity with minimum F_i.
5325 ++ * @last_idle: idle entity with maximum F_i.
5326 ++ * @vtime: scheduler virtual time.
5327 ++ * @wsum: scheduler weight sum; active and idle entities contribute to it.
5328 ++ *
5329 ++ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
5330 ++ * ioprio_class has its own independent scheduler, and so its own
5331 ++ * bfq_service_tree. All the fields are protected by the queue lock
5332 ++ * of the containing bfqd.
5333 ++ */
5334 ++struct bfq_service_tree {
5335 ++ struct rb_root active;
5336 ++ struct rb_root idle;
5337 ++
5338 ++ struct bfq_entity *first_idle;
5339 ++ struct bfq_entity *last_idle;
5340 ++
5341 ++ u64 vtime;
5342 ++ unsigned long wsum;
5343 ++};
5344 ++
5345 ++/**
5346 ++ * struct bfq_sched_data - multi-class scheduler.
5347 ++ * @active_entity: entity under service.
5348 ++ * @next_active: head-of-the-line entity in the scheduler.
5349 ++ * @service_tree: array of service trees, one per ioprio_class.
5350 ++ *
5351 ++ * bfq_sched_data is the basic scheduler queue. It supports three
5352 ++ * ioprio_classes, and can be used either as a toplevel queue or as
5353 ++ * an intermediate queue on a hierarchical setup.
5354 ++ * @next_active points to the active entity of the sched_data service
5355 ++ * trees that will be scheduled next.
5356 ++ *
5357 ++ * The supported ioprio_classes are the same as in CFQ, in descending
5358 ++ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
5359 ++ * Requests from higher priority queues are served before all the
5360 ++ * requests from lower priority queues; within the same queue,
5361 ++ * requests are served according to B-WF2Q+.
5362 ++ * All the fields are protected by the queue lock of the containing bfqd.
5363 ++ */
5364 ++struct bfq_sched_data {
5365 ++ struct bfq_entity *active_entity;
5366 ++ struct bfq_entity *next_active;
5367 ++ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
5368 ++};
5369 ++
5370 ++/**
5371 ++ * struct bfq_entity - schedulable entity.
5372 ++ * @rb_node: service_tree member.
5373 ++ * @on_st: flag, true if the entity is on a tree (either the active or
5374 ++ * the idle one of its service_tree).
5375 ++ * @finish: B-WF2Q+ finish timestamp (aka F_i).
5376 ++ * @start: B-WF2Q+ start timestamp (aka S_i).
5377 ++ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
5378 ++ * @min_start: minimum start time of the (active) subtree rooted at
5379 ++ * this entity; used for O(log N) lookups into active trees.
5380 ++ * @service: service received during the last round of service.
5381 ++ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
5382 ++ * @weight: weight of the queue
5383 ++ * @parent: parent entity, for hierarchical scheduling.
5384 ++ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
5385 ++ * associated scheduler queue, %NULL on leaf nodes.
5386 ++ * @sched_data: the scheduler queue this entity belongs to.
5387 ++ * @ioprio: the ioprio in use.
5388 ++ * @new_weight: when a weight change is requested, the new weight value.
5389 ++ * @orig_weight: original weight, used to implement weight boosting
5390 ++ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
5391 ++ * @ioprio_class: the ioprio_class in use.
5392 ++ * @new_ioprio_class: when an ioprio_class change is requested, the new
5393 ++ * ioprio_class value.
5394 ++ * @ioprio_changed: flag, true when the user requested a weight, ioprio or
5395 ++ * ioprio_class change.
5396 ++ *
5397 ++ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
5398 ++ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
5399 ++ * entity belongs to the sched_data of the parent group in the cgroup
5400 ++ * hierarchy. Non-leaf entities have also their own sched_data, stored
5401 ++ * in @my_sched_data.
5402 ++ *
5403 ++ * Each entity stores independently its priority values; this would
5404 ++ * allow different weights on different devices, but this
5405 ++ * functionality is not exported to userspace by now. Priorities and
5406 ++ * weights are updated lazily, first storing the new values into the
5407 ++ * new_* fields, then setting the @ioprio_changed flag. As soon as
5408 ++ * there is a transition in the entity state that allows the priority
5409 ++ * update to take place the effective and the requested priority
5410 ++ * values are synchronized.
5411 ++ *
5412 ++ * Unless cgroups are used, the weight value is calculated from the
5413 ++ * ioprio to export the same interface as CFQ. When dealing with
5414 ++ * ``well-behaved'' queues (i.e., queues that do not spend too much
5415 ++ * time consuming their budget and have true sequential behavior, and
5416 ++ * when there are no external factors breaking anticipation) the
5417 ++ * relative weights at each level of the cgroups hierarchy should be
5418 ++ * guaranteed. All the fields are protected by the queue lock of the
5419 ++ * containing bfqd.
5420 ++ */
5421 ++struct bfq_entity {
5422 ++ struct rb_node rb_node;
5423 ++
5424 ++ int on_st;
5425 ++
5426 ++ u64 finish;
5427 ++ u64 start;
5428 ++
5429 ++ struct rb_root *tree;
5430 ++
5431 ++ u64 min_start;
5432 ++
5433 ++ unsigned long service, budget;
5434 ++ unsigned short weight, new_weight;
5435 ++ unsigned short orig_weight;
5436 ++
5437 ++ struct bfq_entity *parent;
5438 ++
5439 ++ struct bfq_sched_data *my_sched_data;
5440 ++ struct bfq_sched_data *sched_data;
5441 ++
5442 ++ unsigned short ioprio, new_ioprio;
5443 ++ unsigned short ioprio_class, new_ioprio_class;
5444 ++
5445 ++ int ioprio_changed;
5446 ++};
5447 ++
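A minimal sketch of the lazy weight/ioprio update protocol described above; the two helpers are hypothetical stand-ins for what __bfq_entity_update_weight_prio() does at a safe state change (the real code, as an assumption, also folds in ioprio, ioprio_class and the tree's weight sum):

	/* Hypothetical helpers, for illustration only. */
	static inline void sketch_request_weight_change(struct bfq_entity *entity,
							unsigned short weight)
	{
		entity->new_weight = weight;	/* park the requested value ... */
		entity->ioprio_changed = 1;	/* ... and flag the change */
	}

	static inline void sketch_apply_pending_change(struct bfq_entity *entity)
	{
		if (entity->ioprio_changed) {	/* applied lazily, later */
			entity->weight = entity->new_weight;
			entity->ioprio_changed = 0;
		}
	}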
5448 ++struct bfq_group;
5449 ++
5450 ++/**
5451 ++ * struct bfq_queue - leaf schedulable entity.
5452 ++ * @ref: reference counter.
5453 ++ * @bfqd: parent bfq_data.
5454 ++ * @new_bfqq: shared bfq_queue if queue is cooperating with
5455 ++ * one or more other queues.
5456 ++ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree).
5457 ++ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree).
5458 ++ * @sort_list: sorted list of pending requests.
5459 ++ * @next_rq: if fifo isn't expired, next request to serve.
5460 ++ * @queued: nr of requests queued in @sort_list.
5461 ++ * @allocated: currently allocated requests.
5462 ++ * @meta_pending: pending metadata requests.
5463 ++ * @fifo: fifo list of requests in sort_list.
5464 ++ * @entity: entity representing this queue in the scheduler.
5465 ++ * @max_budget: maximum budget allowed from the feedback mechanism.
5466 ++ * @budget_timeout: budget expiration (in jiffies).
5467 ++ * @dispatched: number of requests on the dispatch list or inside driver.
5468 ++ * @org_ioprio: saved ioprio during boosted periods.
5469 ++ * @flags: status flags.
5470 ++ * @bfqq_list: node for active/idle bfqq list inside our bfqd.
5471 ++ * @seek_samples: number of seeks sampled
5472 ++ * @seek_total: sum of the distances of the seeks sampled
5473 ++ * @seek_mean: mean seek distance
5474 ++ * @last_request_pos: position of the last request enqueued
5475 ++ * @pid: pid of the process owning the queue, used for logging purposes.
5476 ++ * @last_rais_start_time: last (idle -> weight-raised) transition attempt
5477 ++ * @raising_cur_max_time: current max raising time for this queue
5478 ++ *
5479 ++ * A bfq_queue is a leaf request queue; it can be associated with one
5480 ++ * io_context or more (if it is an async one). @cgroup holds a reference to the
5481 ++ * cgroup, to be sure that it does not disappear while a bfqq still
5482 ++ * references it (mostly to avoid races between request issuing and task
5483 ++ * migration followed by cgroup destruction).
5484 ++ * All the fields are protected by the queue lock of the containing bfqd.
5485 ++ */
5486 ++struct bfq_queue {
5487 ++ atomic_t ref;
5488 ++ struct bfq_data *bfqd;
5489 ++
5490 ++ /* fields for cooperating queues handling */
5491 ++ struct bfq_queue *new_bfqq;
5492 ++ struct rb_node pos_node;
5493 ++ struct rb_root *pos_root;
5494 ++
5495 ++ struct rb_root sort_list;
5496 ++ struct request *next_rq;
5497 ++ int queued[2];
5498 ++ int allocated[2];
5499 ++ int meta_pending;
5500 ++ struct list_head fifo;
5501 ++
5502 ++ struct bfq_entity entity;
5503 ++
5504 ++ unsigned long max_budget;
5505 ++ unsigned long budget_timeout;
5506 ++
5507 ++ int dispatched;
5508 ++
5509 ++ unsigned short org_ioprio;
5510 ++
5511 ++ unsigned int flags;
5512 ++
5513 ++ struct list_head bfqq_list;
5514 ++
5515 ++ unsigned int seek_samples;
5516 ++ u64 seek_total;
5517 ++ sector_t seek_mean;
5518 ++ sector_t last_request_pos;
5519 ++
5520 ++ pid_t pid;
5521 ++
5522 ++ /* weight-raising fields */
5523 ++ unsigned int raising_cur_max_time;
5524 ++ u64 last_rais_start_finish, soft_rt_next_start;
5525 ++ unsigned int raising_coeff;
5526 ++};
5527 ++
5528 ++/**
5529 ++ * struct bfq_ttime - per process thinktime stats.
5530 ++ * @ttime_total: total process thinktime
5531 ++ * @ttime_samples: number of thinktime samples
5532 ++ * @ttime_mean: average process thinktime
5533 ++ */
5534 ++struct bfq_ttime {
5535 ++ unsigned long last_end_request;
5536 ++
5537 ++ unsigned long ttime_total;
5538 ++ unsigned long ttime_samples;
5539 ++ unsigned long ttime_mean;
5540 ++};
5541 ++
5542 ++/**
5543 ++ * struct bfq_io_cq - per (request_queue, io_context) structure.
5544 ++ * @icq: associated io_cq structure
5545 ++ * @bfqq: array of two process queues, the sync and the async
5546 ++ * @ttime: associated @bfq_ttime struct
5547 ++ */
5548 ++struct bfq_io_cq {
5549 ++ struct io_cq icq; /* must be the first member */
5550 ++ struct bfq_queue *bfqq[2];
5551 ++ struct bfq_ttime ttime;
5552 ++ int ioprio;
5553 ++};
5554 ++
5555 ++/**
5556 ++ * struct bfq_data - per device data structure.
5557 ++ * @queue: request queue for the managed device.
5558 ++ * @root_group: root bfq_group for the device.
5559 ++ * @rq_pos_tree: rbtree sorted by next_request position,
5560 ++ * used when determining if two or more queues
5561 ++ * have interleaving requests (see bfq_close_cooperator).
5562 ++ * @busy_queues: number of bfq_queues containing requests (including the
5563 ++ * queue under service, even if it is idling).
5564 ++ * @queued: number of queued requests.
5565 ++ * @rq_in_driver: number of requests dispatched and waiting for completion.
5566 ++ * @sync_flight: number of sync requests in the driver.
5567 ++ * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples
5568 ++ *                   completed requests.
5569 ++ * @hw_tag_samples: nr of samples used to calculate hw_tag.
5570 ++ * @hw_tag: flag set to one if the driver is showing a queueing behavior.
5571 ++ * @budgets_assigned: number of budgets assigned.
5572 ++ * @idle_slice_timer: timer set when idling for the next sequential request
5573 ++ * from the queue under service.
5574 ++ * @unplug_work: delayed work to restart dispatching on the request queue.
5575 ++ * @active_queue: bfq_queue under service.
5576 ++ * @active_bic: bfq_io_cq (bic) associated with the @active_queue.
5577 ++ * @last_position: on-disk position of the last served request.
5578 ++ * @last_budget_start: beginning of the last budget.
5579 ++ * @last_idling_start: beginning of the last idle slice.
5580 ++ * @peak_rate: peak transfer rate observed for a budget.
5581 ++ * @peak_rate_samples: number of samples used to calculate @peak_rate.
5582 ++ * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling.
5583 ++ * @group_list: list of all the bfq_groups active on the device.
5584 ++ * @active_list: list of all the bfq_queues active on the device.
5585 ++ * @idle_list: list of all the bfq_queues idle on the device.
5586 ++ * @bfq_quantum: max number of requests dispatched per dispatch round.
5587 ++ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
5588 ++ * requests are served in fifo order.
5589 ++ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
5590 ++ * @bfq_back_max: maximum allowed backward seek.
5591 ++ * @bfq_slice_idle: maximum idling time.
5592 ++ * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning).
5593 ++ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
5594 ++ * async queues.
5595 ++ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
5596 ++ *                 prevent seeky queues from imposing long latencies on well-
5597 ++ * behaved ones (this also implies that seeky queues cannot
5598 ++ * receive guarantees in the service domain; after a timeout
5599 ++ * they are charged for the whole allocated budget, to try
5600 ++ * to preserve a behavior reasonably fair among them, but
5601 ++ * without service-domain guarantees).
5602 ++ * @bfq_raising_coeff: Maximum factor by which the weight of a boosted
5603 ++ * queue is multiplied
5604 ++ * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies)
5605 ++ * @bfq_raising_rt_max_time: maximum duration for soft real-time processes
5606 ++ * @bfq_raising_min_idle_time: minimum idle period after which weight-raising
5607 ++ * may be reactivated for a queue (in jiffies)
5608 ++ * @bfq_raising_min_inter_arr_async: minimum period between request arrivals
5609 ++ * after which weight-raising may be
5610 ++ * reactivated for an already busy queue
5611 ++ * (in jiffies)
5612 ++ * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue,
5613 ++ *                                sectors per second
5614 ++ * @RT_prod: cached value of the product R*T used for computing the maximum
5615 ++ * duration of the weight raising automatically
5616 ++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions
5617 ++ *
5618 ++ * All the fields are protected by the @queue lock.
5619 ++ */
5620 ++struct bfq_data {
5621 ++ struct request_queue *queue;
5622 ++
5623 ++ struct bfq_group *root_group;
5624 ++
5625 ++ struct rb_root rq_pos_tree;
5626 ++
5627 ++ int busy_queues;
5628 ++ int queued;
5629 ++ int rq_in_driver;
5630 ++ int sync_flight;
5631 ++
5632 ++ int max_rq_in_driver;
5633 ++ int hw_tag_samples;
5634 ++ int hw_tag;
5635 ++
5636 ++ int budgets_assigned;
5637 ++
5638 ++ struct timer_list idle_slice_timer;
5639 ++ struct work_struct unplug_work;
5640 ++
5641 ++ struct bfq_queue *active_queue;
5642 ++ struct bfq_io_cq *active_bic;
5643 ++
5644 ++ sector_t last_position;
5645 ++
5646 ++ ktime_t last_budget_start;
5647 ++ ktime_t last_idling_start;
5648 ++ int peak_rate_samples;
5649 ++ u64 peak_rate;
5650 ++ unsigned long bfq_max_budget;
5651 ++
5652 ++ struct hlist_head group_list;
5653 ++ struct list_head active_list;
5654 ++ struct list_head idle_list;
5655 ++
5656 ++ unsigned int bfq_quantum;
5657 ++ unsigned int bfq_fifo_expire[2];
5658 ++ unsigned int bfq_back_penalty;
5659 ++ unsigned int bfq_back_max;
5660 ++ unsigned int bfq_slice_idle;
5661 ++ u64 bfq_class_idle_last_service;
5662 ++
5663 ++ unsigned int bfq_user_max_budget;
5664 ++ unsigned int bfq_max_budget_async_rq;
5665 ++ unsigned int bfq_timeout[2];
5666 ++
5667 ++ bool low_latency;
5668 ++
5669 ++ /* parameters of the low_latency heuristics */
5670 ++ unsigned int bfq_raising_coeff;
5671 ++ unsigned int bfq_raising_max_time;
5672 ++ unsigned int bfq_raising_rt_max_time;
5673 ++ unsigned int bfq_raising_min_idle_time;
5674 ++ unsigned int bfq_raising_min_inter_arr_async;
5675 ++ unsigned int bfq_raising_max_softrt_rate;
5676 ++ u64 RT_prod;
5677 ++
5678 ++ struct bfq_queue oom_bfqq;
5679 ++};
5680 ++
5681 ++enum bfqq_state_flags {
5682 ++ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */
5683 ++ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
5684 ++ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
5685 ++ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
5686 ++ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
5687 ++ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */
5688 ++ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
5689 ++ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
5690 ++ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
5691 ++	BFQ_BFQQ_FLAG_split_coop,	/* shared bfqq will be split */
5692 ++ BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */
5693 ++};
5694 ++
5695 ++#define BFQ_BFQQ_FNS(name) \
5696 ++static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
5697 ++{ \
5698 ++ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \
5699 ++} \
5700 ++static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
5701 ++{ \
5702 ++ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \
5703 ++} \
5704 ++static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
5705 ++{ \
5706 ++ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
5707 ++}
5708 ++
5709 ++BFQ_BFQQ_FNS(busy);
5710 ++BFQ_BFQQ_FNS(wait_request);
5711 ++BFQ_BFQQ_FNS(must_alloc);
5712 ++BFQ_BFQQ_FNS(fifo_expire);
5713 ++BFQ_BFQQ_FNS(idle_window);
5714 ++BFQ_BFQQ_FNS(prio_changed);
5715 ++BFQ_BFQQ_FNS(sync);
5716 ++BFQ_BFQQ_FNS(budget_new);
5717 ++BFQ_BFQQ_FNS(coop);
5718 ++BFQ_BFQQ_FNS(split_coop);
5719 ++BFQ_BFQQ_FNS(some_coop_idle);
5720 ++#undef BFQ_BFQQ_FNS
5721 ++
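For reference, what one expansion of the macro above provides, taking the busy flag used by bfq_add_bfqq_busy()/bfq_del_bfqq_busy() earlier in this patch as the example:

	/* BFQ_BFQQ_FNS(busy) generates:
	 *   bfq_mark_bfqq_busy(bfqq);    sets   bit BFQ_BFQQ_FLAG_busy in flags
	 *   bfq_clear_bfqq_busy(bfqq);   clears the same bit
	 *   bfq_bfqq_busy(bfqq)          tests  it (returns 0 or 1)
	 */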
5722 ++/* Logging facilities. */
5723 ++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
5724 ++ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
5725 ++
5726 ++#define bfq_log(bfqd, fmt, args...) \
5727 ++ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
5728 ++
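Usage as it appears earlier in this patch; both macros expand to blk_add_trace_msg(), so the messages end up in the blktrace stream of the device's request queue:

	/* Examples from bfq-sched.c above:
	 *   bfq_log_bfqq(bfqd, bfqq, "add to busy");
	 *   bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
	 */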
5729 ++/* Expiration reasons. */
5730 ++enum bfqq_expiration {
5731 ++ BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */
5732 ++ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
5733 ++ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
5734 ++ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
5735 ++};
5736 ++
5737 ++#ifdef CONFIG_CGROUP_BFQIO
5738 ++/**
5739 ++ * struct bfq_group - per (device, cgroup) data structure.
5740 ++ * @entity: schedulable entity to insert into the parent group sched_data.
5741 ++ * @sched_data: own sched_data, to contain child entities (they may be
5742 ++ * both bfq_queues and bfq_groups).
5743 ++ * @group_node: node to be inserted into the bfqio_cgroup->group_data
5744 ++ * list of the containing cgroup's bfqio_cgroup.
5745 ++ * @bfqd_node: node to be inserted into the @bfqd->group_list list
5746 ++ * of the groups active on the same device; used for cleanup.
5747 ++ * @bfqd: the bfq_data for the device this group acts upon.
5748 ++ * @async_bfqq: array of async queues for all the tasks belonging to
5749 ++ * the group, one queue per ioprio value per ioprio_class,
5750 ++ * except for the idle class that has only one queue.
5751 ++ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
5752 ++ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
5753 ++ * to avoid too many special cases during group creation/migration.
5754 ++ *
5755 ++ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
5756 ++ * there is a set of bfq_groups, each one collecting the lower-level
5757 ++ * entities belonging to the group that are acting on the same device.
5758 ++ *
5759 ++ * Locking works as follows:
5760 ++ * o @group_node is protected by the bfqio_cgroup lock, and is accessed
5761 ++ * via RCU from its readers.
5762 ++ * o @bfqd is protected by the queue lock, RCU is used to access it
5763 ++ * from the readers.
5764 ++ * o All the other fields are protected by the @bfqd queue lock.
5765 ++ */
5766 ++struct bfq_group {
5767 ++ struct bfq_entity entity;
5768 ++ struct bfq_sched_data sched_data;
5769 ++
5770 ++ struct hlist_node group_node;
5771 ++ struct hlist_node bfqd_node;
5772 ++
5773 ++ void *bfqd;
5774 ++
5775 ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
5776 ++ struct bfq_queue *async_idle_bfqq;
5777 ++
5778 ++ struct bfq_entity *my_entity;
5779 ++};
5780 ++
5781 ++/**
5782 ++ * struct bfqio_cgroup - bfq cgroup data structure.
5783 ++ * @css: subsystem state for bfq in the containing cgroup.
5784 ++ * @weight: cgroup weight.
5785 ++ * @ioprio: cgroup ioprio.
5786 ++ * @ioprio_class: cgroup ioprio_class.
5787 ++ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data.
5788 ++ * @group_data: list containing the bfq_group belonging to this cgroup.
5789 ++ *
5790 ++ * @group_data is accessed using RCU, with @lock protecting the updates,
5791 ++ * @ioprio and @ioprio_class are protected by @lock.
5792 ++ */
5793 ++struct bfqio_cgroup {
5794 ++ struct cgroup_subsys_state css;
5795 ++
5796 ++ unsigned short weight, ioprio, ioprio_class;
5797 ++
5798 ++ spinlock_t lock;
5799 ++ struct hlist_head group_data;
5800 ++};
5801 ++#else
5802 ++struct bfq_group {
5803 ++ struct bfq_sched_data sched_data;
5804 ++
5805 ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
5806 ++ struct bfq_queue *async_idle_bfqq;
5807 ++};
5808 ++#endif
5809 ++
5810 ++static inline struct bfq_service_tree *
5811 ++bfq_entity_service_tree(struct bfq_entity *entity)
5812 ++{
5813 ++ struct bfq_sched_data *sched_data = entity->sched_data;
5814 ++ unsigned int idx = entity->ioprio_class - 1;
5815 ++
5816 ++ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
5817 ++ BUG_ON(sched_data == NULL);
5818 ++
5819 ++ return sched_data->service_tree + idx;
5820 ++}
5821 ++
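The index arithmetic above assumes the standard ioprio class values from <linux/ioprio.h>:

	/* Assuming IOPRIO_CLASS_RT == 1, IOPRIO_CLASS_BE == 2,
	 * IOPRIO_CLASS_IDLE == 3:
	 *   RT   -> sched_data->service_tree[0]
	 *   BE   -> sched_data->service_tree[1]
	 *   IDLE -> sched_data->service_tree[2]
	 * matching BFQ_IOPRIO_CLASSES == 3 defined at the top of this header.
	 */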
5822 ++static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic,
5823 ++ int is_sync)
5824 ++{
5825 ++ return bic->bfqq[!!is_sync];
5826 ++}
5827 ++
5828 ++static inline void bic_set_bfqq(struct bfq_io_cq *bic,
5829 ++ struct bfq_queue *bfqq, int is_sync)
5830 ++{
5831 ++ bic->bfqq[!!is_sync] = bfqq;
5832 ++}
5833 ++
5834 ++static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
5835 ++{
5836 ++ return bic->icq.q->elevator->elevator_data;
5837 ++}
5838 ++
5839 ++/**
5840 ++ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer.
5841 ++ * @ptr: a pointer to a bfqd.
5842 ++ * @flags: storage for the flags to be saved.
5843 ++ *
5844 ++ * This function allows bfqg->bfqd to be protected by the
5845 ++ * queue lock of the bfqd they reference; the pointer is dereferenced
5846 ++ * under RCU, so the storage for bfqd is assured to be safe as long
5847 ++ * as the RCU read side critical section does not end. After the
5848 ++ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
5849 ++ * sure that no other writer accessed it. If we raced with a writer,
5850 ++ * the function returns NULL, with the queue unlocked, otherwise it
5851 ++ * returns the dereferenced pointer, with the queue locked.
5852 ++ */
5853 ++static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
5854 ++ unsigned long *flags)
5855 ++{
5856 ++ struct bfq_data *bfqd;
5857 ++
5858 ++ rcu_read_lock();
5859 ++ bfqd = rcu_dereference(*(struct bfq_data **)ptr);
5860 ++
5861 ++ if (bfqd != NULL) {
5862 ++ spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
5863 ++ if (*ptr == bfqd)
5864 ++ goto out;
5865 ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
5866 ++ }
5867 ++
5868 ++ bfqd = NULL;
5869 ++out:
5870 ++ rcu_read_unlock();
5871 ++ return bfqd;
5872 ++}
5873 ++
5874 ++static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
5875 ++ unsigned long *flags)
5876 ++{
5877 ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
5878 ++}
5879 ++
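A hedged usage sketch for the pair of helpers above; the caller is hypothetical and assumes the CONFIG_CGROUP_BFQIO variant of struct bfq_group, whose bfqd field is the RCU-protected pointer being dereferenced. The point is that a successful bfq_get_bfqd_locked() must be paired with bfq_put_bfqd_unlock() using the same flags storage.

	/* Hypothetical caller, for illustration only. */
	static void sketch_with_bfqd_locked(struct bfq_group *bfqg)
	{
		unsigned long flags;
		struct bfq_data *bfqd;

		bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
		if (bfqd == NULL)
			return;	/* raced with a writer; queue left unlocked */

		/* ... work under bfqd->queue->queue_lock, IRQs disabled ... */

		bfq_put_bfqd_unlock(bfqd, &flags);
	}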
5880 ++static void bfq_changed_ioprio(struct bfq_io_cq *bic);
5881 ++static void bfq_put_queue(struct bfq_queue *bfqq);
5882 ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
5883 ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
5884 ++ struct bfq_group *bfqg, int is_sync,
5885 ++ struct bfq_io_cq *bic, gfp_t gfp_mask);
5886 ++static void bfq_end_raising_async_queues(struct bfq_data *bfqd,
5887 ++ struct bfq_group *bfqg);
5888 ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
5889 ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
5890 ++#endif
5891 +--
5892 +1.8.1.4
5893 +
5894
5895 Added: genpatches-2.6/trunk/3.10/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.9.0.patch1
5896 ===================================================================
5897 --- genpatches-2.6/trunk/3.10/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.9.0.patch1 (rev 0)
5898 +++ genpatches-2.6/trunk/3.10/1803_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v6r2-for-3.9.0.patch1 2013-07-01 07:02:35 UTC (rev 2423)
5899 @@ -0,0 +1,1049 @@
5900 +From 84032f90ea34f4d17b361eac4707793797db1461 Mon Sep 17 00:00:00 2001
5901 +From: Arianna Avanzini <avanzini.arianna@×××××.com>
5902 +Date: Fri, 14 Jun 2013 13:46:47 +0200
5903 +Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v6r2 for
5904 + 3.9.0
5905 +
5906 +A set of processes may happen to perform interleaved reads, i.e., requests
5907 +whose union would give rise to a sequential read pattern. There are two
5908 +typical cases: in the first case, processes read fixed-size chunks of
5909 +data at a fixed distance from each other, while in the second case processes
5910 +may read variable-size chunks at variable distances. The latter case occurs
5911 +for example with KVM, which splits the I/O generated by the guest into
5912 +multiple chunks, and lets these chunks be served by a pool of cooperating
5913 +processes, iteratively assigning the next chunk of I/O to the first
5914 +available process. CFQ uses actual queue merging for the first type of
5915 +processes, whereas it uses preemption to get a sequential read pattern out
5916 +of the read requests performed by the second type of processes. In the end
5917 +it uses two different mechanisms to achieve the same goal: boosting the
5918 +throughput with interleaved I/O.
5919 +
5920 +This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
5921 +sequential read pattern with both types of processes. The main idea is
5922 +checking newly arrived requests against the next request of the active queue
5923 +both in case of actual request insert and in case of request merge. By doing
5924 +so, both types of processes can be handled by just merging their queues.
5925 +EQM is then simpler and more compact than the pair of mechanisms used in
5926 +CFQ.
5927 +
5928 +Finally, EQM also preserves the typical low-latency properties of BFQ, by
5929 +properly restoring the weight-raising state of a queue when it gets back to
5930 +a non-merged state.
5931 +
5932 +Signed-off-by: Mauro Andreolini <mauro.andreolini@×××××××.it>
5933 +Signed-off-by: Arianna Avanzini <avanzini.arianna@×××××.com>
5934 +Reviewed-by: Paolo Valente <paolo.valente@×××××××.it>
5935 +---
5936 + block/bfq-iosched.c | 653 ++++++++++++++++++++++++++++++++++++----------------
5937 + block/bfq-sched.c | 28 ---
5938 + block/bfq.h | 16 ++
5939 + 3 files changed, 466 insertions(+), 231 deletions(-)
5940 +
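Roughly, the merge decision that the diff below implements in bfq_setup_cooperator() can be read as the following simplified pseudo-logic (see the real function for the full set of checks):

	/* Simplified view of the EQM decision on request arrival/merge:
	 *   if the new I/O lands close to bfqd->last_position and both the
	 *   arriving queue and the active queue are sync and share a parent
	 *           -> schedule a merge with the active queue;
	 *   else look up a close cooperator in the rq_pos_tree
	 *           -> if found, schedule a merge with it;
	 *   else    -> no merge, the queues stay separate.
	 */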
5941 +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
5942 +index b230927..bc57923 100644
5943 +--- a/block/bfq-iosched.c
5944 ++++ b/block/bfq-iosched.c
5945 +@@ -444,6 +444,43 @@ static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
5946 + return dur;
5947 + }
5948 +
5949 ++static inline void
5950 ++bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
5951 ++{
5952 ++ if (bic->saved_idle_window)
5953 ++ bfq_mark_bfqq_idle_window(bfqq);
5954 ++ else
5955 ++ bfq_clear_bfqq_idle_window(bfqq);
5956 ++ if (bic->raising_time_left && bfqq->bfqd->low_latency) {
5957 ++ /*
5958 ++ * Start a weight raising period with the duration given by
5959 ++ * the raising_time_left snapshot.
5960 ++ */
5961 ++ bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff;
5962 ++ bfqq->raising_cur_max_time = bic->raising_time_left;
5963 ++ bfqq->last_rais_start_finish = jiffies;
5964 ++ }
5965 ++ /*
5966 ++ * Clear raising_time_left to prevent bfq_bfqq_save_state() from
5967 ++ * getting confused about the queue's need of a weight-raising
5968 ++ * period.
5969 ++ */
5970 ++ bic->raising_time_left = 0;
5971 ++}
5972 ++
5973 ++/*
5974 ++ * Must be called with the queue_lock held.
5975 ++ */
5976 ++static int bfqq_process_refs(struct bfq_queue *bfqq)
5977 ++{
5978 ++ int process_refs, io_refs;
5979 ++
5980 ++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
5981 ++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
5982 ++ BUG_ON(process_refs < 0);
5983 ++ return process_refs;
5984 ++}
5985 ++
5986 + static void bfq_add_rq_rb(struct request *rq)
5987 + {
5988 + struct bfq_queue *bfqq = RQ_BFQQ(rq);
5989 +@@ -483,11 +520,20 @@ static void bfq_add_rq_rb(struct request *rq)
5990 + if (! bfqd->low_latency)
5991 + goto add_bfqq_busy;
5992 +
5993 ++ if (bfq_bfqq_just_split(bfqq))
5994 ++ goto set_ioprio_changed;
5995 ++
5996 + /*
5997 +- * If the queue is not being boosted and has been idle
5998 +- * for enough time, start a weight-raising period
5999 ++ * If the queue:
6000 ++ * - is not being boosted,
6001 ++ * - has been idle for enough time,
6002 ++ * - is not a sync queue or is linked to a bfq_io_cq (it is
6003 ++ *   shared "by nature" or it is not shared and its
6004 ++ * requests have not been redirected to a shared queue)
6005 ++ * start a weight-raising period.
6006 + */
6007 +- if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) {
6008 ++ if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt) &&
6009 ++ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
6010 + bfqq->raising_coeff = bfqd->bfq_raising_coeff;
6011 + if (idle_for_long_time)
6012 + bfqq->raising_cur_max_time =
6013 +@@ -517,6 +563,7 @@ static void bfq_add_rq_rb(struct request *rq)
6014 + raising_cur_max_time));
6015 + }
6016 + }
6017 ++set_ioprio_changed:
6018 + if (old_raising_coeff != bfqq->raising_coeff)
6019 + entity->ioprio_changed = 1;
6020 + add_bfqq_busy:
6021 +@@ -695,89 +742,35 @@ static void bfq_end_raising(struct bfq_data *bfqd)
6022 + spin_unlock_irq(bfqd->queue->queue_lock);
6023 + }
6024 +
6025 +-static int bfq_allow_merge(struct request_queue *q, struct request *rq,
6026 +- struct bio *bio)
6027 ++static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
6028 + {
6029 +- struct bfq_data *bfqd = q->elevator->elevator_data;
6030 +- struct bfq_io_cq *bic;
6031 +- struct bfq_queue *bfqq;
6032 +-
6033 +- /*
6034 +- * Disallow merge of a sync bio into an async request.
6035 +- */
6036 +- if (bfq_bio_sync(bio) && !rq_is_sync(rq))
6037 +- return 0;
6038 +-
6039 +- /*
6040 +- * Lookup the bfqq that this bio will be queued with. Allow
6041 +- * merge only if rq is queued there.
6042 +- * Queue lock is held here.
6043 +- */
6044 +- bic = bfq_bic_lookup(bfqd, current->io_context);
6045 +- if (bic == NULL)
6046 +- return 0;
6047 +-
6048 +- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
6049 +- return bfqq == RQ_BFQQ(rq);
6050 +-}
6051 +-
6052 +-static void __bfq_set_active_queue(struct bfq_data *bfqd,
6053 +- struct bfq_queue *bfqq)
6054 +-{
6055 +- if (bfqq != NULL) {
6056 +- bfq_mark_bfqq_must_alloc(bfqq);
6057 +- bfq_mark_bfqq_budget_new(bfqq);
6058 +- bfq_clear_bfqq_fifo_expire(bfqq);
6059 +-
6060 +- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
6061 +-
6062 +- bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu",
6063 +- bfqq->entity.budget);
6064 +- }
6065 +-
6066 +- bfqd->active_queue = bfqq;
6067 +-}
6068 +-
6069 +-/*
6070 +- * Get and set a new active queue for service.
6071 +- */
6072 +-static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd,
6073 +- struct bfq_queue *bfqq)
6074 +-{
6075 +- if (!bfqq)
6076 +- bfqq = bfq_get_next_queue(bfqd);
6077 ++ if (request)
6078 ++ return blk_rq_pos(io_struct);
6079 + else
6080 +- bfq_get_next_queue_forced(bfqd, bfqq);
6081 +-
6082 +- __bfq_set_active_queue(bfqd, bfqq);
6083 +- return bfqq;
6084 ++ return ((struct bio *)io_struct)->bi_sector;
6085 + }
6086 +
6087 +-static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
6088 +- struct request *rq)
6089 ++static inline sector_t bfq_dist_from(sector_t pos1,
6090 ++ sector_t pos2)
6091 + {
6092 +- if (blk_rq_pos(rq) >= bfqd->last_position)
6093 +- return blk_rq_pos(rq) - bfqd->last_position;
6094 ++ if (pos1 >= pos2)
6095 ++ return pos1 - pos2;
6096 + else
6097 +- return bfqd->last_position - blk_rq_pos(rq);
6098 ++ return pos2 - pos1;
6099 + }
6100 +
6101 +-/*
6102 +- * Return true if bfqq has no request pending and rq is close enough to
6103 +- * bfqd->last_position, or if rq is closer to bfqd->last_position than
6104 +- * bfqq->next_rq
6105 +- */
6106 +-static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
6107 ++static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
6108 ++ sector_t sector)
6109 + {
6110 +- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
6111 ++ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
6112 ++ BFQQ_SEEK_THR;
6113 + }
6114 +
6115 +-static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
6116 ++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
6117 + {
6118 + struct rb_root *root = &bfqd->rq_pos_tree;
6119 + struct rb_node *parent, *node;
6120 + struct bfq_queue *__bfqq;
6121 +- sector_t sector = bfqd->last_position;
6122 +
6123 + if (RB_EMPTY_ROOT(root))
6124 + return NULL;
6125 +@@ -796,7 +789,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
6126 + * position).
6127 + */
6128 + __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
6129 +- if (bfq_rq_close(bfqd, __bfqq->next_rq))
6130 ++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
6131 + return __bfqq;
6132 +
6133 + if (blk_rq_pos(__bfqq->next_rq) < sector)
6134 +@@ -807,7 +800,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
6135 + return NULL;
6136 +
6137 + __bfqq = rb_entry(node, struct bfq_queue, pos_node);
6138 +- if (bfq_rq_close(bfqd, __bfqq->next_rq))
6139 ++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
6140 + return __bfqq;
6141 +
6142 + return NULL;
6143 +@@ -816,14 +809,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
6144 + /*
6145 + * bfqd - obvious
6146 + * cur_bfqq - passed in so that we don't decide that the current queue
6147 +- * is closely cooperating with itself.
6148 +- *
6149 +- * We are assuming that cur_bfqq has dispatched at least one request,
6150 +- * and that bfqd->last_position reflects a position on the disk associated
6151 +- * with the I/O issued by cur_bfqq.
6152 ++ * is closely cooperating with itself
6153 ++ * sector - used as a reference point to search for a close queue
6154 + */
6155 + static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
6156 +- struct bfq_queue *cur_bfqq)
6157 ++ struct bfq_queue *cur_bfqq,
6158 ++ sector_t sector)
6159 + {
6160 + struct bfq_queue *bfqq;
6161 +
6162 +@@ -843,7 +834,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
6163 + * working closely on the same area of the disk. In that case,
6164 + * we can group them together and don't waste time idling.
6165 + */
6166 +- bfqq = bfqq_close(bfqd);
6167 ++ bfqq = bfqq_close(bfqd, sector);
6168 + if (bfqq == NULL || bfqq == cur_bfqq)
6169 + return NULL;
6170 +
6171 +@@ -870,6 +861,275 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
6172 + return bfqq;
6173 + }
6174 +
6175 ++static struct bfq_queue *
6176 ++bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
6177 ++{
6178 ++ int process_refs, new_process_refs;
6179 ++ struct bfq_queue *__bfqq;
6180 ++
6181 ++ /*
6182 ++ * If there are no process references on the new_bfqq, then it is
6183 ++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
6184 ++ * may have dropped their last reference (not just their last process
6185 ++ * reference).
6186 ++ */
6187 ++ if (!bfqq_process_refs(new_bfqq))
6188 ++ return NULL;
6189 ++
6190 ++ /* Avoid a circular list and skip interim queue merges. */
6191 ++ while ((__bfqq = new_bfqq->new_bfqq)) {
6192 ++ if (__bfqq == bfqq)
6193 ++ return NULL;
6194 ++ new_bfqq = __bfqq;
6195 ++ }
6196 ++
6197 ++ process_refs = bfqq_process_refs(bfqq);
6198 ++ new_process_refs = bfqq_process_refs(new_bfqq);
6199 ++ /*
6200 ++ * If the process for the bfqq has gone away, there is no
6201 ++ * sense in merging the queues.
6202 ++ */
6203 ++ if (process_refs == 0 || new_process_refs == 0)
6204 ++ return NULL;
6205 ++
6206 ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
6207 ++ new_bfqq->pid);
6208 ++
6209 ++ /*
6210 ++ * Merging is just a redirection: the requests of the process owning
6211 ++ * one of the two queues are redirected to the other queue. The latter
6212 ++ * queue, in its turn, is set as shared if this is the first time that
6213 ++ * the requests of some process are redirected to it.
6214 ++ *
6215 ++ * We redirect bfqq to new_bfqq and not the opposite, because we
6216 ++ * are in the context of the process owning bfqq, hence we have the
6217 ++ * io_cq of this process. So we can immediately configure this io_cq
6218 ++ * to redirect the requests of the process to new_bfqq.
6219 ++ *
6220 ++ * NOTE, even if new_bfqq coincides with the active queue, the io_cq of
6221 ++ * new_bfqq is not available, because, if the active queue is shared,
6222 ++ * bfqd->active_bic may not point to the io_cq of the active queue.
6223 ++ * Redirecting the requests of the process owning bfqq to the currently
6224 ++ * active queue is in any case the best option, as we feed the active queue
6225 ++ * with new requests close to the last request served and, by doing so,
6226 ++ * hopefully increase the throughput.
6227 ++ */
6228 ++ bfqq->new_bfqq = new_bfqq;
6229 ++ atomic_add(process_refs, &new_bfqq->ref);
6230 ++ return new_bfqq;
6231 ++}
6232 ++
6233 ++/*
6234 ++ * Attempt to schedule a merge of bfqq with the currently active queue or
6235 ++ * with a close queue among the scheduled queues.
6236 ++ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
6237 ++ * structure otherwise.
6238 ++ */
6239 ++static struct bfq_queue *
6240 ++bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
6241 ++ void *io_struct, bool request)
6242 ++{
6243 ++ struct bfq_queue *active_bfqq, *new_bfqq;
6244 ++
6245 ++ if (bfqq->new_bfqq)
6246 ++ return bfqq->new_bfqq;
6247 ++
6248 ++ if (!io_struct)
6249 ++ return NULL;
6250 ++
6251 ++ active_bfqq = bfqd->active_queue;
6252 ++
6253 ++ if (active_bfqq == NULL || active_bfqq == bfqq || !bfqd->active_bic)
6254 ++ goto check_scheduled;
6255 ++
6256 ++ if (bfq_class_idle(active_bfqq) || bfq_class_idle(bfqq))
6257 ++ goto check_scheduled;
6258 ++
6259 ++ if (bfq_class_rt(active_bfqq) != bfq_class_rt(bfqq))
6260 ++ goto check_scheduled;
6261 ++
6262 ++ if (active_bfqq->entity.parent != bfqq->entity.parent)
6263 ++ goto check_scheduled;
6264 ++
6265 ++ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
6266 ++ bfq_bfqq_sync(active_bfqq) && bfq_bfqq_sync(bfqq))
6267 ++ if ((new_bfqq = bfq_setup_merge(bfqq, active_bfqq)))
6268 ++ return new_bfqq; /* Merge with the active queue */
6269 ++
6270 ++ /*
6271 ++ * Check whether there is a cooperator among currently scheduled
6272 ++ * queues. The only thing we need is that the bio/request is not
6273 ++ * NULL, as we need it to establish whether a cooperator exists.
6274 ++ */
6275 ++check_scheduled:
6276 ++ new_bfqq = bfq_close_cooperator(bfqd, bfqq,
6277 ++ bfq_io_struct_pos(io_struct, request));
6278 ++ if (new_bfqq)
6279 ++ return bfq_setup_merge(bfqq, new_bfqq);
6280 ++
6281 ++ return NULL;
6282 ++}
6283 ++
6284 ++static inline void
6285 ++bfq_bfqq_save_state(struct bfq_queue *bfqq)
6286 ++{
6287 ++ /*
6288 ++ * If bfqq->bic == NULL, the queue is already shared or its requests
6289 ++ * have already been redirected to a shared queue; both idle window
6290 ++ * and weight raising state have already been saved. Do nothing.
6291 ++ */
6292 ++ if (bfqq->bic == NULL)
6293 ++ return;
6294 ++ if (bfqq->bic->raising_time_left)
6295 ++ /*
6296 ++ * This is the queue of a just-started process, and would
6297 ++ * deserve weight raising: we set raising_time_left to the full
6298 ++ * weight-raising duration to trigger weight-raising when and
6299 ++ * if the queue is split and the first request of the queue
6300 ++ * is enqueued.
6301 ++ */
6302 ++ bfqq->bic->raising_time_left = bfq_wrais_duration(bfqq->bfqd);
6303 ++ else if (bfqq->raising_coeff > 1) {
6304 ++ unsigned long wrais_duration =
6305 ++ jiffies - bfqq->last_rais_start_finish;
6306 ++ /*
6307 ++ * It may happen that a queue's weight raising period lasts
6308 ++ * longer than its raising_cur_max_time, as weight raising is
6309 ++ * handled only when a request is enqueued or dispatched (it
6310 ++ * does not use any timer). If the weight raising period is
6311 ++ * about to end, don't save it.
6312 ++ */
6313 ++ if (bfqq->raising_cur_max_time <= wrais_duration)
6314 ++ bfqq->bic->raising_time_left = 0;
6315 ++ else
6316 ++ bfqq->bic->raising_time_left =
6317 ++ bfqq->raising_cur_max_time - wrais_duration;
6318 ++ /*
6319 ++ * The bfq_queue is becoming shared or the requests of the
6320 ++ * process owning the queue are being redirected to a shared
6321 ++ * queue. Stop the weight raising period of the queue, as in
6322 ++ * both cases it should not be owned by an interactive or soft
6323 ++ * real-time application.
6324 ++ */
6325 ++ bfq_bfqq_end_raising(bfqq);
6326 ++ } else
6327 ++ bfqq->bic->raising_time_left = 0;
6328 ++ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
6329 ++}
6330 ++
6331 ++static inline void
6332 ++bfq_get_bic_reference(struct bfq_queue *bfqq)
6333 ++{
6334 ++ /*
6335 ++ * If bfqq->bic has a non-NULL value, the bic to which it belongs
6336 ++ * is about to begin using a shared bfq_queue.
6337 ++ */
6338 ++ if (bfqq->bic)
6339 ++ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
6340 ++}
6341 ++
6342 ++static void
6343 ++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
6344 ++ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
6345 ++{
6346 ++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
6347 ++ (long unsigned)new_bfqq->pid);
6348 ++ /* Save weight raising and idle window of the merged queues */
6349 ++ bfq_bfqq_save_state(bfqq);
6350 ++ bfq_bfqq_save_state(new_bfqq);
6351 ++ /*
6352 ++ * Grab a reference to the bic, to prevent it from being destroyed
6353 ++ * before being possibly touched by a bfq_split_bfqq().
6354 ++ */
6355 ++ bfq_get_bic_reference(bfqq);
6356 ++ bfq_get_bic_reference(new_bfqq);
6357 ++ /* Merge queues (that is, let bic redirect its requests to new_bfqq) */
6358 ++ bic_set_bfqq(bic, new_bfqq, 1);
6359 ++ bfq_mark_bfqq_coop(new_bfqq);
6360 ++ /*
6361 ++ * new_bfqq now belongs to at least two bics (it is a shared queue): set
6362 ++ * new_bfqq->bic to NULL. bfqq either:
6363 ++ * - does not belong to any bic any more, and hence bfqq->bic must
6364 ++ * be set to NULL, or
6365 ++ * - is a queue whose owning bics have already been redirected to a
6366 ++ * different queue, hence the queue is destined to not belong to any
6367 ++ * bic soon and bfqq->bic is already NULL (therefore the next
6368 ++ * assignment causes no harm).
6369 ++ */
6370 ++ new_bfqq->bic = NULL;
6371 ++ bfqq->bic = NULL;
6372 ++ bfq_put_queue(bfqq);
6373 ++}
6374 ++
6375 ++static int bfq_allow_merge(struct request_queue *q, struct request *rq,
6376 ++ struct bio *bio)
6377 ++{
6378 ++ struct bfq_data *bfqd = q->elevator->elevator_data;
6379 ++ struct bfq_io_cq *bic;
6380 ++ struct bfq_queue *bfqq, *new_bfqq;
6381 ++
6382 ++ /*
6383 ++ * Disallow merge of a sync bio into an async request.
6384 ++ */
6385 ++ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
6386 ++ return 0;
6387 ++
6388 ++ /*
6389 ++ * Lookup the bfqq that this bio will be queued with. Allow
6390 ++ * merge only if rq is queued there.
6391 ++ * Queue lock is held here.
6392 ++ */
6393 ++ bic = bfq_bic_lookup(bfqd, current->io_context);
6394 ++ if (bic == NULL)
6395 ++ return 0;
6396 ++
6397 ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
6398 ++ /*
6399 ++ * We take advantage of this function to perform an early merge
6400 ++ * of the queues of possible cooperating processes.
6401 ++ */
6402 ++ if (bfqq != NULL &&
6403 ++ (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false))) {
6404 ++ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
6405 ++ /*
6406 ++ * If we get here, the bio will be queued in the shared queue,
6407 ++ * i.e., new_bfqq, so use new_bfqq to decide whether bio and
6408 ++ * rq can be merged.
6409 ++ */
6410 ++ bfqq = new_bfqq;
6411 ++ }
6412 ++
6413 ++ return bfqq == RQ_BFQQ(rq);
6414 ++}
6415 ++
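A minimal stand-alone sketch of the merge decision implemented by bfq_allow_merge() above (toy_* names are illustrative, and bare pointers stand in for the real bfq_queue lookup): a sync bio never merges into an async request, and otherwise merging is allowed only when bio and request map to the same queue, possibly the shared queue chosen by the early merge.

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-alone sketch (not part of the patch) of the bio/request merge
 * decision above: a sync bio is never merged into an async request, and
 * otherwise the merge is allowed only if the bio would be queued in the
 * same queue as the request.
 */
static bool toy_allow_merge(bool bio_sync, bool rq_sync,
                            const void *bio_queue, const void *rq_queue)
{
        if (bio_sync && !rq_sync)
                return false;
        return bio_queue == rq_queue;
}

int main(void)
{
        int shared_q, other_q;

        printf("%d\n", toy_allow_merge(true, false, &shared_q, &shared_q)); /* 0 */
        printf("%d\n", toy_allow_merge(true, true, &shared_q, &shared_q));  /* 1 */
        printf("%d\n", toy_allow_merge(false, false, &other_q, &shared_q)); /* 0 */
        return 0;
}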
6416 ++static void __bfq_set_active_queue(struct bfq_data *bfqd,
6417 ++ struct bfq_queue *bfqq)
6418 ++{
6419 ++ if (bfqq != NULL) {
6420 ++ bfq_mark_bfqq_must_alloc(bfqq);
6421 ++ bfq_mark_bfqq_budget_new(bfqq);
6422 ++ bfq_clear_bfqq_fifo_expire(bfqq);
6423 ++
6424 ++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
6425 ++
6426 ++ bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu",
6427 ++ bfqq->entity.budget);
6428 ++ }
6429 ++
6430 ++ bfqd->active_queue = bfqq;
6431 ++}
6432 ++
6433 ++/*
6434 ++ * Get and set a new active queue for service.
6435 ++ */
6436 ++static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd)
6437 ++{
6438 ++ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
6439 ++
6440 ++ __bfq_set_active_queue(bfqd, bfqq);
6441 ++ return bfqq;
6442 ++}
6443 ++
6444 + /*
6445 + * If enough samples have been computed, return the current max budget
6446 + * stored in bfqd, which is dynamically updated according to the
6447 +@@ -1017,63 +1277,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
6448 + return rq;
6449 + }
6450 +
6451 +-/*
6452 +- * Must be called with the queue_lock held.
6453 +- */
6454 +-static int bfqq_process_refs(struct bfq_queue *bfqq)
6455 +-{
6456 +- int process_refs, io_refs;
6457 +-
6458 +- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
6459 +- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
6460 +- BUG_ON(process_refs < 0);
6461 +- return process_refs;
6462 +-}
6463 +-
6464 +-static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
6465 +-{
6466 +- int process_refs, new_process_refs;
6467 +- struct bfq_queue *__bfqq;
6468 +-
6469 +- /*
6470 +- * If there are no process references on the new_bfqq, then it is
6471 +- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
6472 +- * may have dropped their last reference (not just their last process
6473 +- * reference).
6474 +- */
6475 +- if (!bfqq_process_refs(new_bfqq))
6476 +- return;
6477 +-
6478 +- /* Avoid a circular list and skip interim queue merges. */
6479 +- while ((__bfqq = new_bfqq->new_bfqq)) {
6480 +- if (__bfqq == bfqq)
6481 +- return;
6482 +- new_bfqq = __bfqq;
6483 +- }
6484 +-
6485 +- process_refs = bfqq_process_refs(bfqq);
6486 +- new_process_refs = bfqq_process_refs(new_bfqq);
6487 +- /*
6488 +- * If the process for the bfqq has gone away, there is no
6489 +- * sense in merging the queues.
6490 +- */
6491 +- if (process_refs == 0 || new_process_refs == 0)
6492 +- return;
6493 +-
6494 +- /*
6495 +- * Merge in the direction of the lesser amount of work.
6496 +- */
6497 +- if (new_process_refs >= process_refs) {
6498 +- bfqq->new_bfqq = new_bfqq;
6499 +- atomic_add(process_refs, &new_bfqq->ref);
6500 +- } else {
6501 +- new_bfqq->new_bfqq = bfqq;
6502 +- atomic_add(new_process_refs, &bfqq->ref);
6503 +- }
6504 +- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
6505 +- new_bfqq->pid);
6506 +-}
6507 +-
6508 + static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
6509 + {
6510 + struct bfq_entity *entity = &bfqq->entity;
6511 +@@ -1493,6 +1696,14 @@ static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
6512 + * is likely to boost the disk throughput);
6513 + * - the queue is weight-raised (waiting for the request is necessary for
6514 + * providing the queue with fairness and latency guarantees).
6515 ++ *
6516 ++ * In any case, idling can be disabled for cooperation issues, if
6517 ++ * 1) there is a close cooperator for the queue, or
6518 ++ * 2) the queue is shared and some cooperator is likely to be idle (in this
6519 ++ * case, by not arming the idle timer, we try to slow down the queue, to
6520 ++ * prevent the zones of the disk accessed by the active cooperators from
6521 ++ * becoming too distant from the zone that will be accessed by the currently
6522 ++ * idle cooperators).
6523 + */
6524 + static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq,
6525 + int budg_timeout)
6526 +@@ -1507,7 +1718,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq,
6527 + (bfqd->rq_in_driver == 0 ||
6528 + budg_timeout ||
6529 + bfqq->raising_coeff > 1) &&
6530 +- !bfq_close_cooperator(bfqd, bfqq) &&
6531 ++ !bfq_close_cooperator(bfqd, bfqq, bfqd->last_position) &&
6532 + (!bfq_bfqq_coop(bfqq) ||
6533 + !bfq_bfqq_some_coop_idle(bfqq)) &&
6534 + !bfq_queue_nonrot_noidle(bfqd, bfqq));
6535 +@@ -1519,7 +1730,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq,
6536 + */
6537 + static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
6538 + {
6539 +- struct bfq_queue *bfqq, *new_bfqq = NULL;
6540 ++ struct bfq_queue *bfqq;
6541 + struct request *next_rq;
6542 + enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
6543 + int budg_timeout;
6544 +@@ -1530,17 +1741,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
6545 +
6546 + bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue");
6547 +
6548 +- /*
6549 +- * If another queue has a request waiting within our mean seek
6550 +- * distance, let it run. The expire code will check for close
6551 +- * cooperators and put the close queue at the front of the
6552 +- * service tree. If possible, merge the expiring queue with the
6553 +- * new bfqq.
6554 +- */
6555 +- new_bfqq = bfq_close_cooperator(bfqd, bfqq);
6556 +- if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
6557 +- bfq_setup_merge(bfqq, new_bfqq);
6558 +-
6559 + budg_timeout = bfq_may_expire_for_budg_timeout(bfqq);
6560 + if (budg_timeout &&
6561 + !bfq_bfqq_must_idle(bfqq, budg_timeout))
6562 +@@ -1577,10 +1777,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
6563 + bfq_clear_bfqq_wait_request(bfqq);
6564 + del_timer(&bfqd->idle_slice_timer);
6565 + }
6566 +- if (new_bfqq == NULL)
6567 +- goto keep_queue;
6568 +- else
6569 +- goto expire;
6570 ++ goto keep_queue;
6571 + }
6572 + }
6573 +
6574 +@@ -1589,26 +1786,19 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
6575 + * queue still has requests in flight or is idling for a new request,
6576 + * then keep it.
6577 + */
6578 +- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
6579 ++ if (timer_pending(&bfqd->idle_slice_timer) ||
6580 + (bfqq->dispatched != 0 &&
6581 + (bfq_bfqq_idle_window(bfqq) || bfqq->raising_coeff > 1) &&
6582 +- !bfq_queue_nonrot_noidle(bfqd, bfqq)))) {
6583 ++ !bfq_queue_nonrot_noidle(bfqd, bfqq))) {
6584 + bfqq = NULL;
6585 + goto keep_queue;
6586 +- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
6587 +- /*
6588 +- * Expiring the queue because there is a close cooperator,
6589 +- * cancel timer.
6590 +- */
6591 +- bfq_clear_bfqq_wait_request(bfqq);
6592 +- del_timer(&bfqd->idle_slice_timer);
6593 + }
6594 +
6595 + reason = BFQ_BFQQ_NO_MORE_REQUESTS;
6596 + expire:
6597 + bfq_bfqq_expire(bfqd, bfqq, 0, reason);
6598 + new_queue:
6599 +- bfqq = bfq_set_active_queue(bfqd, new_bfqq);
6600 ++ bfqq = bfq_set_active_queue(bfqd);
6601 + bfq_log(bfqd, "select_queue: new queue %d returned",
6602 + bfqq != NULL ? bfqq->pid : 0);
6603 + keep_queue:
6604 +@@ -1617,9 +1807,8 @@ keep_queue:
6605 +
6606 + static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
6607 + {
6608 ++ struct bfq_entity *entity = &bfqq->entity;
6609 + if (bfqq->raising_coeff > 1) { /* queue is being boosted */
6610 +- struct bfq_entity *entity = &bfqq->entity;
6611 +-
6612 + bfq_log_bfqq(bfqd, bfqq,
6613 + "raising period dur %u/%u msec, "
6614 + "old raising coeff %u, w %d(%d)",
6615 +@@ -1656,12 +1845,14 @@ static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
6616 + jiffies_to_msecs(bfqq->
6617 + raising_cur_max_time));
6618 + bfq_bfqq_end_raising(bfqq);
6619 +- __bfq_entity_update_weight_prio(
6620 +- bfq_entity_service_tree(entity),
6621 +- entity);
6622 + }
6623 + }
6624 + }
6625 ++ /* Update weight both if it must be raised and if it must be lowered */
6626 ++ if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1))
6627 ++ __bfq_entity_update_weight_prio(
6628 ++ bfq_entity_service_tree(entity),
6629 ++ entity);
6630 + }
6631 +
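The condition guarding __bfq_entity_update_weight_prio() above can be read as "the boosted/non-boosted state of the weight disagrees with what raising_coeff says it should be", so the weight is recomputed both when raising begins and when it ends. A small stand-alone sketch (illustrative names, not BFQ code) makes the four cases explicit:

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-alone illustration (not part of the patch) of the condition used in
 * update_raising_data(): recompute the weight only when the "weight is
 * currently boosted" state disagrees with the "queue should be boosted"
 * state, covering both the start and the end of weight raising.
 */
static bool needs_weight_update(int weight, int orig_weight, int raising_coeff)
{
        return (weight > orig_weight) != (raising_coeff > 1);
}

int main(void)
{
        printf("%d\n", needs_weight_update(100, 100, 10)); /* 1: raising began, weight not boosted yet */
        printf("%d\n", needs_weight_update(1000, 100, 1)); /* 1: raising ended, weight still boosted */
        printf("%d\n", needs_weight_update(1000, 100, 10)); /* 0: already boosted, nothing to do */
        printf("%d\n", needs_weight_update(100, 100, 1));   /* 0: not boosted, as expected */
        return 0;
}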
6632 + /*
6633 +@@ -1901,6 +2092,25 @@ static void bfq_init_icq(struct io_cq *icq)
6634 + struct bfq_io_cq *bic = icq_to_bic(icq);
6635 +
6636 + bic->ttime.last_end_request = jiffies;
6637 ++ /*
6638 ++ * A newly created bic indicates that the process has just
6639 ++ * started doing I/O, and is probably mapping into memory its
6640 ++ * executable and libraries: it definitely needs weight raising.
6641 ++ * There is however the possibility that the process performs,
6642 ++ * for a while, I/O close to some other process. EQM intercepts
6643 ++ * this behavior and may merge the queue corresponding to the
6644 ++ * process with some other queue, BEFORE the weight of the queue
6645 ++ * is raised. Merged queues are not weight-raised (they are assumed
6646 ++ * to belong to processes that benefit only from high throughput).
6647 ++ * If the merge is basically the consequence of an accident, then
6648 ++ * the queue will be split soon and will get back its old weight.
6649 ++ * It is then important to record somewhere that this queue does
6650 ++ * need weight raising, even if it did not get its weight raised
6651 ++ * before being merged. For this purpose, we overload the field
6652 ++ * raising_time_left and assign 1 to it, to mark the queue as
6653 ++ * needing weight raising.
6654 ++ */
6655 ++ bic->raising_time_left = 1;
6656 + }
6657 +
6658 + static void bfq_exit_icq(struct io_cq *icq)
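A stand-alone toy model of the raising_time_left overload described in the comment above (the struct and helpers are illustrative, not BFQ's actual types): 0 means there is nothing to restore, 1 is the "deserves weight raising" marker set at bic creation, and larger values hold the time actually saved at merge time.

#include <stdio.h>

/*
 * Simplified model (not part of the patch) of the raising_time_left
 * overload: at bic creation the field is set to 1 as a "this process still
 * deserves weight raising" marker; it is cleared once a request is enqueued
 * for a non-merged queue, and at merge time it holds the real remaining
 * raising time saved by bfq_bfqq_save_state().
 */
struct toy_bic {
        unsigned int raising_time_left;
};

static void toy_init_bic(struct toy_bic *bic)
{
        bic->raising_time_left = 1;     /* mark as needing weight raising */
}

static const char *toy_interpret(const struct toy_bic *bic)
{
        if (bic->raising_time_left == 0)
                return "no weight raising to restore";
        if (bic->raising_time_left == 1)
                return "marked as deserving weight raising (never started)";
        return "remaining raising time saved at merge";
}

int main(void)
{
        struct toy_bic bic;

        toy_init_bic(&bic);
        printf("%s\n", toy_interpret(&bic));
        bic.raising_time_left = 0;
        printf("%s\n", toy_interpret(&bic));
        bic.raising_time_left = 2500;
        printf("%s\n", toy_interpret(&bic));
        return 0;
}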
6659 +@@ -1914,6 +2124,13 @@ static void bfq_exit_icq(struct io_cq *icq)
6660 + }
6661 +
6662 + if (bic->bfqq[BLK_RW_SYNC]) {
6663 ++ /*
6664 ++ * If the bic is using a shared queue, put the reference
6665 ++ * taken on the io_context when the bic started using a
6666 ++ * shared bfq_queue.
6667 ++ */
6668 ++ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
6669 ++ put_io_context(icq->ioc);
6670 + bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
6671 + bic->bfqq[BLK_RW_SYNC] = NULL;
6672 + }
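A toy sketch of the reference pairing involved here (illustrative only): bfq_get_bic_reference() pins the io_context when a queue becomes shared, and the matching put happens either at split time or, as above, when the bic of a coop queue exits.

#include <stdio.h>

/*
 * Toy model (not part of the patch) of the io_context reference pairing:
 * one extra reference is taken when the queue becomes shared and dropped
 * again on split or on bic exit.
 */
struct toy_ioc {
        long refcount;
};

static void toy_get_ioc(struct toy_ioc *ioc) { ioc->refcount++; }
static void toy_put_ioc(struct toy_ioc *ioc) { ioc->refcount--; }

int main(void)
{
        struct toy_ioc ioc = { .refcount = 1 };

        toy_get_ioc(&ioc);      /* queue becomes shared: pin the io_context */
        printf("after merge: %ld\n", ioc.refcount);       /* 2 */
        toy_put_ioc(&ioc);      /* split, or exit_icq on a coop queue */
        printf("after split/exit: %ld\n", ioc.refcount);  /* 1 */
        return 0;
}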
6673 +@@ -2211,6 +2428,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
6674 + if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
6675 + return;
6676 +
6677 ++ /* Idle window just restored, statistics are meaningless. */
6678 ++ if (bfq_bfqq_just_split(bfqq))
6679 ++ return;
6680 ++
6681 + enable_idle = bfq_bfqq_idle_window(bfqq);
6682 +
6683 + if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
6684 +@@ -2251,6 +2472,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
6685 + if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
6686 + !BFQQ_SEEKY(bfqq))
6687 + bfq_update_idle_window(bfqd, bfqq, bic);
6688 ++ bfq_clear_bfqq_just_split(bfqq);
6689 +
6690 + bfq_log_bfqq(bfqd, bfqq,
6691 + "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
6692 +@@ -2302,13 +2524,45 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
6693 + static void bfq_insert_request(struct request_queue *q, struct request *rq)
6694 + {
6695 + struct bfq_data *bfqd = q->elevator->elevator_data;
6696 +- struct bfq_queue *bfqq = RQ_BFQQ(rq);
6697 ++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
6698 +
6699 + assert_spin_locked(bfqd->queue->queue_lock);
6700 ++
6701 ++ /*
6702 ++ * An unplug may trigger a requeue of a request from the device
6703 ++ * driver: make sure we are in process context while trying to
6704 ++ * merge two bfq_queues.
6705 ++ */
6706 ++ if (!in_interrupt() &&
6707 ++ (new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true))) {
6708 ++ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
6709 ++ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
6710 ++ /*
6711 ++ * Release the request's reference to the old bfqq
6712 ++ * and take a new one on the shared queue instead.
6713 ++ */
6714 ++ new_bfqq->allocated[rq_data_dir(rq)]++;
6715 ++ bfqq->allocated[rq_data_dir(rq)]--;
6716 ++ atomic_inc(&new_bfqq->ref);
6717 ++ bfq_put_queue(bfqq);
6718 ++ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
6719 ++ bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq, new_bfqq);
6720 ++ rq->elv.priv[1] = new_bfqq;
6721 ++ bfqq = new_bfqq;
6722 ++ }
6723 ++
6724 + bfq_init_prio_data(bfqq, RQ_BIC(rq));
6725 +
6726 + bfq_add_rq_rb(rq);
6727 +
6728 ++ /*
6729 ++ * Here a newly-created bfq_queue has already started a weight-raising
6730 ++ * period: clear raising_time_left to prevent bfq_bfqq_save_state()
6731 ++ * from assigning it a full weight-raising period. See the detailed
6732 ++ * comments about this field in bfq_init_icq().
6733 ++ */
6734 ++ if (bfqq->bic != NULL)
6735 ++ bfqq->bic->raising_time_left = 0;
6736 + rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
6737 + list_add_tail(&rq->queuelist, &bfqq->fifo);
6738 +
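The bookkeeping performed above when an inserted request is redirected to the shared queue amounts to moving one per-direction allocation count and one queue reference from the old queue to the new one; a stand-alone sketch (toy types, not the kernel structures):

#include <stdio.h>

/*
 * Toy model (not part of the patch) of redirecting a request's accounting
 * from its original queue to a shared one: the allocation count follows the
 * request, and a queue reference is moved from the old queue to the new one.
 */
enum { TOY_READ, TOY_WRITE };

struct toy_queue {
        int allocated[2];
        int ref;
};

static void toy_redirect_request(struct toy_queue *from, struct toy_queue *to,
                                 int dir)
{
        to->allocated[dir]++;   /* the request is now accounted to "to" */
        from->allocated[dir]--;
        to->ref++;              /* rq->elv.priv[1] now pins the new queue... */
        from->ref--;            /* ...and drops its reference on the old one */
}

int main(void)
{
        struct toy_queue orig = { .allocated = { 1, 0 }, .ref = 3 };
        struct toy_queue shared = { .allocated = { 0, 0 }, .ref = 2 };

        toy_redirect_request(&orig, &shared, TOY_READ);
        printf("orig:   allocated[R]=%d ref=%d\n", orig.allocated[TOY_READ], orig.ref);
        printf("shared: allocated[R]=%d ref=%d\n", shared.allocated[TOY_READ], shared.ref);
        return 0;
}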
6739 +@@ -2371,15 +2625,6 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
6740 + if (bfq_bfqq_budget_new(bfqq))
6741 + bfq_set_budget_timeout(bfqd);
6742 +
6743 +- /* Idling is disabled also for cooperation issues:
6744 +- * 1) there is a close cooperator for the queue, or
6745 +- * 2) the queue is shared and some cooperator is likely
6746 +- * to be idle (in this case, by not arming the idle timer,
6747 +- * we try to slow down the queue, to prevent the zones
6748 +- * of the disk accessed by the active cooperators to become
6749 +- * too distant from the zone that will be accessed by the
6750 +- * currently idle cooperators)
6751 +- */
6752 + if (bfq_bfqq_must_idle(bfqq, budg_timeout))
6753 + bfq_arm_slice_timer(bfqd);
6754 + else if (budg_timeout)
6755 +@@ -2449,18 +2694,6 @@ static void bfq_put_request(struct request *rq)
6756 + }
6757 + }
6758 +
6759 +-static struct bfq_queue *
6760 +-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
6761 +- struct bfq_queue *bfqq)
6762 +-{
6763 +- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
6764 +- (long unsigned)bfqq->new_bfqq->pid);
6765 +- bic_set_bfqq(bic, bfqq->new_bfqq, 1);
6766 +- bfq_mark_bfqq_coop(bfqq->new_bfqq);
6767 +- bfq_put_queue(bfqq);
6768 +- return bic_to_bfqq(bic, 1);
6769 +-}
6770 +-
6771 + /*
6772 + * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
6773 + * was the last process referring to said bfqq.
6774 +@@ -2469,6 +2702,9 @@ static struct bfq_queue *
6775 + bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
6776 + {
6777 + bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
6778 ++
6779 ++ put_io_context(bic->icq.ioc);
6780 ++
6781 + if (bfqq_process_refs(bfqq) == 1) {
6782 + bfqq->pid = current->pid;
6783 + bfq_clear_bfqq_some_coop_idle(bfqq);
6784 +@@ -2498,6 +2734,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
6785 + struct bfq_queue *bfqq;
6786 + struct bfq_group *bfqg;
6787 + unsigned long flags;
6788 ++ bool split = false;
6789 +
6790 + might_sleep_if(gfp_mask & __GFP_WAIT);
6791 +
6792 +@@ -2516,24 +2753,14 @@ new_queue:
6793 + bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
6794 + bic_set_bfqq(bic, bfqq, is_sync);
6795 + } else {
6796 +- /*
6797 +- * If the queue was seeky for too long, break it apart.
6798 +- */
6799 ++ /* If the queue was seeky for too long, break it apart. */
6800 + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
6801 + bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
6802 + bfqq = bfq_split_bfqq(bic, bfqq);
6803 ++ split = true;
6804 + if (!bfqq)
6805 + goto new_queue;
6806 + }
6807 +-
6808 +- /*
6809 +- * Check to see if this queue is scheduled to merge with
6810 +- * another closely cooperating queue. The merging of queues
6811 +- * happens here as it must be done in process context.
6812 +- * The reference on new_bfqq was taken in merge_bfqqs.
6813 +- */
6814 +- if (bfqq->new_bfqq != NULL)
6815 +- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
6816 + }
6817 +
6818 + bfqq->allocated[rw]++;
6819 +@@ -2544,6 +2771,26 @@ new_queue:
6820 + rq->elv.priv[0] = bic;
6821 + rq->elv.priv[1] = bfqq;
6822 +
6823 ++ /*
6824 ++ * If a bfq_queue has only one process reference, it is owned
6825 ++ * by only one bfq_io_cq: we can set the bic field of the
6826 ++ * bfq_queue to the address of that structure. Also, if the
6827 ++ * queue has just been split, mark a flag so that the
6828 ++ * information is available to the other scheduler hooks.
6829 ++ */
6830 ++ if (bfqq_process_refs(bfqq) == 1) {
6831 ++ bfqq->bic = bic;
6832 ++ if (split) {
6833 ++ bfq_mark_bfqq_just_split(bfqq);
6834 ++ /*
6835 ++ * If the queue has just been split from a shared queue,
6836 ++ * restore the idle window and the possible weight
6837 ++ * raising period.
6838 ++ */
6839 ++ bfq_bfqq_resume_state(bfqq, bic);
6840 ++ }
6841 ++ }
6842 ++
6843 + spin_unlock_irqrestore(q->queue_lock, flags);
6844 +
6845 + return 0;
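A rough stand-alone model of the post-split handling added to bfq_set_request() above (the restore step is simplified and the toy_* names are hypothetical, since bfq_bfqq_resume_state() itself is defined earlier in the patch): once the queue is back to a single process reference it is re-bound to its bic, and a just-split queue gets its saved idle window and raising state back.

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch (not part of the patch) of the post-split handling: a queue with a
 * single process reference can be bound to one bic again, and if it has just
 * been split the saved idle-window and weight-raising state are restored.
 */
struct toy_bic {
        unsigned int raising_time_left;
        bool saved_idle_window;
};

struct toy_bfqq {
        int process_refs;
        bool just_split;
        bool idle_window;
        unsigned int raising_time_left;
        struct toy_bic *bic;
};

static void toy_resume_state(struct toy_bfqq *q, struct toy_bic *bic)
{
        /* simplified stand-in for bfq_bfqq_resume_state() */
        q->idle_window = bic->saved_idle_window;
        q->raising_time_left = bic->raising_time_left;
}

static void toy_after_split(struct toy_bfqq *q, struct toy_bic *bic, bool split)
{
        if (q->process_refs != 1)
                return;         /* still shared: leave q->bic == NULL */
        q->bic = bic;
        if (split) {
                q->just_split = true;
                toy_resume_state(q, bic);
        }
}

int main(void)
{
        struct toy_bic bic = { .raising_time_left = 4000, .saved_idle_window = true };
        struct toy_bfqq q = { .process_refs = 1 };

        toy_after_split(&q, &bic, true);
        printf("idle_window=%d raising_time_left=%u just_split=%d\n",
               q.idle_window, q.raising_time_left, q.just_split);
        return 0;
}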
6846 +diff --git a/block/bfq-sched.c b/block/bfq-sched.c
6847 +index 03f8061..a0edaa2 100644
6848 +--- a/block/bfq-sched.c
6849 ++++ b/block/bfq-sched.c
6850 +@@ -978,34 +978,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
6851 + return bfqq;
6852 + }
6853 +
6854 +-/*
6855 +- * Forced extraction of the given queue.
6856 +- */
6857 +-static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
6858 +- struct bfq_queue *bfqq)
6859 +-{
6860 +- struct bfq_entity *entity;
6861 +- struct bfq_sched_data *sd;
6862 +-
6863 +- BUG_ON(bfqd->active_queue != NULL);
6864 +-
6865 +- entity = &bfqq->entity;
6866 +- /*
6867 +- * Bubble up extraction/update from the leaf to the root.
6868 +- */
6869 +- for_each_entity(entity) {
6870 +- sd = entity->sched_data;
6871 +- bfq_update_budget(entity);
6872 +- bfq_update_vtime(bfq_entity_service_tree(entity));
6873 +- bfq_active_extract(bfq_entity_service_tree(entity), entity);
6874 +- sd->active_entity = entity;
6875 +- sd->next_active = NULL;
6876 +- entity->service = 0;
6877 +- }
6878 +-
6879 +- return;
6880 +-}
6881 +-
6882 + static void __bfq_bfqd_reset_active(struct bfq_data *bfqd)
6883 + {
6884 + if (bfqd->active_bic != NULL) {
6885 +diff --git a/block/bfq.h b/block/bfq.h
6886 +index b4e9ab0..ca5b444 100644
6887 +--- a/block/bfq.h
6888 ++++ b/block/bfq.h
6889 +@@ -188,6 +188,8 @@ struct bfq_group;
6890 + * @pid: pid of the process owning the queue, used for logging purposes.
6891 + * @last_rais_start_time: last (idle -> weight-raised) transition attempt
6892 + * @raising_cur_max_time: current max raising time for this queue
6893 ++ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
6894 ++ * queue is shared
6895 + *
6896 + * A bfq_queue is a leaf request queue; it can be associated to an io_context
6897 + * or more (if it is an async one). @cgroup holds a reference to the
6898 +@@ -231,6 +233,7 @@ struct bfq_queue {
6899 + sector_t last_request_pos;
6900 +
6901 + pid_t pid;
6902 ++ struct bfq_io_cq *bic;
6903 +
6904 + /* weight-raising fields */
6905 + unsigned int raising_cur_max_time;
6906 +@@ -257,12 +260,23 @@ struct bfq_ttime {
6907 + * @icq: associated io_cq structure
6908 + * @bfqq: array of two process queues, the sync and the async
6909 + * @ttime: associated @bfq_ttime struct
6910 ++ * @raising_time_left: snapshot of the time left before weight raising ends
6911 ++ * for the sync queue associated to this process; this
6912 ++ * snapshot is taken to remember this value while the weight
6913 ++ * raising is suspended because the queue is merged with a
6914 ++ * shared queue, and is used to set @raising_cur_max_time
6915 ++ * when the queue is split from the shared queue and its
6916 ++ * weight is raised again
6917 ++ * @saved_idle_window: same purpose as the previous field for the idle window
6918 + */
6919 + struct bfq_io_cq {
6920 + struct io_cq icq; /* must be the first member */
6921 + struct bfq_queue *bfqq[2];
6922 + struct bfq_ttime ttime;
6923 + int ioprio;
6924 ++
6925 ++ unsigned int raising_time_left;
6926 ++ unsigned int saved_idle_window;
6927 + };
6928 +
6929 + /**
6930 +@@ -403,6 +417,7 @@ enum bfqq_state_flags {
6931 + BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
6932 + BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
6933 + BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */
6934 ++ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
6935 + };
6936 +
6937 + #define BFQ_BFQQ_FNS(name) \
6938 +@@ -430,6 +445,7 @@ BFQ_BFQQ_FNS(budget_new);
6939 + BFQ_BFQQ_FNS(coop);
6940 + BFQ_BFQQ_FNS(split_coop);
6941 + BFQ_BFQQ_FNS(some_coop_idle);
6942 ++BFQ_BFQQ_FNS(just_split);
6943 + #undef BFQ_BFQQ_FNS
6944 +
6945 + /* Logging facilities. */
6946 +--
6947 +1.8.1.4
6948 +
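The new just_split flag is wired in through BFQ_BFQQ_FNS(just_split); the macro body lies outside this hunk, but the call sites above (bfq_mark_bfqq_just_split, bfq_clear_bfqq_just_split, bfq_bfqq_just_split) suggest set/clear/test helpers over a per-queue flags word, along these lines (stand-alone sketch, names prefixed toy_ to mark them as illustrative):

#include <stdio.h>

/*
 * Hedged sketch of the flag helpers implied by BFQ_BFQQ_FNS(just_split):
 * one mark, one clear and one test helper over a per-queue flags word.
 */
enum { TOY_FLAG_just_split = 1 };

struct toy_bfqq {
        unsigned int flags;
};

#define TOY_BFQQ_FNS(name)                                              \
static void toy_mark_bfqq_##name(struct toy_bfqq *bfqq)                 \
{                                                                       \
        bfqq->flags |= (1 << TOY_FLAG_##name);                          \
}                                                                       \
static void toy_clear_bfqq_##name(struct toy_bfqq *bfqq)                \
{                                                                       \
        bfqq->flags &= ~(1 << TOY_FLAG_##name);                         \
}                                                                       \
static int toy_bfqq_##name(const struct toy_bfqq *bfqq)                 \
{                                                                       \
        return (bfqq->flags & (1 << TOY_FLAG_##name)) != 0;             \
}

TOY_BFQQ_FNS(just_split)
#undef TOY_BFQQ_FNS

int main(void)
{
        struct toy_bfqq q = { 0 };

        toy_mark_bfqq_just_split(&q);
        printf("%d\n", toy_bfqq_just_split(&q));        /* 1 */
        toy_clear_bfqq_just_split(&q);
        printf("%d\n", toy_bfqq_just_split(&q));        /* 0 */
        return 0;
}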