From: "Richard Yao (ryao)" <ryao@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in sys-fs/zfs/files: zfs-0.6.0_rc9-range-lock-caller-allocate.patch
Date: Mon, 25 Jun 2012 21:03:38
Message-Id: 20120625210327.40E5A2004B@flycatcher.gentoo.org
ryao 12/06/25 21:03:27

Modified: zfs-0.6.0_rc9-range-lock-caller-allocate.patch
Log:
Fix an additional regression in the deadlock fix

(Portage version: 2.1.10.49/cvs/Linux x86_64)

Revision Changes Path
1.4 sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch

file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch?rev=1.4&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch?rev=1.4&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch?r1=1.3&r2=1.4

Index: zfs-0.6.0_rc9-range-lock-caller-allocate.patch
===================================================================
RCS file: /var/cvsroot/gentoo-x86/sys-fs/zfs/files/zfs-0.6.0_rc9-range-lock-caller-allocate.patch,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- zfs-0.6.0_rc9-range-lock-caller-allocate.patch 25 Jun 2012 20:21:55 -0000 1.3
+++ zfs-0.6.0_rc9-range-lock-caller-allocate.patch 25 Jun 2012 21:03:27 -0000 1.4
@@ -1,36 +1,28 @@
-From fc1f1d3940f4d2e5b1b85481d900d8198cf4b6f3 Mon Sep 17 00:00:00 2001
-From: Richard Yao <ryao@×××××××××××××.edu>
-Date: Mon, 25 Jun 2012 14:41:30 -0400
-Subject: [PATCH] Make callers responsible for memory allocation in
- zfs_range_lock()
+commit e7deab3edf6940f13013ca147c91472577223923
+Author: Richard Yao <ryao@×××××××××××××.edu>
+Date: Mon Jun 25 14:41:30 2012 -0400

-zfs_range_lock() is used in zvols, and previously, it could deadlock due
-to an allocation using KM_SLEEP. We avoid this by moving responsibility
-for the memory allocation from zfs_range_lock() to the caller. This
-enables us to avoid such deadlocks and use stack allocations, which
-are more efficient. The contexts in which stack allocations are done
-do not appear to be stack heavy, so we do not risk overflowing the
-stack from doing this.
-
-Signed-off-by: Richard Yao <ryao@×××××××××××××.edu>
-
-Conflicts:
-
- module/zfs/zvol.c
----
- cmd/ztest/ztest.c | 32 +++++++++++++++++---------------
- include/sys/zfs_rlock.h | 2 +-
- module/zfs/zfs_rlock.c | 15 +++++++--------
- module/zfs/zfs_vnops.c | 30 ++++++++++++++++--------------
- module/zfs/zfs_znode.c | 30 +++++++++++++++---------------
- module/zfs/zvol.c | 24 +++++++++++++-----------
- 6 files changed, 69 insertions(+), 64 deletions(-)
+ Make callers responsible for memory allocation in zfs_range_lock()
+
+ zfs_range_lock() is used in zvols, and previously, it could deadlock due
+ to an allocation using KM_SLEEP. We avoid this by moving responsibility
+ for the memory allocation from zfs_range_lock() to the caller. This
+ enables us to avoid such deadlocks and use stack allocations, which
+ are more efficient. The contexts in which stack allocations are done
+ do not appear to be stack heavy, so we do not risk overflowing the
+ stack from doing this.
+
+ Signed-off-by: Richard Yao <ryao@×××××××××××××.edu>
+
+ Conflicts:
+
+ module/zfs/zvol.c

diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 72d511b..c5dd0c2 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
-@@ -973,12 +973,11 @@ enum ztest_object {
+@@ -973,12 +973,11 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
}

static rl_t *
@@ -44,7 +36,7 @@

rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
rl->rl_object = object;
-@@ -1389,7 +1388,7 @@ enum ztest_object {
+@@ -1389,7 +1388,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
dmu_tx_t *tx;
dmu_buf_t *db;
arc_buf_t *abuf = NULL;
@@ -53,7 +45,7 @@

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
-@@ -1413,7 +1412,7 @@ enum ztest_object {
+@@ -1413,7 +1412,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
bt = NULL;

ztest_object_lock(zd, lr->lr_foid, RL_READER);
@@ -62,7 +54,7 @@

VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));

-@@ -1438,7 +1437,7 @@ enum ztest_object {
+@@ -1438,7 +1437,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
if (abuf != NULL)
dmu_return_arcbuf(abuf);
dmu_buf_rele(db, FTAG);
@@ -71,7 +63,7 @@
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
-@@ -1495,7 +1494,7 @@ enum ztest_object {
+@@ -1495,7 +1494,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)

dmu_tx_commit(tx);

@@ -80,7 +72,7 @@
ztest_object_unlock(zd, lr->lr_foid);

return (0);
-@@ -1507,13 +1506,13 @@ enum ztest_object {
+@@ -1507,13 +1506,13 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
@@ -96,7 +88,7 @@
RL_WRITER);

tx = dmu_tx_create(os);
-@@ -1522,7 +1521,7 @@ enum ztest_object {
+@@ -1522,7 +1521,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)

txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
@@ -105,7 +97,7 @@
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
-@@ -1534,7 +1533,7 @@ enum ztest_object {
+@@ -1534,7 +1533,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)

dmu_tx_commit(tx);

@@ -114,7 +106,7 @@
ztest_object_unlock(zd, lr->lr_foid);

return (0);
-@@ -1670,6 +1669,8 @@ enum ztest_object {
+@@ -1670,6 +1669,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
dmu_object_info_t doi;
dmu_buf_t *db;
zgd_t *zgd;
@@ -123,7 +115,7 @@
int error;

ztest_object_lock(zd, object, RL_READER);
-@@ -1694,9 +1695,10 @@ enum ztest_object {
+@@ -1694,9 +1695,10 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
zgd->zgd_zilog = zd->zd_zilog;
zgd->zgd_private = zd;
@@ -135,7 +127,7 @@
RL_READER);

error = dmu_read(os, object, offset, size, buf,
-@@ -1711,7 +1713,7 @@ enum ztest_object {
+@@ -1711,7 +1713,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
offset = 0;
}

@@ -144,7 +136,7 @@
RL_READER);

error = dmu_buf_hold(os, object, offset, zgd, &db,
-@@ -1953,12 +1955,12 @@ enum ztest_object {
+@@ -1953,12 +1955,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
@@ -159,7 +151,7 @@

tx = dmu_tx_create(os);

-@@ -1974,7 +1976,7 @@ enum ztest_object {
+@@ -1974,7 +1976,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
(void) dmu_free_long_range(os, object, offset, size);
}

@@ -172,7 +164,7 @@
index da18b1f..85dc16a 100644
--- a/include/sys/zfs_rlock.h
+++ b/include/sys/zfs_rlock.h
-@@ -63,7 +63,7 @@
+@@ -63,7 +63,7 @@ typedef struct rl {
* is converted to WRITER that specified to lock from the start of the
* end of file. zfs_range_lock() returns the range lock structure.
*/
@@ -198,7 +190,7 @@
*
* AVL tree
* --------
-@@ -420,13 +420,11 @@
+@@ -420,13 +420,11 @@ got_lock:
* previously locked as RL_WRITER).
*/
rl_t *
@@ -213,7 +205,7 @@
new->r_zp = zp;
new->r_off = off;
if (len + off < off) /* overflow */
-@@ -531,7 +529,6 @@
+@@ -531,7 +529,6 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list)
}

mutex_exit(&zp->z_range_lock);
@@ -221,7 +213,7 @@
}
}

-@@ -572,7 +569,9 @@
+@@ -572,7 +569,9 @@ zfs_range_unlock(rl_t *rl)

while ((free_rl = list_head(&free_list)) != NULL) {
list_remove(&free_list, free_rl);
@@ -233,10 +225,10 @@

list_destroy(&free_list);
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
-index 2da5fec..c8ca7c5 100644
+index 2da5fec..1ef5299 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
-@@ -370,7 +370,7 @@
+@@ -370,7 +370,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
objset_t *os;
ssize_t n, nbytes;
int error = 0;
@@ -245,7 +237,7 @@
#ifdef HAVE_UIO_ZEROCOPY
xuio_t *xuio = NULL;
#endif /* HAVE_UIO_ZEROCOPY */
-@@ -418,7 +418,7 @@
+@@ -418,7 +418,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
/*
* Lock the range against changes.
*/
@@ -254,7 +246,7 @@

/*
* If we are reading past end-of-file we can skip
-@@ -482,7 +482,7 @@
+@@ -482,7 +482,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
n -= nbytes;
}
out:
@@ -263,7 +255,7 @@

ZFS_ACCESSTIME_STAMP(zsb, zp);
zfs_inode_update(zp);
-@@ -524,7 +524,7 @@
+@@ -524,7 +524,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
zilog_t *zilog;
offset_t woff;
ssize_t n, nbytes;
@@ -272,7 +264,7 @@
int max_blksz = zsb->z_max_blksz;
int error = 0;
arc_buf_t *abuf;
-@@ -608,9 +608,9 @@
+@@ -608,9 +608,9 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* Obtain an appending range lock to guarantee file append
* semantics. We reset the write offset once we have the lock.
*/
@@ -285,7 +277,7 @@
/*
* We overlocked the file because this write will cause
* the file block size to increase.
-@@ -625,11 +625,11 @@
+@@ -625,11 +625,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* this write, then this range lock will lock the entire file
* so that we can re-write the block safely.
*/
@@ -299,7 +291,7 @@
ZFS_EXIT(zsb);
return (EFBIG);
}
-@@ -719,7 +719,7 @@
+@@ -719,7 +719,7 @@ again:
* on the first iteration since zfs_range_reduce() will
* shrink down r_len to the appropriate size.
*/
@@ -308,7 +300,7 @@
uint64_t new_blksz;

if (zp->z_blksz > max_blksz) {
-@@ -729,7 +729,7 @@
+@@ -729,7 +729,7 @@ again:
new_blksz = MIN(end_size, max_blksz);
}
zfs_grow_blocksize(zp, new_blksz, tx);
@@ -317,7 +309,7 @@
}

/*
-@@ -842,7 +842,7 @@
+@@ -842,7 +842,7 @@ again:
uio_prefaultpages(MIN(n, max_blksz), uio);
}

@@ -326,23 +318,23 @@

/*
* If we're in replay mode, or we made no progress, return error.
-@@ -915,6 +915,7 @@
- blkptr_t *bp = &lr->lr_blkptr;
- dmu_buf_t *db;
- zgd_t *zgd;
-+ rl_t rl;
- int error = 0;
+@@ -893,6 +893,7 @@ zfs_get_done(zgd_t *zgd, int error)
+ if (error == 0 && zgd->zgd_bp)
+ zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

- ASSERT(zio != NULL);
-@@ -935,6 +936,7 @@
++ kmem_free(zgd->zgd_rl, sizeof (rl_t));
+ kmem_free(zgd, sizeof (zgd_t));
+ }
+
+@@ -935,6 +936,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
}

zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
-+ zgd->zgd_rl = &rl;
++ zgd->zgd_rl = (rl_t *)kmem_zalloc(sizeof (rl_t), KM_SLEEP);
zgd->zgd_zilog = zsb->z_log;
zgd->zgd_private = zp;

-@@ -946,7 +948,7 @@
+@@ -946,7 +948,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
* we don't have to write the data twice.
*/
if (buf != NULL) { /* immediate write */
@@ -351,7 +343,7 @@
/* test for truncation needs to be done while range locked */
if (offset >= zp->z_size) {
error = ENOENT;
-@@ -967,7 +969,7 @@
+@@ -967,7 +969,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
size = zp->z_blksz;
blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
offset -= blkoff;
@@ -364,7 +356,7 @@
index 3a6872f..e363839 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
-@@ -1158,20 +1158,20 @@
+@@ -1158,20 +1158,20 @@ zfs_extend(znode_t *zp, uint64_t end)
{
zfs_sb_t *zsb = ZTOZSB(zp);
dmu_tx_t *tx;
@@ -388,7 +380,7 @@
return (0);
}
top:
-@@ -1202,7 +1202,7 @@
+@@ -1202,7 +1202,7 @@ top:
goto top;
}
dmu_tx_abort(tx);
@@ -397,7 +389,7 @@
return (error);
}

-@@ -1214,7 +1214,7 @@
+@@ -1214,7 +1214,7 @@ top:
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
&zp->z_size, sizeof (zp->z_size), tx));

@@ -406,7 +398,7 @@

dmu_tx_commit(tx);

-@@ -1235,19 +1235,19 @@
+@@ -1235,19 +1235,19 @@ static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
zfs_sb_t *zsb = ZTOZSB(zp);
@@ -429,7 +421,7 @@
return (0);
}

-@@ -1256,7 +1256,7 @@
+@@ -1256,7 +1256,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)

error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);

@@ -438,7 +430,7 @@

return (error);
}
-@@ -1275,7 +1275,7 @@
+@@ -1275,7 +1275,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
{
zfs_sb_t *zsb = ZTOZSB(zp);
dmu_tx_t *tx;
@@ -447,7 +439,7 @@
int error;
sa_bulk_attr_t bulk[2];
int count = 0;
-@@ -1283,19 +1283,19 @@
+@@ -1283,19 +1283,19 @@ zfs_trunc(znode_t *zp, uint64_t end)
/*
* We will change zp_size, lock the whole file.
*/
@@ -470,7 +462,7 @@
return (error);
}
top:
-@@ -1310,7 +1310,7 @@
+@@ -1310,7 +1310,7 @@ top:
goto top;
}
dmu_tx_abort(tx);
@@ -479,7 +471,7 @@
return (error);
}

-@@ -1327,7 +1327,7 @@
+@@ -1327,7 +1327,7 @@ top:

dmu_tx_commit(tx);

@@ -492,7 +484,7 @@
index 125d58d..bbe53d9 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
-@@ -537,7 +537,7 @@
+@@ -537,7 +537,7 @@ zvol_write(void *arg)
uint64_t size = blk_rq_bytes(req);
int error = 0;
dmu_tx_t *tx;
@@ -501,7 +493,7 @@

if (req->cmd_flags & VDEV_REQ_FLUSH)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
-@@ -550,7 +550,7 @@
+@@ -550,7 +550,7 @@ zvol_write(void *arg)
return;
}

@@ -510,7 +502,7 @@

tx = dmu_tx_create(zv->zv_objset);
dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size);
-@@ -559,7 +559,7 @@
+@@ -559,7 +559,7 @@ zvol_write(void *arg)
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
@@ -519,7 +511,7 @@
blk_end_request(req, -error, size);
return;
}
-@@ -570,7 +570,7 @@
+@@ -570,7 +570,7 @@ zvol_write(void *arg)
req->cmd_flags & VDEV_REQ_FUA);

dmu_tx_commit(tx);
@@ -528,7 +520,7 @@

if ((req->cmd_flags & VDEV_REQ_FUA) ||
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
-@@ -589,7 +589,7 @@
+@@ -589,7 +589,7 @@ zvol_discard(void *arg)
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error;
@@ -537,7 +529,7 @@

if (offset + size > zv->zv_volsize) {
blk_end_request(req, -EIO, size);
-@@ -601,7 +601,7 @@
+@@ -601,7 +601,7 @@ zvol_discard(void *arg)
return;
}

@@ -546,7 +538,7 @@

error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size);

-@@ -609,7 +609,7 @@
+@@ -609,7 +609,7 @@ zvol_discard(void *arg)
* TODO: maybe we should add the operation to the log.
*/

@@ -555,7 +547,7 @@

blk_end_request(req, -error, size);
}
-@@ -630,18 +630,18 @@
+@@ -630,18 +630,18 @@ zvol_read(void *arg)
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error;
@@ -577,7 +569,7 @@

/* convert checksum errors into IO errors */
if (error == ECKSUM)
-@@ -744,6 +744,7 @@
+@@ -744,6 +744,7 @@ zvol_get_done(zgd_t *zgd, int error)
if (error == 0 && zgd->zgd_bp)
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

@@ -585,7 +577,7 @@
kmem_free(zgd, sizeof (zgd_t));
}

-@@ -766,7 +767,8 @@
+@@ -766,7 +767,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)

zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
zgd->zgd_zilog = zv->zv_zilog;
@@ -595,6 +587,3 @@

/*
* Write records come in two flavors: immediate and indirect.
---
-1.7.10
-
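
Why the extra revision was needed: in r1.3 of this patch, the
zfs_vnops.c portion had zfs_get_data() point zgd->zgd_rl at a stack
variable, yet the range lock is only dropped later, in the
zfs_get_done() callback, after zfs_get_data() has already returned.
r1.4 instead gives the rl_t the same heap lifetime as the zgd_t that
carries it. Condensed from the hunks above:

    /* r1.3 (broken): rl is dead once zfs_get_data() returns,
     * but zgd->zgd_rl still points at it. */
    rl_t rl;
    ...
    zgd->zgd_rl = &rl;

    /* r1.4 (this commit): heap storage, freed in zfs_get_done()
     * together with the zgd itself. */
    zgd->zgd_rl = (rl_t *)kmem_zalloc(sizeof (rl_t), KM_SLEEP);
    ...
    kmem_free(zgd->zgd_rl, sizeof (rl_t));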
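
The broader pattern the patch applies (moving responsibility for the
lock's storage from zfs_range_lock() to its callers, so that each call
site can pick stack or heap storage to match the lock's lifetime) can
be modeled outside the kernel. The sketch below is a minimal userspace
illustration of that pattern, not the ZFS API; every name in it
(range_lock_t, async_ctx_t, and so on) is illustrative only:

    /* range_lock_demo.c -- caller-allocated lock bookkeeping.
     * Build: cc -o range_lock_demo range_lock_demo.c
     * Illustrative model only; not the ZFS implementation. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct range_lock {
            unsigned long rl_off;   /* start of locked range */
            unsigned long rl_len;   /* length of locked range */
    } range_lock_t;

    /* The callee no longer allocates: it only initializes storage
     * the caller owns, so it never has to sleep in an allocator. */
    static void
    range_lock(range_lock_t *rl, unsigned long off, unsigned long len)
    {
            rl->rl_off = off;
            rl->rl_len = len;
            /* the real code would also insert rl into an AVL tree
             * under a mutex and wait out overlapping lock holders */
    }

    static void
    range_unlock(range_lock_t *rl)
    {
            printf("unlocked [%lu, %lu)\n", rl->rl_off,
                rl->rl_off + rl->rl_len);
    }

    /* Synchronous caller: the lock's lifetime is bounded by the
     * call, so stack storage is safe and costs nothing. */
    static void
    sync_write(unsigned long off, unsigned long len)
    {
            range_lock_t rl;        /* stack storage */

            range_lock(&rl, off, len);
            /* ... do the I/O ... */
            range_unlock(&rl);
    }

    /* Asynchronous caller: the lock outlives this function because
     * a later callback releases it, so the storage must come from
     * the heap. A stack variable here is exactly the use-after-scope
     * bug that r1.4 fixes in zfs_get_data(). */
    typedef struct async_ctx {
            range_lock_t *ac_rl;
    } async_ctx_t;

    static async_ctx_t *
    async_read_begin(unsigned long off, unsigned long len)
    {
            async_ctx_t *ac = calloc(1, sizeof (*ac));

            if (ac == NULL)
                    abort();
            ac->ac_rl = calloc(1, sizeof (*ac->ac_rl));
            if (ac->ac_rl == NULL)
                    abort();
            range_lock(ac->ac_rl, off, len);
            return (ac);
    }

    static void
    async_read_done(async_ctx_t *ac)
    {
            range_unlock(ac->ac_rl);
            free(ac->ac_rl);        /* heap storage dies with the ctx */
            free(ac);
    }

    int
    main(void)
    {
            sync_write(0, 4096);
            async_read_done(async_read_begin(4096, 4096));
            return (0);
    }

The sync_write() case appears to correspond to call sites such as
zfs_read() and zfs_write() above, where the rl_t can now live on the
caller's stack; the async case corresponds to zfs_get_data() and
zvol_get_data(), where the lock has to survive until the ZIL get-done
callback runs.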