1 |
Author: dsd |
2 |
Date: 2008-11-28 15:37:18 +0000 (Fri, 28 Nov 2008) |
3 |
New Revision: 1412 |
4 |
|
5 |
Modified: |
6 |
genpatches-2.6/trunk/2.6.27/1500_inotify-watch-removal-race.patch |
7 |
Log: |
8 |
right patch file, thanks to Tiago Cunha |
9 |
|
10 |
Modified: genpatches-2.6/trunk/2.6.27/1500_inotify-watch-removal-race.patch |
11 |
=================================================================== |
12 |
--- genpatches-2.6/trunk/2.6.27/1500_inotify-watch-removal-race.patch 2008-11-27 19:38:38 UTC (rev 1411) |
13 |
+++ genpatches-2.6/trunk/2.6.27/1500_inotify-watch-removal-race.patch 2008-11-28 15:37:18 UTC (rev 1412) |
14 |
@@ -1,13 +1,567 @@ |
15 |
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www/w3.porg/TR/html4/strict.dtd"> |
16 |
-<!-- git web interface version 1.4.4.2.492.ga748, (C) 2005-2006, Kay Sievers <kay.sievers@××××.org>, Christian Gierke --> |
17 |
-<!-- git core binaries version 1.6.0.2 --> |
18 |
-<head> |
19 |
-<meta http-equiv="content-type" content="; charset=utf-8"/> |
20 |
-<meta name="generator" content="gitweb/1.4.4.2.492.ga748 git/1.6.0.2"/> |
21 |
-<meta name="robots" content="index, nofollow"/> |
22 |
-<meta http-equiv="refresh" content="0"/> |
23 |
-<title></title> |
24 |
-</head> |
25 |
-<body> |
26 |
-Generating....</body> |
27 |
-</html> |
28 |
+From: Al Viro <viro@×××××××××××××××.uk> |
29 |
+Date: Sat, 15 Nov 2008 01:15:43 +0000 (+0000) |
30 |
+Subject: Fix inotify watch removal/umount races |
31 |
+X-Git-Tag: v2.6.28-rc5~1 |
32 |
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=8f7b0ba1c853919b85b54774775f567f30006107 |
33 |
+ |
34 |
+Fix inotify watch removal/umount races |
35 |
+ |
36 |
+Inotify watch removals suck violently. |
37 |
+ |
38 |
+To kick the watch out we need (in this order) inode->inotify_mutex and |
39 |
+ih->mutex. That's fine if we have a hold on inode; however, for all |
40 |
+other cases we need to make damn sure we don't race with umount. We can |
41 |
+*NOT* just grab a reference to a watch - inotify_unmount_inodes() will |
42 |
+happily sail past it and we'll end with reference to inode potentially |
43 |
+outliving its superblock. |
44 |
+ |
45 |
+Ideally we just want to grab an active reference to superblock if we |
46 |
+can; that will make sure we won't go into inotify_unmount_inodes() until |
47 |
+we are done. Cleanup is just deactivate_super(). |
48 |
+ |
49 |
+However, that leaves a messy case - what if we *are* racing with |
50 |
+umount() and active references to superblock can't be acquired anymore? |
51 |
+We can bump ->s_count, grab ->s_umount, which will almost certainly wait |
52 |
+until the superblock is shut down and the watch in question is pining |
53 |
+for fjords. That's fine, but there is a problem - we might have hit the |
54 |
+window between ->s_active getting to 0 / ->s_count - below S_BIAS (i.e. |
55 |
+the moment when superblock is past the point of no return and is heading |
56 |
+for shutdown) and the moment when deactivate_super() acquires |
57 |
+->s_umount. |
58 |
+ |
59 |
+We could just do drop_super(), yield() and retry, but that's rather |
60 |
+antisocial and this stuff is luser-triggerable. OTOH, having grabbed |
61 |
+->s_umount and having found that we'd got there first (i.e. that |
62 |
+->s_root is non-NULL) we know that we won't race with |
63 |
+inotify_unmount_inodes(). |
64 |
+ |
65 |
+So we could grab a reference to watch and do the rest as above, just |
66 |
+with drop_super() instead of deactivate_super(), right? Wrong. We had |
67 |
+to drop ih->mutex before we could grab ->s_umount. So the watch |
68 |
+could've been gone already. |
69 |
+ |
70 |
+That still can be dealt with - we need to save watch->wd, do idr_find() |
71 |
+and compare its result with our pointer. If they match, we either have |
72 |
+the damn thing still alive or we'd lost not one but two races at once, |
73 |
+the watch had been killed and a new one got created with the same ->wd |
74 |
+at the same address. That couldn't have happened in inotify_destroy(), |
75 |
+but inotify_rm_wd() could run into that. Still, "new one got created" |
76 |
+is not a problem - we have every right to kill it or leave it alone, |
77 |
+whatever's more convenient. |
78 |
+ |
79 |
+So we can use idr_find(...) == watch && watch->inode->i_sb == sb as |
80 |
+"grab it and kill it" check. If it's been our original watch, we are |
81 |
+fine, if it's a newcomer - nevermind, just pretend that we'd won the |
82 |
+race and kill the fscker anyway; we are safe since we know that its |
83 |
+superblock won't be going away. |
84 |
+ |
85 |
+And yes, this is far beyond mere "not very pretty"; so's the entire |
86 |
+concept of inotify to start with. |
87 |
+ |
88 |
+Signed-off-by: Al Viro <viro@×××××××××××××××.uk> |
89 |
+Acked-by: Greg KH <greg@×××××.com> |
90 |
+Signed-off-by: Linus Torvalds <torvalds@××××××××××××××××.org> |
91 |
+--- |
92 |
+ |
93 |
+diff --git a/fs/inotify.c b/fs/inotify.c |
94 |
+index 690e725..7bbed1b 100644 |
95 |
+--- a/fs/inotify.c |
96 |
++++ b/fs/inotify.c |
97 |
+@@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch) |
98 |
+ } |
99 |
+ EXPORT_SYMBOL_GPL(get_inotify_watch); |
100 |
+ |
101 |
++int pin_inotify_watch(struct inotify_watch *watch) |
102 |
++{ |
103 |
++ struct super_block *sb = watch->inode->i_sb; |
104 |
++ spin_lock(&sb_lock); |
105 |
++ if (sb->s_count >= S_BIAS) { |
106 |
++ atomic_inc(&sb->s_active); |
107 |
++ spin_unlock(&sb_lock); |
108 |
++ atomic_inc(&watch->count); |
109 |
++ return 1; |
110 |
++ } |
111 |
++ spin_unlock(&sb_lock); |
112 |
++ return 0; |
113 |
++} |
114 |
++ |
115 |
+ /** |
116 |
+ * put_inotify_watch - decrements the ref count on a given watch. cleans up |
117 |
+ * watch references if the count reaches zero. inotify_watch is freed by |
118 |
+@@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch) |
119 |
+ } |
120 |
+ EXPORT_SYMBOL_GPL(put_inotify_watch); |
121 |
+ |
122 |
++void unpin_inotify_watch(struct inotify_watch *watch) |
123 |
++{ |
124 |
++ struct super_block *sb = watch->inode->i_sb; |
125 |
++ put_inotify_watch(watch); |
126 |
++ deactivate_super(sb); |
127 |
++} |
128 |
++ |
129 |
+ /* |
130 |
+ * inotify_handle_get_wd - returns the next WD for use by the given handle |
131 |
+ * |
132 |
+@@ -479,6 +500,112 @@ void inotify_init_watch(struct inotify_watch *watch) |
133 |
+ } |
134 |
+ EXPORT_SYMBOL_GPL(inotify_init_watch); |
135 |
+ |
136 |
++/* |
137 |
++ * Watch removals suck violently. To kick the watch out we need (in this |
138 |
++ * order) inode->inotify_mutex and ih->mutex. That's fine if we have |
139 |
++ * a hold on inode; however, for all other cases we need to make damn sure |
140 |
++ * we don't race with umount. We can *NOT* just grab a reference to a |
141 |
++ * watch - inotify_unmount_inodes() will happily sail past it and we'll end |
142 |
++ * with reference to inode potentially outliving its superblock. Ideally |
143 |
++ * we just want to grab an active reference to superblock if we can; that |
144 |
++ * will make sure we won't go into inotify_umount_inodes() until we are |
145 |
++ * done. Cleanup is just deactivate_super(). However, that leaves a messy |
146 |
++ * case - what if we *are* racing with umount() and active references to |
147 |
++ * superblock can't be acquired anymore? We can bump ->s_count, grab |
148 |
++ * ->s_umount, which will almost certainly wait until the superblock is shut |
149 |
++ * down and the watch in question is pining for fjords. That's fine, but |
150 |
++ * there is a problem - we might have hit the window between ->s_active |
151 |
++ * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock |
152 |
++ * is past the point of no return and is heading for shutdown) and the |
153 |
++ * moment when deactivate_super() acquires ->s_umount. We could just do |
154 |
++ * drop_super() yield() and retry, but that's rather antisocial and this |
155 |
++ * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having |
156 |
++ * found that we'd got there first (i.e. that ->s_root is non-NULL) we know |
157 |
++ * that we won't race with inotify_umount_inodes(). So we could grab a |
158 |
++ * reference to watch and do the rest as above, just with drop_super() instead |
159 |
++ * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we |
160 |
++ * could grab ->s_umount. So the watch could've been gone already. |
161 |
++ * |
162 |
++ * That still can be dealt with - we need to save watch->wd, do idr_find() |
163 |
++ * and compare its result with our pointer. If they match, we either have |
164 |
++ * the damn thing still alive or we'd lost not one but two races at once, |
165 |
++ * the watch had been killed and a new one got created with the same ->wd |
166 |
++ * at the same address. That couldn't have happened in inotify_destroy(), |
167 |
++ * but inotify_rm_wd() could run into that. Still, "new one got created" |
168 |
++ * is not a problem - we have every right to kill it or leave it alone, |
169 |
++ * whatever's more convenient. |
170 |
++ * |
171 |
++ * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as |
172 |
++ * "grab it and kill it" check. If it's been our original watch, we are |
173 |
++ * fine, if it's a newcomer - nevermind, just pretend that we'd won the |
174 |
++ * race and kill the fscker anyway; we are safe since we know that its |
175 |
++ * superblock won't be going away. |
176 |
++ * |
177 |
++ * And yes, this is far beyond mere "not very pretty"; so's the entire |
178 |
++ * concept of inotify to start with. |
179 |
++ */ |
180 |
++ |
181 |
++/** |
182 |
++ * pin_to_kill - pin the watch down for removal |
183 |
++ * @ih: inotify handle |
184 |
++ * @watch: watch to kill |
185 |
++ * |
186 |
++ * Called with ih->mutex held, drops it. Possible return values: |
187 |
++ * 0 - nothing to do, it has died |
188 |
++ * 1 - remove it, drop the reference and deactivate_super() |
189 |
++ * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid |
190 |
++ * that variant, since it involved a lot of PITA, but that's the best that |
191 |
++ * could've been done. |
192 |
++ */ |
193 |
++static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) |
194 |
++{ |
195 |
++ struct super_block *sb = watch->inode->i_sb; |
196 |
++ s32 wd = watch->wd; |
197 |
++ |
198 |
++ spin_lock(&sb_lock); |
199 |
++ if (sb->s_count >= S_BIAS) { |
200 |
++ atomic_inc(&sb->s_active); |
201 |
++ spin_unlock(&sb_lock); |
202 |
++ get_inotify_watch(watch); |
203 |
++ mutex_unlock(&ih->mutex); |
204 |
++ return 1; /* the best outcome */ |
205 |
++ } |
206 |
++ sb->s_count++; |
207 |
++ spin_unlock(&sb_lock); |
208 |
++ mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ |
209 |
++ down_read(&sb->s_umount); |
210 |
++ if (likely(!sb->s_root)) { |
211 |
++ /* fs is already shut down; the watch is dead */ |
212 |
++ drop_super(sb); |
213 |
++ return 0; |
214 |
++ } |
215 |
++ /* raced with the final deactivate_super() */ |
216 |
++ mutex_lock(&ih->mutex); |
217 |
++ if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { |
218 |
++ /* the watch is dead */ |
219 |
++ mutex_unlock(&ih->mutex); |
220 |
++ drop_super(sb); |
221 |
++ return 0; |
222 |
++ } |
223 |
++ /* still alive or freed and reused with the same sb and wd; kill */ |
224 |
++ get_inotify_watch(watch); |
225 |
++ mutex_unlock(&ih->mutex); |
226 |
++ return 2; |
227 |
++} |
228 |
++ |
229 |
++static void unpin_and_kill(struct inotify_watch *watch, int how) |
230 |
++{ |
231 |
++ struct super_block *sb = watch->inode->i_sb; |
232 |
++ put_inotify_watch(watch); |
233 |
++ switch (how) { |
234 |
++ case 1: |
235 |
++ deactivate_super(sb); |
236 |
++ break; |
237 |
++ case 2: |
238 |
++ drop_super(sb); |
239 |
++ } |
240 |
++} |
241 |
++ |
242 |
+ /** |
243 |
+ * inotify_destroy - clean up and destroy an inotify instance |
244 |
+ * @ih: inotify handle |
245 |
+@@ -490,11 +617,15 @@ void inotify_destroy(struct inotify_handle *ih) |
246 |
+ * pretty. We cannot do a simple iteration over the list, because we |
247 |
+ * do not know the inode until we iterate to the watch. But we need to |
248 |
+ * hold inode->inotify_mutex before ih->mutex. The following works. |
249 |
++ * |
250 |
++ * AV: it had to become even uglier to start working ;-/ |
251 |
+ */ |
252 |
+ while (1) { |
253 |
+ struct inotify_watch *watch; |
254 |
+ struct list_head *watches; |
255 |
++ struct super_block *sb; |
256 |
+ struct inode *inode; |
257 |
++ int how; |
258 |
+ |
259 |
+ mutex_lock(&ih->mutex); |
260 |
+ watches = &ih->watches; |
261 |
+@@ -503,8 +634,10 @@ void inotify_destroy(struct inotify_handle *ih) |
262 |
+ break; |
263 |
+ } |
264 |
+ watch = list_first_entry(watches, struct inotify_watch, h_list); |
265 |
+- get_inotify_watch(watch); |
266 |
+- mutex_unlock(&ih->mutex); |
267 |
++ sb = watch->inode->i_sb; |
268 |
++ how = pin_to_kill(ih, watch); |
269 |
++ if (!how) |
270 |
++ continue; |
271 |
+ |
272 |
+ inode = watch->inode; |
273 |
+ mutex_lock(&inode->inotify_mutex); |
274 |
+@@ -518,7 +651,7 @@ void inotify_destroy(struct inotify_handle *ih) |
275 |
+ |
276 |
+ mutex_unlock(&ih->mutex); |
277 |
+ mutex_unlock(&inode->inotify_mutex); |
278 |
+- put_inotify_watch(watch); |
279 |
++ unpin_and_kill(watch, how); |
280 |
+ } |
281 |
+ |
282 |
+ /* free this handle: the put matching the get in inotify_init() */ |
283 |
+@@ -719,7 +852,9 @@ void inotify_evict_watch(struct inotify_watch *watch) |
284 |
+ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) |
285 |
+ { |
286 |
+ struct inotify_watch *watch; |
287 |
++ struct super_block *sb; |
288 |
+ struct inode *inode; |
289 |
++ int how; |
290 |
+ |
291 |
+ mutex_lock(&ih->mutex); |
292 |
+ watch = idr_find(&ih->idr, wd); |
293 |
+@@ -727,9 +862,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) |
294 |
+ mutex_unlock(&ih->mutex); |
295 |
+ return -EINVAL; |
296 |
+ } |
297 |
+- get_inotify_watch(watch); |
298 |
++ sb = watch->inode->i_sb; |
299 |
++ how = pin_to_kill(ih, watch); |
300 |
++ if (!how) |
301 |
++ return 0; |
302 |
++ |
303 |
+ inode = watch->inode; |
304 |
+- mutex_unlock(&ih->mutex); |
305 |
+ |
306 |
+ mutex_lock(&inode->inotify_mutex); |
307 |
+ mutex_lock(&ih->mutex); |
308 |
+@@ -740,7 +878,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) |
309 |
+ |
310 |
+ mutex_unlock(&ih->mutex); |
311 |
+ mutex_unlock(&inode->inotify_mutex); |
312 |
+- put_inotify_watch(watch); |
313 |
++ unpin_and_kill(watch, how); |
314 |
+ |
315 |
+ return 0; |
316 |
+ } |
317 |
+diff --git a/include/linux/inotify.h b/include/linux/inotify.h |
318 |
+index bd57857..37ea289 100644 |
319 |
+--- a/include/linux/inotify.h |
320 |
++++ b/include/linux/inotify.h |
321 |
+@@ -134,6 +134,8 @@ extern void inotify_remove_watch_locked(struct inotify_handle *, |
322 |
+ struct inotify_watch *); |
323 |
+ extern void get_inotify_watch(struct inotify_watch *); |
324 |
+ extern void put_inotify_watch(struct inotify_watch *); |
325 |
++extern int pin_inotify_watch(struct inotify_watch *); |
326 |
++extern void unpin_inotify_watch(struct inotify_watch *); |
327 |
+ |
328 |
+ #else |
329 |
+ |
330 |
+@@ -228,6 +230,15 @@ static inline void put_inotify_watch(struct inotify_watch *watch) |
331 |
+ { |
332 |
+ } |
333 |
+ |
334 |
++extern inline int pin_inotify_watch(struct inotify_watch *watch) |
335 |
++{ |
336 |
++ return 0; |
337 |
++} |
338 |
++ |
339 |
++extern inline void unpin_inotify_watch(struct inotify_watch *watch) |
340 |
++{ |
341 |
++} |
342 |
++ |
343 |
+ #endif /* CONFIG_INOTIFY */ |
344 |
+ |
345 |
+ #endif /* __KERNEL __ */ |
346 |
+diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c |
347 |
+index 8ba0e0d..8b50944 100644 |
348 |
+--- a/kernel/audit_tree.c |
349 |
++++ b/kernel/audit_tree.c |
350 |
+@@ -24,6 +24,7 @@ struct audit_chunk { |
351 |
+ struct list_head trees; /* with root here */ |
352 |
+ int dead; |
353 |
+ int count; |
354 |
++ atomic_long_t refs; |
355 |
+ struct rcu_head head; |
356 |
+ struct node { |
357 |
+ struct list_head list; |
358 |
+@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list); |
359 |
+ * tree is refcounted; one reference for "some rules on rules_list refer to |
360 |
+ * it", one for each chunk with pointer to it. |
361 |
+ * |
362 |
+- * chunk is refcounted by embedded inotify_watch. |
363 |
++ * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount |
364 |
++ * of watch contributes 1 to .refs). |
365 |
+ * |
366 |
+ * node.index allows to get from node.list to containing chunk. |
367 |
+ * MSB of that sucker is stolen to mark taggings that we might have to |
368 |
+@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count) |
369 |
+ INIT_LIST_HEAD(&chunk->hash); |
370 |
+ INIT_LIST_HEAD(&chunk->trees); |
371 |
+ chunk->count = count; |
372 |
++ atomic_long_set(&chunk->refs, 1); |
373 |
+ for (i = 0; i < count; i++) { |
374 |
+ INIT_LIST_HEAD(&chunk->owners[i].list); |
375 |
+ chunk->owners[i].index = i; |
376 |
+@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count) |
377 |
+ return chunk; |
378 |
+ } |
379 |
+ |
380 |
+-static void __free_chunk(struct rcu_head *rcu) |
381 |
++static void free_chunk(struct audit_chunk *chunk) |
382 |
+ { |
383 |
+- struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head); |
384 |
+ int i; |
385 |
+ |
386 |
+ for (i = 0; i < chunk->count; i++) { |
387 |
+@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu) |
388 |
+ kfree(chunk); |
389 |
+ } |
390 |
+ |
391 |
+-static inline void free_chunk(struct audit_chunk *chunk) |
392 |
++void audit_put_chunk(struct audit_chunk *chunk) |
393 |
+ { |
394 |
+- call_rcu(&chunk->head, __free_chunk); |
395 |
++ if (atomic_long_dec_and_test(&chunk->refs)) |
396 |
++ free_chunk(chunk); |
397 |
+ } |
398 |
+ |
399 |
+-void audit_put_chunk(struct audit_chunk *chunk) |
400 |
++static void __put_chunk(struct rcu_head *rcu) |
401 |
+ { |
402 |
+- put_inotify_watch(&chunk->watch); |
403 |
++ struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head); |
404 |
++ audit_put_chunk(chunk); |
405 |
+ } |
406 |
+ |
407 |
+ enum {HASH_SIZE = 128}; |
408 |
+@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode) |
409 |
+ |
410 |
+ list_for_each_entry_rcu(p, list, hash) { |
411 |
+ if (p->watch.inode == inode) { |
412 |
+- get_inotify_watch(&p->watch); |
413 |
++ atomic_long_inc(&p->refs); |
414 |
+ return p; |
415 |
+ } |
416 |
+ } |
417 |
+@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree) |
418 |
+ |
419 |
+ /* tagging and untagging inodes with trees */ |
420 |
+ |
421 |
+-static void untag_chunk(struct audit_chunk *chunk, struct node *p) |
422 |
++static struct audit_chunk *find_chunk(struct node *p) |
423 |
++{ |
424 |
++ int index = p->index & ~(1U<<31); |
425 |
++ p -= index; |
426 |
++ return container_of(p, struct audit_chunk, owners[0]); |
427 |
++} |
428 |
++ |
429 |
++static void untag_chunk(struct node *p) |
430 |
+ { |
431 |
++ struct audit_chunk *chunk = find_chunk(p); |
432 |
+ struct audit_chunk *new; |
433 |
+ struct audit_tree *owner; |
434 |
+ int size = chunk->count - 1; |
435 |
+ int i, j; |
436 |
+ |
437 |
++ if (!pin_inotify_watch(&chunk->watch)) { |
438 |
++ /* |
439 |
++ * Filesystem is shutting down; all watches are getting |
440 |
++ * evicted, just take it off the node list for this |
441 |
++ * tree and let the eviction logics take care of the |
442 |
++ * rest. |
443 |
++ */ |
444 |
++ owner = p->owner; |
445 |
++ if (owner->root == chunk) { |
446 |
++ list_del_init(&owner->same_root); |
447 |
++ owner->root = NULL; |
448 |
++ } |
449 |
++ list_del_init(&p->list); |
450 |
++ p->owner = NULL; |
451 |
++ put_tree(owner); |
452 |
++ return; |
453 |
++ } |
454 |
++ |
455 |
++ spin_unlock(&hash_lock); |
456 |
++ |
457 |
++ /* |
458 |
++ * pin_inotify_watch() succeeded, so the watch won't go away |
459 |
++ * from under us. |
460 |
++ */ |
461 |
+ mutex_lock(&chunk->watch.inode->inotify_mutex); |
462 |
+ if (chunk->dead) { |
463 |
+ mutex_unlock(&chunk->watch.inode->inotify_mutex); |
464 |
+- return; |
465 |
++ goto out; |
466 |
+ } |
467 |
+ |
468 |
+ owner = p->owner; |
469 |
+@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p) |
470 |
+ inotify_evict_watch(&chunk->watch); |
471 |
+ mutex_unlock(&chunk->watch.inode->inotify_mutex); |
472 |
+ put_inotify_watch(&chunk->watch); |
473 |
+- return; |
474 |
++ goto out; |
475 |
+ } |
476 |
+ |
477 |
+ new = alloc_chunk(size); |
478 |
+@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p) |
479 |
+ inotify_evict_watch(&chunk->watch); |
480 |
+ mutex_unlock(&chunk->watch.inode->inotify_mutex); |
481 |
+ put_inotify_watch(&chunk->watch); |
482 |
+- return; |
483 |
++ goto out; |
484 |
+ |
485 |
+ Fallback: |
486 |
+ // do the best we can |
487 |
+@@ -277,6 +313,9 @@ Fallback: |
488 |
+ put_tree(owner); |
489 |
+ spin_unlock(&hash_lock); |
490 |
+ mutex_unlock(&chunk->watch.inode->inotify_mutex); |
491 |
++out: |
492 |
++ unpin_inotify_watch(&chunk->watch); |
493 |
++ spin_lock(&hash_lock); |
494 |
+ } |
495 |
+ |
496 |
+ static int create_chunk(struct inode *inode, struct audit_tree *tree) |
497 |
+@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) |
498 |
+ return 0; |
499 |
+ } |
500 |
+ |
501 |
+-static struct audit_chunk *find_chunk(struct node *p) |
502 |
+-{ |
503 |
+- int index = p->index & ~(1U<<31); |
504 |
+- p -= index; |
505 |
+- return container_of(p, struct audit_chunk, owners[0]); |
506 |
+-} |
507 |
+- |
508 |
+ static void kill_rules(struct audit_tree *tree) |
509 |
+ { |
510 |
+ struct audit_krule *rule, *next; |
511 |
+@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim) |
512 |
+ spin_lock(&hash_lock); |
513 |
+ while (!list_empty(&victim->chunks)) { |
514 |
+ struct node *p; |
515 |
+- struct audit_chunk *chunk; |
516 |
+ |
517 |
+ p = list_entry(victim->chunks.next, struct node, list); |
518 |
+- chunk = find_chunk(p); |
519 |
+- get_inotify_watch(&chunk->watch); |
520 |
+- spin_unlock(&hash_lock); |
521 |
+- |
522 |
+- untag_chunk(chunk, p); |
523 |
+ |
524 |
+- put_inotify_watch(&chunk->watch); |
525 |
+- spin_lock(&hash_lock); |
526 |
++ untag_chunk(p); |
527 |
+ } |
528 |
+ spin_unlock(&hash_lock); |
529 |
+ put_tree(victim); |
530 |
+@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree) |
531 |
+ |
532 |
+ while (!list_empty(&tree->chunks)) { |
533 |
+ struct node *node; |
534 |
+- struct audit_chunk *chunk; |
535 |
+ |
536 |
+ node = list_entry(tree->chunks.next, struct node, list); |
537 |
+ |
538 |
+@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree) |
539 |
+ if (!(node->index & (1U<<31))) |
540 |
+ break; |
541 |
+ |
542 |
+- chunk = find_chunk(node); |
543 |
+- get_inotify_watch(&chunk->watch); |
544 |
+- spin_unlock(&hash_lock); |
545 |
+- |
546 |
+- untag_chunk(chunk, node); |
547 |
+- |
548 |
+- put_inotify_watch(&chunk->watch); |
549 |
+- spin_lock(&hash_lock); |
550 |
++ untag_chunk(node); |
551 |
+ } |
552 |
+ if (!tree->root && !tree->goner) { |
553 |
+ tree->goner = 1; |
554 |
+@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask, |
555 |
+ static void destroy_watch(struct inotify_watch *watch) |
556 |
+ { |
557 |
+ struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch); |
558 |
+- free_chunk(chunk); |
559 |
++ call_rcu(&chunk->head, __put_chunk); |
560 |
+ } |
561 |
+ |
562 |
+ static const struct inotify_operations rtree_inotify_ops = { |
563 |
+diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c |
564 |
+index b7d354e..9fd85a4 100644 |
565 |
+--- a/kernel/auditfilter.c |
566 |
++++ b/kernel/auditfilter.c |
567 |
+@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list) |
568 |
+ list_for_each_entry_safe(p, n, in_list, ilist) { |
569 |
+ list_del(&p->ilist); |
570 |
+ inotify_rm_watch(audit_ih, &p->wdata); |
571 |
+- /* the put matching the get in audit_do_del_rule() */ |
572 |
+- put_inotify_watch(&p->wdata); |
573 |
++ /* the unpin matching the pin in audit_do_del_rule() */ |
574 |
++ unpin_inotify_watch(&p->wdata); |
575 |
+ } |
576 |
+ } |
577 |
+ |
578 |
+@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry, |
579 |
+ /* Put parent on the inotify un-registration |
580 |
+ * list. Grab a reference before releasing |
581 |
+ * audit_filter_mutex, to be released in |
582 |
+- * audit_inotify_unregister(). */ |
583 |
+- list_add(&parent->ilist, &inotify_list); |
584 |
+- get_inotify_watch(&parent->wdata); |
585 |
++ * audit_inotify_unregister(). |
586 |
++ * If filesystem is going away, just leave |
587 |
++ * the sucker alone, eviction will take |
588 |
++ * care of it. |
589 |
++ */ |
590 |
++ if (pin_inotify_watch(&parent->wdata)) |
591 |
++ list_add(&parent->ilist, &inotify_list); |
592 |
+ } |
593 |
+ } |
594 |
+ } |