1 |
commit: b59ee32ab3065145d58b65bb7895cca32a510ec5 |
2 |
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Jan 30 13:33:01 2021 +0000 |
4 |
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org> |
5 |
CommitDate: Sat Jan 30 13:33:38 2021 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=b59ee32a |
7 |
|
8 |
Linux patch 4.19.172 |
9 |
|
10 |
Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org> |
11 |
|
12 |
0000_README | 4 + |
13 |
1171_linux-4.19.172.patch | 1606 +++++++++++++++++++++++++++++++++++++++++++++ |
14 |
2 files changed, 1610 insertions(+) |
15 |
|
16 |
diff --git a/0000_README b/0000_README |
17 |
index 1c1a372..a202ba1 100644 |
18 |
--- a/0000_README |
19 |
+++ b/0000_README |
20 |
@@ -723,6 +723,10 @@ Patch: 1170_linux-4.19.171.patch |
21 |
From: https://www.kernel.org |
22 |
Desc: Linux 4.19.171 |
23 |
|
24 |
+Patch: 1171_linux-4.19.172.patch |
25 |
+From: https://www.kernel.org |
26 |
+Desc: Linux 4.19.172 |
27 |
+ |
28 |
Patch: 1500_XATTR_USER_PREFIX.patch |
29 |
From: https://bugs.gentoo.org/show_bug.cgi?id=470644 |
30 |
Desc: Support for namespace user.pax.* on tmpfs. |
31 |
|
32 |
diff --git a/1171_linux-4.19.172.patch b/1171_linux-4.19.172.patch |
33 |
new file mode 100644 |
34 |
index 0000000..fc24ced |
35 |
--- /dev/null |
36 |
+++ b/1171_linux-4.19.172.patch |
37 |
@@ -0,0 +1,1606 @@ |
38 |
+diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt |
39 |
+index 297251b0d2d57..bf6af2ade0a67 100644 |
40 |
+--- a/Documentation/device-mapper/dm-integrity.txt |
41 |
++++ b/Documentation/device-mapper/dm-integrity.txt |
42 |
+@@ -146,6 +146,13 @@ block_size:number |
43 |
+ Supported values are 512, 1024, 2048 and 4096 bytes. If not |
44 |
+ specified the default block size is 512 bytes. |
45 |
+ |
46 |
++legacy_recalculate |
47 |
++ Allow recalculating of volumes with HMAC keys. This is disabled by |
48 |
++ default for security reasons - an attacker could modify the volume, |
49 |
++ set recalc_sector to zero, and the kernel would not detect the |
50 |
++ modification. |
51 |
++ |
52 |
++ |
53 |
+ The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can |
54 |
+ be changed when reloading the target (load an inactive table and swap the |
55 |
+ tables with suspend and resume). The other arguments should not be changed |
56 |
+diff --git a/Makefile b/Makefile |
57 |
+index 335b015c5c9ba..7da0ddd650521 100644 |
58 |
+--- a/Makefile |
59 |
++++ b/Makefile |
60 |
+@@ -1,7 +1,7 @@ |
61 |
+ # SPDX-License-Identifier: GPL-2.0 |
62 |
+ VERSION = 4 |
63 |
+ PATCHLEVEL = 19 |
64 |
+-SUBLEVEL = 171 |
65 |
++SUBLEVEL = 172 |
66 |
+ EXTRAVERSION = |
67 |
+ NAME = "People's Front" |
68 |
+ |
69 |
+diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c |
70 |
+index 3b78dcda47364..874caed723905 100644 |
71 |
+--- a/drivers/gpio/gpio-mvebu.c |
72 |
++++ b/drivers/gpio/gpio-mvebu.c |
73 |
+@@ -650,9 +650,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, |
74 |
+ |
75 |
+ spin_lock_irqsave(&mvpwm->lock, flags); |
76 |
+ |
77 |
+- val = (unsigned long long) |
78 |
+- readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm)); |
79 |
+- val *= NSEC_PER_SEC; |
80 |
++ u = readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm)); |
81 |
++ val = (unsigned long long) u * NSEC_PER_SEC; |
82 |
+ do_div(val, mvpwm->clk_rate); |
83 |
+ if (val > UINT_MAX) |
84 |
+ state->duty_cycle = UINT_MAX; |
85 |
+@@ -661,21 +660,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, |
86 |
+ else |
87 |
+ state->duty_cycle = 1; |
88 |
+ |
89 |
+- val = (unsigned long long) |
90 |
+- readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm)); |
91 |
++ val = (unsigned long long) u; /* on duration */ |
92 |
++ /* period = on + off duration */ |
93 |
++ val += readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm)); |
94 |
+ val *= NSEC_PER_SEC; |
95 |
+ do_div(val, mvpwm->clk_rate); |
96 |
+- if (val < state->duty_cycle) { |
97 |
++ if (val > UINT_MAX) |
98 |
++ state->period = UINT_MAX; |
99 |
++ else if (val) |
100 |
++ state->period = val; |
101 |
++ else |
102 |
+ state->period = 1; |
103 |
+- } else { |
104 |
+- val -= state->duty_cycle; |
105 |
+- if (val > UINT_MAX) |
106 |
+- state->period = UINT_MAX; |
107 |
+- else if (val) |
108 |
+- state->period = val; |
109 |
+- else |
110 |
+- state->period = 1; |
111 |
+- } |
112 |
+ |
113 |
+ regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u); |
114 |
+ if (u) |
115 |
+diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c |
116 |
+index 523014f2c0eb2..8006732b8f424 100644 |
117 |
+--- a/drivers/hid/wacom_sys.c |
118 |
++++ b/drivers/hid/wacom_sys.c |
119 |
+@@ -150,9 +150,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev, |
120 |
+ } |
121 |
+ |
122 |
+ if (flush) |
123 |
+- wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo); |
124 |
++ wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo); |
125 |
+ else if (insert) |
126 |
+- wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo, |
127 |
++ wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo, |
128 |
+ raw_data, report_size); |
129 |
+ |
130 |
+ return insert && !flush; |
131 |
+@@ -1251,7 +1251,7 @@ static void wacom_devm_kfifo_release(struct device *dev, void *res) |
132 |
+ static int wacom_devm_kfifo_alloc(struct wacom *wacom) |
133 |
+ { |
134 |
+ struct wacom_wac *wacom_wac = &wacom->wacom_wac; |
135 |
+- struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo; |
136 |
++ struct kfifo_rec_ptr_2 *pen_fifo; |
137 |
+ int error; |
138 |
+ |
139 |
+ pen_fifo = devres_alloc(wacom_devm_kfifo_release, |
140 |
+@@ -1268,6 +1268,7 @@ static int wacom_devm_kfifo_alloc(struct wacom *wacom) |
141 |
+ } |
142 |
+ |
143 |
+ devres_add(&wacom->hdev->dev, pen_fifo); |
144 |
++ wacom_wac->pen_fifo = pen_fifo; |
145 |
+ |
146 |
+ return 0; |
147 |
+ } |
148 |
+diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h |
149 |
+index f67d871841c0c..46da97162ef43 100644 |
150 |
+--- a/drivers/hid/wacom_wac.h |
151 |
++++ b/drivers/hid/wacom_wac.h |
152 |
+@@ -344,7 +344,7 @@ struct wacom_wac { |
153 |
+ struct input_dev *pen_input; |
154 |
+ struct input_dev *touch_input; |
155 |
+ struct input_dev *pad_input; |
156 |
+- struct kfifo_rec_ptr_2 pen_fifo; |
157 |
++ struct kfifo_rec_ptr_2 *pen_fifo; |
158 |
+ int pid; |
159 |
+ int num_contacts_left; |
160 |
+ u8 bt_features; |
161 |
+diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c |
162 |
+index 1917051b512f5..cffd423172726 100644 |
163 |
+--- a/drivers/md/dm-integrity.c |
164 |
++++ b/drivers/md/dm-integrity.c |
165 |
+@@ -240,6 +240,7 @@ struct dm_integrity_c { |
166 |
+ |
167 |
+ bool journal_uptodate; |
168 |
+ bool just_formatted; |
169 |
++ bool legacy_recalculate; |
170 |
+ |
171 |
+ struct alg_spec internal_hash_alg; |
172 |
+ struct alg_spec journal_crypt_alg; |
173 |
+@@ -345,6 +346,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic) |
174 |
+ return READ_ONCE(ic->failed); |
175 |
+ } |
176 |
+ |
177 |
++static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic) |
178 |
++{ |
179 |
++ if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) && |
180 |
++ !ic->legacy_recalculate) |
181 |
++ return true; |
182 |
++ return false; |
183 |
++} |
184 |
++ |
185 |
+ static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, |
186 |
+ unsigned j, unsigned char seq) |
187 |
+ { |
188 |
+@@ -2503,6 +2512,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, |
189 |
+ arg_count += !!ic->internal_hash_alg.alg_string; |
190 |
+ arg_count += !!ic->journal_crypt_alg.alg_string; |
191 |
+ arg_count += !!ic->journal_mac_alg.alg_string; |
192 |
++ arg_count += ic->legacy_recalculate; |
193 |
+ DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, |
194 |
+ ic->tag_size, ic->mode, arg_count); |
195 |
+ if (ic->meta_dev) |
196 |
+@@ -2516,6 +2526,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, |
197 |
+ DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); |
198 |
+ DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); |
199 |
+ DMEMIT(" commit_time:%u", ic->autocommit_msec); |
200 |
++ if (ic->legacy_recalculate) |
201 |
++ DMEMIT(" legacy_recalculate"); |
202 |
+ |
203 |
+ #define EMIT_ALG(a, n) \ |
204 |
+ do { \ |
205 |
+@@ -3118,7 +3130,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) |
206 |
+ unsigned extra_args; |
207 |
+ struct dm_arg_set as; |
208 |
+ static const struct dm_arg _args[] = { |
209 |
+- {0, 15, "Invalid number of feature args"}, |
210 |
++ {0, 12, "Invalid number of feature args"}, |
211 |
+ }; |
212 |
+ unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; |
213 |
+ bool recalculate; |
214 |
+@@ -3248,6 +3260,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) |
215 |
+ goto bad; |
216 |
+ } else if (!strcmp(opt_string, "recalculate")) { |
217 |
+ recalculate = true; |
218 |
++ } else if (!strcmp(opt_string, "legacy_recalculate")) { |
219 |
++ ic->legacy_recalculate = true; |
220 |
+ } else { |
221 |
+ r = -EINVAL; |
222 |
+ ti->error = "Invalid argument"; |
223 |
+@@ -3523,6 +3537,14 @@ try_smaller_buffer: |
224 |
+ } |
225 |
+ } |
226 |
+ |
227 |
++ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && |
228 |
++ le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors && |
229 |
++ dm_integrity_disable_recalculate(ic)) { |
230 |
++ ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\""; |
231 |
++ r = -EOPNOTSUPP; |
232 |
++ goto bad; |
233 |
++ } |
234 |
++ |
235 |
+ ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, |
236 |
+ 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL); |
237 |
+ if (IS_ERR(ic->bufio)) { |
238 |
+diff --git a/fs/exec.c b/fs/exec.c |
239 |
+index 52788644c4af2..6eea921a7e72f 100644 |
240 |
+--- a/fs/exec.c |
241 |
++++ b/fs/exec.c |
242 |
+@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *mm) |
243 |
+ /* Notify parent that we're no longer interested in the old VM */ |
244 |
+ tsk = current; |
245 |
+ old_mm = current->mm; |
246 |
+- mm_release(tsk, old_mm); |
247 |
++ exec_mm_release(tsk, old_mm); |
248 |
+ |
249 |
+ if (old_mm) { |
250 |
+ sync_mm_rss(old_mm); |
251 |
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c |
252 |
+index b2a9c746f8ce4..edeb837081c80 100644 |
253 |
+--- a/fs/ext4/inode.c |
254 |
++++ b/fs/ext4/inode.c |
255 |
+@@ -5209,7 +5209,7 @@ static int other_inode_match(struct inode * inode, unsigned long ino, |
256 |
+ (inode->i_state & I_DIRTY_TIME)) { |
257 |
+ struct ext4_inode_info *ei = EXT4_I(inode); |
258 |
+ |
259 |
+- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); |
260 |
++ inode->i_state &= ~I_DIRTY_TIME; |
261 |
+ spin_unlock(&inode->i_lock); |
262 |
+ |
263 |
+ spin_lock(&ei->i_raw_lock); |
264 |
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c |
265 |
+index 15216b440880a..f2d0c4acb3cbb 100644 |
266 |
+--- a/fs/fs-writeback.c |
267 |
++++ b/fs/fs-writeback.c |
268 |
+@@ -1157,7 +1157,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) |
269 |
+ */ |
270 |
+ static int move_expired_inodes(struct list_head *delaying_queue, |
271 |
+ struct list_head *dispatch_queue, |
272 |
+- int flags, unsigned long dirtied_before) |
273 |
++ unsigned long dirtied_before) |
274 |
+ { |
275 |
+ LIST_HEAD(tmp); |
276 |
+ struct list_head *pos, *node; |
277 |
+@@ -1173,8 +1173,6 @@ static int move_expired_inodes(struct list_head *delaying_queue, |
278 |
+ list_move(&inode->i_io_list, &tmp); |
279 |
+ moved++; |
280 |
+ spin_lock(&inode->i_lock); |
281 |
+- if (flags & EXPIRE_DIRTY_ATIME) |
282 |
+- inode->i_state |= I_DIRTY_TIME_EXPIRED; |
283 |
+ inode->i_state |= I_SYNC_QUEUED; |
284 |
+ spin_unlock(&inode->i_lock); |
285 |
+ if (sb_is_blkdev_sb(inode->i_sb)) |
286 |
+@@ -1222,11 +1220,11 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, |
287 |
+ |
288 |
+ assert_spin_locked(&wb->list_lock); |
289 |
+ list_splice_init(&wb->b_more_io, &wb->b_io); |
290 |
+- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before); |
291 |
++ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); |
292 |
+ if (!work->for_sync) |
293 |
+ time_expire_jif = jiffies - dirtytime_expire_interval * HZ; |
294 |
+ moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, |
295 |
+- EXPIRE_DIRTY_ATIME, time_expire_jif); |
296 |
++ time_expire_jif); |
297 |
+ if (moved) |
298 |
+ wb_io_lists_populated(wb); |
299 |
+ trace_writeback_queue_io(wb, work, dirtied_before, moved); |
300 |
+@@ -1394,26 +1392,26 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
301 |
+ ret = err; |
302 |
+ } |
303 |
+ |
304 |
++ /* |
305 |
++ * If the inode has dirty timestamps and we need to write them, call |
306 |
++ * mark_inode_dirty_sync() to notify the filesystem about it and to |
307 |
++ * change I_DIRTY_TIME into I_DIRTY_SYNC. |
308 |
++ */ |
309 |
++ if ((inode->i_state & I_DIRTY_TIME) && |
310 |
++ (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || |
311 |
++ time_after(jiffies, inode->dirtied_time_when + |
312 |
++ dirtytime_expire_interval * HZ))) { |
313 |
++ trace_writeback_lazytime(inode); |
314 |
++ mark_inode_dirty_sync(inode); |
315 |
++ } |
316 |
++ |
317 |
+ /* |
318 |
+ * Some filesystems may redirty the inode during the writeback |
319 |
+ * due to delalloc, clear dirty metadata flags right before |
320 |
+ * write_inode() |
321 |
+ */ |
322 |
+ spin_lock(&inode->i_lock); |
323 |
+- |
324 |
+ dirty = inode->i_state & I_DIRTY; |
325 |
+- if (inode->i_state & I_DIRTY_TIME) { |
326 |
+- if ((dirty & I_DIRTY_INODE) || |
327 |
+- wbc->sync_mode == WB_SYNC_ALL || |
328 |
+- unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || |
329 |
+- unlikely(time_after(jiffies, |
330 |
+- (inode->dirtied_time_when + |
331 |
+- dirtytime_expire_interval * HZ)))) { |
332 |
+- dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; |
333 |
+- trace_writeback_lazytime(inode); |
334 |
+- } |
335 |
+- } else |
336 |
+- inode->i_state &= ~I_DIRTY_TIME_EXPIRED; |
337 |
+ inode->i_state &= ~dirty; |
338 |
+ |
339 |
+ /* |
340 |
+@@ -1434,8 +1432,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
341 |
+ |
342 |
+ spin_unlock(&inode->i_lock); |
343 |
+ |
344 |
+- if (dirty & I_DIRTY_TIME) |
345 |
+- mark_inode_dirty_sync(inode); |
346 |
+ /* Don't write the inode if only I_DIRTY_PAGES was set */ |
347 |
+ if (dirty & ~I_DIRTY_PAGES) { |
348 |
+ int err = write_inode(inode, wbc); |
349 |
+diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c |
350 |
+index ae453dd236a69..6fcdf7e449fe7 100644 |
351 |
+--- a/fs/xfs/xfs_trans_inode.c |
352 |
++++ b/fs/xfs/xfs_trans_inode.c |
353 |
+@@ -99,9 +99,9 @@ xfs_trans_log_inode( |
354 |
+ * to log the timestamps, or will clear already cleared fields in the |
355 |
+ * worst case. |
356 |
+ */ |
357 |
+- if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) { |
358 |
++ if (inode->i_state & I_DIRTY_TIME) { |
359 |
+ spin_lock(&inode->i_lock); |
360 |
+- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); |
361 |
++ inode->i_state &= ~I_DIRTY_TIME; |
362 |
+ spin_unlock(&inode->i_lock); |
363 |
+ } |
364 |
+ |
365 |
+diff --git a/include/linux/compat.h b/include/linux/compat.h |
366 |
+index de0c13bdcd2c7..189d0e111d57d 100644 |
367 |
+--- a/include/linux/compat.h |
368 |
++++ b/include/linux/compat.h |
369 |
+@@ -445,8 +445,6 @@ struct compat_kexec_segment; |
370 |
+ struct compat_mq_attr; |
371 |
+ struct compat_msgbuf; |
372 |
+ |
373 |
+-extern void compat_exit_robust_list(struct task_struct *curr); |
374 |
+- |
375 |
+ #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) |
376 |
+ |
377 |
+ #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) |
378 |
+diff --git a/include/linux/fs.h b/include/linux/fs.h |
379 |
+index 876bfb6df06a9..b6a955ba6173a 100644 |
380 |
+--- a/include/linux/fs.h |
381 |
++++ b/include/linux/fs.h |
382 |
+@@ -2071,7 +2071,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) |
383 |
+ #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) |
384 |
+ #define I_LINKABLE (1 << 10) |
385 |
+ #define I_DIRTY_TIME (1 << 11) |
386 |
+-#define I_DIRTY_TIME_EXPIRED (1 << 12) |
387 |
+ #define I_WB_SWITCH (1 << 13) |
388 |
+ #define I_OVL_INUSE (1 << 14) |
389 |
+ #define I_CREATING (1 << 15) |
390 |
+diff --git a/include/linux/futex.h b/include/linux/futex.h |
391 |
+index a61bf436dcf36..b70df27d7e85c 100644 |
392 |
+--- a/include/linux/futex.h |
393 |
++++ b/include/linux/futex.h |
394 |
+@@ -2,7 +2,9 @@ |
395 |
+ #ifndef _LINUX_FUTEX_H |
396 |
+ #define _LINUX_FUTEX_H |
397 |
+ |
398 |
++#include <linux/sched.h> |
399 |
+ #include <linux/ktime.h> |
400 |
++ |
401 |
+ #include <uapi/linux/futex.h> |
402 |
+ |
403 |
+ struct inode; |
404 |
+@@ -51,15 +53,35 @@ union futex_key { |
405 |
+ #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } |
406 |
+ |
407 |
+ #ifdef CONFIG_FUTEX |
408 |
+-extern void exit_robust_list(struct task_struct *curr); |
409 |
++enum { |
410 |
++ FUTEX_STATE_OK, |
411 |
++ FUTEX_STATE_EXITING, |
412 |
++ FUTEX_STATE_DEAD, |
413 |
++}; |
414 |
+ |
415 |
+-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
416 |
+- u32 __user *uaddr2, u32 val2, u32 val3); |
417 |
+-#else |
418 |
+-static inline void exit_robust_list(struct task_struct *curr) |
419 |
++static inline void futex_init_task(struct task_struct *tsk) |
420 |
+ { |
421 |
++ tsk->robust_list = NULL; |
422 |
++#ifdef CONFIG_COMPAT |
423 |
++ tsk->compat_robust_list = NULL; |
424 |
++#endif |
425 |
++ INIT_LIST_HEAD(&tsk->pi_state_list); |
426 |
++ tsk->pi_state_cache = NULL; |
427 |
++ tsk->futex_state = FUTEX_STATE_OK; |
428 |
++ mutex_init(&tsk->futex_exit_mutex); |
429 |
+ } |
430 |
+ |
431 |
++void futex_exit_recursive(struct task_struct *tsk); |
432 |
++void futex_exit_release(struct task_struct *tsk); |
433 |
++void futex_exec_release(struct task_struct *tsk); |
434 |
++ |
435 |
++long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
436 |
++ u32 __user *uaddr2, u32 val2, u32 val3); |
437 |
++#else |
438 |
++static inline void futex_init_task(struct task_struct *tsk) { } |
439 |
++static inline void futex_exit_recursive(struct task_struct *tsk) { } |
440 |
++static inline void futex_exit_release(struct task_struct *tsk) { } |
441 |
++static inline void futex_exec_release(struct task_struct *tsk) { } |
442 |
+ static inline long do_futex(u32 __user *uaddr, int op, u32 val, |
443 |
+ ktime_t *timeout, u32 __user *uaddr2, |
444 |
+ u32 val2, u32 val3) |
445 |
+@@ -68,12 +90,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, |
446 |
+ } |
447 |
+ #endif |
448 |
+ |
449 |
+-#ifdef CONFIG_FUTEX_PI |
450 |
+-extern void exit_pi_state_list(struct task_struct *curr); |
451 |
+-#else |
452 |
+-static inline void exit_pi_state_list(struct task_struct *curr) |
453 |
+-{ |
454 |
+-} |
455 |
+-#endif |
456 |
+- |
457 |
+ #endif |
458 |
+diff --git a/include/linux/sched.h b/include/linux/sched.h |
459 |
+index c69f308f3a53c..5524cd5c6abe6 100644 |
460 |
+--- a/include/linux/sched.h |
461 |
++++ b/include/linux/sched.h |
462 |
+@@ -996,6 +996,8 @@ struct task_struct { |
463 |
+ #endif |
464 |
+ struct list_head pi_state_list; |
465 |
+ struct futex_pi_state *pi_state_cache; |
466 |
++ struct mutex futex_exit_mutex; |
467 |
++ unsigned int futex_state; |
468 |
+ #endif |
469 |
+ #ifdef CONFIG_PERF_EVENTS |
470 |
+ struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; |
471 |
+@@ -1377,7 +1379,6 @@ extern struct pid *cad_pid; |
472 |
+ */ |
473 |
+ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ |
474 |
+ #define PF_EXITING 0x00000004 /* Getting shut down */ |
475 |
+-#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ |
476 |
+ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ |
477 |
+ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ |
478 |
+ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ |
479 |
+diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h |
480 |
+index 766bbe8138615..8d3b7e731b742 100644 |
481 |
+--- a/include/linux/sched/mm.h |
482 |
++++ b/include/linux/sched/mm.h |
483 |
+@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); |
484 |
+ * succeeds. |
485 |
+ */ |
486 |
+ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); |
487 |
+-/* Remove the current tasks stale references to the old mm_struct */ |
488 |
+-extern void mm_release(struct task_struct *, struct mm_struct *); |
489 |
++/* Remove the current tasks stale references to the old mm_struct on exit() */ |
490 |
++extern void exit_mm_release(struct task_struct *, struct mm_struct *); |
491 |
++/* Remove the current tasks stale references to the old mm_struct on exec() */ |
492 |
++extern void exec_mm_release(struct task_struct *, struct mm_struct *); |
493 |
+ |
494 |
+ #ifdef CONFIG_MEMCG |
495 |
+ extern void mm_update_next_owner(struct mm_struct *mm); |
496 |
+diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h |
497 |
+index 29d09755e5cfc..146e7b3faa856 100644 |
498 |
+--- a/include/trace/events/writeback.h |
499 |
++++ b/include/trace/events/writeback.h |
500 |
+@@ -20,7 +20,6 @@ |
501 |
+ {I_CLEAR, "I_CLEAR"}, \ |
502 |
+ {I_SYNC, "I_SYNC"}, \ |
503 |
+ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ |
504 |
+- {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \ |
505 |
+ {I_REFERENCED, "I_REFERENCED"} \ |
506 |
+ ) |
507 |
+ |
508 |
+diff --git a/kernel/exit.c b/kernel/exit.c |
509 |
+index 65133ebddfada..908e7a33e1fcb 100644 |
510 |
+--- a/kernel/exit.c |
511 |
++++ b/kernel/exit.c |
512 |
+@@ -498,7 +498,7 @@ static void exit_mm(void) |
513 |
+ struct mm_struct *mm = current->mm; |
514 |
+ struct core_state *core_state; |
515 |
+ |
516 |
+- mm_release(current, mm); |
517 |
++ exit_mm_release(current, mm); |
518 |
+ if (!mm) |
519 |
+ return; |
520 |
+ sync_mm_rss(mm); |
521 |
+@@ -818,32 +818,12 @@ void __noreturn do_exit(long code) |
522 |
+ */ |
523 |
+ if (unlikely(tsk->flags & PF_EXITING)) { |
524 |
+ pr_alert("Fixing recursive fault but reboot is needed!\n"); |
525 |
+- /* |
526 |
+- * We can do this unlocked here. The futex code uses |
527 |
+- * this flag just to verify whether the pi state |
528 |
+- * cleanup has been done or not. In the worst case it |
529 |
+- * loops once more. We pretend that the cleanup was |
530 |
+- * done as there is no way to return. Either the |
531 |
+- * OWNER_DIED bit is set by now or we push the blocked |
532 |
+- * task into the wait for ever nirwana as well. |
533 |
+- */ |
534 |
+- tsk->flags |= PF_EXITPIDONE; |
535 |
++ futex_exit_recursive(tsk); |
536 |
+ set_current_state(TASK_UNINTERRUPTIBLE); |
537 |
+ schedule(); |
538 |
+ } |
539 |
+ |
540 |
+ exit_signals(tsk); /* sets PF_EXITING */ |
541 |
+- /* |
542 |
+- * Ensure that all new tsk->pi_lock acquisitions must observe |
543 |
+- * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). |
544 |
+- */ |
545 |
+- smp_mb(); |
546 |
+- /* |
547 |
+- * Ensure that we must observe the pi_state in exit_mm() -> |
548 |
+- * mm_release() -> exit_pi_state_list(). |
549 |
+- */ |
550 |
+- raw_spin_lock_irq(&tsk->pi_lock); |
551 |
+- raw_spin_unlock_irq(&tsk->pi_lock); |
552 |
+ |
553 |
+ /* sync mm's RSS info before statistics gathering */ |
554 |
+ if (tsk->mm) |
555 |
+@@ -918,12 +898,6 @@ void __noreturn do_exit(long code) |
556 |
+ * Make sure we are holding no locks: |
557 |
+ */ |
558 |
+ debug_check_no_locks_held(); |
559 |
+- /* |
560 |
+- * We can do this unlocked here. The futex code uses this flag |
561 |
+- * just to verify whether the pi state cleanup has been done |
562 |
+- * or not. In the worst case it loops once more. |
563 |
+- */ |
564 |
+- tsk->flags |= PF_EXITPIDONE; |
565 |
+ |
566 |
+ if (tsk->io_context) |
567 |
+ exit_io_context(tsk); |
568 |
+diff --git a/kernel/fork.c b/kernel/fork.c |
569 |
+index f2c92c1001949..cf535b9d5db75 100644 |
570 |
+--- a/kernel/fork.c |
571 |
++++ b/kernel/fork.c |
572 |
+@@ -1217,24 +1217,8 @@ static int wait_for_vfork_done(struct task_struct *child, |
573 |
+ * restoring the old one. . . |
574 |
+ * Eric Biederman 10 January 1998 |
575 |
+ */ |
576 |
+-void mm_release(struct task_struct *tsk, struct mm_struct *mm) |
577 |
++static void mm_release(struct task_struct *tsk, struct mm_struct *mm) |
578 |
+ { |
579 |
+- /* Get rid of any futexes when releasing the mm */ |
580 |
+-#ifdef CONFIG_FUTEX |
581 |
+- if (unlikely(tsk->robust_list)) { |
582 |
+- exit_robust_list(tsk); |
583 |
+- tsk->robust_list = NULL; |
584 |
+- } |
585 |
+-#ifdef CONFIG_COMPAT |
586 |
+- if (unlikely(tsk->compat_robust_list)) { |
587 |
+- compat_exit_robust_list(tsk); |
588 |
+- tsk->compat_robust_list = NULL; |
589 |
+- } |
590 |
+-#endif |
591 |
+- if (unlikely(!list_empty(&tsk->pi_state_list))) |
592 |
+- exit_pi_state_list(tsk); |
593 |
+-#endif |
594 |
+- |
595 |
+ uprobe_free_utask(tsk); |
596 |
+ |
597 |
+ /* Get rid of any cached register state */ |
598 |
+@@ -1267,6 +1251,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) |
599 |
+ complete_vfork_done(tsk); |
600 |
+ } |
601 |
+ |
602 |
++void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) |
603 |
++{ |
604 |
++ futex_exit_release(tsk); |
605 |
++ mm_release(tsk, mm); |
606 |
++} |
607 |
++ |
608 |
++void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) |
609 |
++{ |
610 |
++ futex_exec_release(tsk); |
611 |
++ mm_release(tsk, mm); |
612 |
++} |
613 |
++ |
614 |
+ /* |
615 |
+ * Allocate a new mm structure and copy contents from the |
616 |
+ * mm structure of the passed in task structure. |
617 |
+@@ -1937,14 +1933,8 @@ static __latent_entropy struct task_struct *copy_process( |
618 |
+ #ifdef CONFIG_BLOCK |
619 |
+ p->plug = NULL; |
620 |
+ #endif |
621 |
+-#ifdef CONFIG_FUTEX |
622 |
+- p->robust_list = NULL; |
623 |
+-#ifdef CONFIG_COMPAT |
624 |
+- p->compat_robust_list = NULL; |
625 |
+-#endif |
626 |
+- INIT_LIST_HEAD(&p->pi_state_list); |
627 |
+- p->pi_state_cache = NULL; |
628 |
+-#endif |
629 |
++ futex_init_task(p); |
630 |
++ |
631 |
+ /* |
632 |
+ * sigaltstack should be cleared when sharing the same VM |
633 |
+ */ |
634 |
+diff --git a/kernel/futex.c b/kernel/futex.c |
635 |
+index 334dc4cae780e..224adcdac6c19 100644 |
636 |
+--- a/kernel/futex.c |
637 |
++++ b/kernel/futex.c |
638 |
+@@ -341,6 +341,12 @@ static inline bool should_fail_futex(bool fshared) |
639 |
+ } |
640 |
+ #endif /* CONFIG_FAIL_FUTEX */ |
641 |
+ |
642 |
++#ifdef CONFIG_COMPAT |
643 |
++static void compat_exit_robust_list(struct task_struct *curr); |
644 |
++#else |
645 |
++static inline void compat_exit_robust_list(struct task_struct *curr) { } |
646 |
++#endif |
647 |
++ |
648 |
+ static inline void futex_get_mm(union futex_key *key) |
649 |
+ { |
650 |
+ mmgrab(key->private.mm); |
651 |
+@@ -833,6 +839,29 @@ static struct futex_pi_state *alloc_pi_state(void) |
652 |
+ return pi_state; |
653 |
+ } |
654 |
+ |
655 |
++static void pi_state_update_owner(struct futex_pi_state *pi_state, |
656 |
++ struct task_struct *new_owner) |
657 |
++{ |
658 |
++ struct task_struct *old_owner = pi_state->owner; |
659 |
++ |
660 |
++ lockdep_assert_held(&pi_state->pi_mutex.wait_lock); |
661 |
++ |
662 |
++ if (old_owner) { |
663 |
++ raw_spin_lock(&old_owner->pi_lock); |
664 |
++ WARN_ON(list_empty(&pi_state->list)); |
665 |
++ list_del_init(&pi_state->list); |
666 |
++ raw_spin_unlock(&old_owner->pi_lock); |
667 |
++ } |
668 |
++ |
669 |
++ if (new_owner) { |
670 |
++ raw_spin_lock(&new_owner->pi_lock); |
671 |
++ WARN_ON(!list_empty(&pi_state->list)); |
672 |
++ list_add(&pi_state->list, &new_owner->pi_state_list); |
673 |
++ pi_state->owner = new_owner; |
674 |
++ raw_spin_unlock(&new_owner->pi_lock); |
675 |
++ } |
676 |
++} |
677 |
++ |
678 |
+ static void get_pi_state(struct futex_pi_state *pi_state) |
679 |
+ { |
680 |
+ WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount)); |
681 |
+@@ -855,17 +884,11 @@ static void put_pi_state(struct futex_pi_state *pi_state) |
682 |
+ * and has cleaned up the pi_state already |
683 |
+ */ |
684 |
+ if (pi_state->owner) { |
685 |
+- struct task_struct *owner; |
686 |
+ unsigned long flags; |
687 |
+ |
688 |
+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); |
689 |
+- owner = pi_state->owner; |
690 |
+- if (owner) { |
691 |
+- raw_spin_lock(&owner->pi_lock); |
692 |
+- list_del_init(&pi_state->list); |
693 |
+- raw_spin_unlock(&owner->pi_lock); |
694 |
+- } |
695 |
+- rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); |
696 |
++ pi_state_update_owner(pi_state, NULL); |
697 |
++ rt_mutex_proxy_unlock(&pi_state->pi_mutex); |
698 |
+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); |
699 |
+ } |
700 |
+ |
701 |
+@@ -890,7 +913,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) |
702 |
+ * Kernel cleans up PI-state, but userspace is likely hosed. |
703 |
+ * (Robust-futex cleanup is separate and might save the day for userspace.) |
704 |
+ */ |
705 |
+-void exit_pi_state_list(struct task_struct *curr) |
706 |
++static void exit_pi_state_list(struct task_struct *curr) |
707 |
+ { |
708 |
+ struct list_head *next, *head = &curr->pi_state_list; |
709 |
+ struct futex_pi_state *pi_state; |
710 |
+@@ -960,7 +983,8 @@ void exit_pi_state_list(struct task_struct *curr) |
711 |
+ } |
712 |
+ raw_spin_unlock_irq(&curr->pi_lock); |
713 |
+ } |
714 |
+- |
715 |
++#else |
716 |
++static inline void exit_pi_state_list(struct task_struct *curr) { } |
717 |
+ #endif |
718 |
+ |
719 |
+ /* |
720 |
+@@ -1010,7 +1034,8 @@ void exit_pi_state_list(struct task_struct *curr) |
721 |
+ * FUTEX_OWNER_DIED bit. See [4] |
722 |
+ * |
723 |
+ * [10] There is no transient state which leaves owner and user space |
724 |
+- * TID out of sync. |
725 |
++ * TID out of sync. Except one error case where the kernel is denied |
726 |
++ * write access to the user address, see fixup_pi_state_owner(). |
727 |
+ * |
728 |
+ * |
729 |
+ * Serialization and lifetime rules: |
730 |
+@@ -1169,16 +1194,47 @@ out_error: |
731 |
+ return ret; |
732 |
+ } |
733 |
+ |
734 |
++/** |
735 |
++ * wait_for_owner_exiting - Block until the owner has exited |
736 |
++ * @exiting: Pointer to the exiting task |
737 |
++ * |
738 |
++ * Caller must hold a refcount on @exiting. |
739 |
++ */ |
740 |
++static void wait_for_owner_exiting(int ret, struct task_struct *exiting) |
741 |
++{ |
742 |
++ if (ret != -EBUSY) { |
743 |
++ WARN_ON_ONCE(exiting); |
744 |
++ return; |
745 |
++ } |
746 |
++ |
747 |
++ if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) |
748 |
++ return; |
749 |
++ |
750 |
++ mutex_lock(&exiting->futex_exit_mutex); |
751 |
++ /* |
752 |
++ * No point in doing state checking here. If the waiter got here |
753 |
++ * while the task was in exec()->exec_futex_release() then it can |
754 |
++ * have any FUTEX_STATE_* value when the waiter has acquired the |
755 |
++ * mutex. OK, if running, EXITING or DEAD if it reached exit() |
756 |
++ * already. Highly unlikely and not a problem. Just one more round |
757 |
++ * through the futex maze. |
758 |
++ */ |
759 |
++ mutex_unlock(&exiting->futex_exit_mutex); |
760 |
++ |
761 |
++ put_task_struct(exiting); |
762 |
++} |
763 |
++ |
764 |
+ static int handle_exit_race(u32 __user *uaddr, u32 uval, |
765 |
+ struct task_struct *tsk) |
766 |
+ { |
767 |
+ u32 uval2; |
768 |
+ |
769 |
+ /* |
770 |
+- * If PF_EXITPIDONE is not yet set, then try again. |
771 |
++ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the |
772 |
++ * caller that the alleged owner is busy. |
773 |
+ */ |
774 |
+- if (tsk && !(tsk->flags & PF_EXITPIDONE)) |
775 |
+- return -EAGAIN; |
776 |
++ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) |
777 |
++ return -EBUSY; |
778 |
+ |
779 |
+ /* |
780 |
+ * Reread the user space value to handle the following situation: |
781 |
+@@ -1196,8 +1252,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, |
782 |
+ * *uaddr = 0xC0000000; tsk = get_task(PID); |
783 |
+ * } if (!tsk->flags & PF_EXITING) { |
784 |
+ * ... attach(); |
785 |
+- * tsk->flags |= PF_EXITPIDONE; } else { |
786 |
+- * if (!(tsk->flags & PF_EXITPIDONE)) |
787 |
++ * tsk->futex_state = } else { |
788 |
++ * FUTEX_STATE_DEAD; if (tsk->futex_state != |
789 |
++ * FUTEX_STATE_DEAD) |
790 |
+ * return -EAGAIN; |
791 |
+ * return -ESRCH; <--- FAIL |
792 |
+ * } |
793 |
+@@ -1228,7 +1285,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, |
794 |
+ * it after doing proper sanity checks. |
795 |
+ */ |
796 |
+ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, |
797 |
+- struct futex_pi_state **ps) |
798 |
++ struct futex_pi_state **ps, |
799 |
++ struct task_struct **exiting) |
800 |
+ { |
801 |
+ pid_t pid = uval & FUTEX_TID_MASK; |
802 |
+ struct futex_pi_state *pi_state; |
803 |
+@@ -1253,22 +1311,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, |
804 |
+ } |
805 |
+ |
806 |
+ /* |
807 |
+- * We need to look at the task state flags to figure out, |
808 |
+- * whether the task is exiting. To protect against the do_exit |
809 |
+- * change of the task flags, we do this protected by |
810 |
+- * p->pi_lock: |
811 |
++ * We need to look at the task state to figure out, whether the |
812 |
++ * task is exiting. To protect against the change of the task state |
813 |
++ * in futex_exit_release(), we do this protected by p->pi_lock: |
814 |
+ */ |
815 |
+ raw_spin_lock_irq(&p->pi_lock); |
816 |
+- if (unlikely(p->flags & PF_EXITING)) { |
817 |
++ if (unlikely(p->futex_state != FUTEX_STATE_OK)) { |
818 |
+ /* |
819 |
+- * The task is on the way out. When PF_EXITPIDONE is |
820 |
+- * set, we know that the task has finished the |
821 |
+- * cleanup: |
822 |
++ * The task is on the way out. When the futex state is |
823 |
++ * FUTEX_STATE_DEAD, we know that the task has finished |
824 |
++ * the cleanup: |
825 |
+ */ |
826 |
+ int ret = handle_exit_race(uaddr, uval, p); |
827 |
+ |
828 |
+ raw_spin_unlock_irq(&p->pi_lock); |
829 |
+- put_task_struct(p); |
830 |
++ /* |
831 |
++ * If the owner task is between FUTEX_STATE_EXITING and |
832 |
++ * FUTEX_STATE_DEAD then store the task pointer and keep |
833 |
++ * the reference on the task struct. The calling code will |
834 |
++ * drop all locks, wait for the task to reach |
835 |
++ * FUTEX_STATE_DEAD and then drop the refcount. This is |
836 |
++ * required to prevent a live lock when the current task |
837 |
++ * preempted the exiting task between the two states. |
838 |
++ */ |
839 |
++ if (ret == -EBUSY) |
840 |
++ *exiting = p; |
841 |
++ else |
842 |
++ put_task_struct(p); |
843 |
+ return ret; |
844 |
+ } |
845 |
+ |
846 |
+@@ -1307,7 +1376,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, |
847 |
+ |
848 |
+ static int lookup_pi_state(u32 __user *uaddr, u32 uval, |
849 |
+ struct futex_hash_bucket *hb, |
850 |
+- union futex_key *key, struct futex_pi_state **ps) |
851 |
++ union futex_key *key, struct futex_pi_state **ps, |
852 |
++ struct task_struct **exiting) |
853 |
+ { |
854 |
+ struct futex_q *top_waiter = futex_top_waiter(hb, key); |
855 |
+ |
856 |
+@@ -1322,7 +1392,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, |
857 |
+ * We are the first waiter - try to look up the owner based on |
858 |
+ * @uval and attach to it. |
859 |
+ */ |
860 |
+- return attach_to_pi_owner(uaddr, uval, key, ps); |
861 |
++ return attach_to_pi_owner(uaddr, uval, key, ps, exiting); |
862 |
+ } |
863 |
+ |
864 |
+ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) |
865 |
+@@ -1350,6 +1420,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) |
866 |
+ * lookup |
867 |
+ * @task: the task to perform the atomic lock work for. This will |
868 |
+ * be "current" except in the case of requeue pi. |
869 |
++ * @exiting: Pointer to store the task pointer of the owner task |
870 |
++ * which is in the middle of exiting |
871 |
+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) |
872 |
+ * |
873 |
+ * Return: |
874 |
+@@ -1358,11 +1430,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) |
875 |
+ * - <0 - error |
876 |
+ * |
877 |
+ * The hb->lock and futex_key refs shall be held by the caller. |
878 |
++ * |
879 |
++ * @exiting is only set when the return value is -EBUSY. If so, this holds |
880 |
++ * a refcount on the exiting task on return and the caller needs to drop it |
881 |
++ * after waiting for the exit to complete. |
882 |
+ */ |
883 |
+ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, |
884 |
+ union futex_key *key, |
885 |
+ struct futex_pi_state **ps, |
886 |
+- struct task_struct *task, int set_waiters) |
887 |
++ struct task_struct *task, |
888 |
++ struct task_struct **exiting, |
889 |
++ int set_waiters) |
890 |
+ { |
891 |
+ u32 uval, newval, vpid = task_pid_vnr(task); |
892 |
+ struct futex_q *top_waiter; |
893 |
+@@ -1432,7 +1510,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, |
894 |
+ * attach to the owner. If that fails, no harm done, we only |
895 |
+ * set the FUTEX_WAITERS bit in the user space variable. |
896 |
+ */ |
897 |
+- return attach_to_pi_owner(uaddr, newval, key, ps); |
898 |
++ return attach_to_pi_owner(uaddr, newval, key, ps, exiting); |
899 |
+ } |
900 |
+ |
901 |
+ /** |
902 |
+@@ -1537,26 +1615,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ |
903 |
+ ret = -EINVAL; |
904 |
+ } |
905 |
+ |
906 |
+- if (ret) |
907 |
+- goto out_unlock; |
908 |
+- |
909 |
+- /* |
910 |
+- * This is a point of no return; once we modify the uval there is no |
911 |
+- * going back and subsequent operations must not fail. |
912 |
+- */ |
913 |
+- |
914 |
+- raw_spin_lock(&pi_state->owner->pi_lock); |
915 |
+- WARN_ON(list_empty(&pi_state->list)); |
916 |
+- list_del_init(&pi_state->list); |
917 |
+- raw_spin_unlock(&pi_state->owner->pi_lock); |
918 |
+- |
919 |
+- raw_spin_lock(&new_owner->pi_lock); |
920 |
+- WARN_ON(!list_empty(&pi_state->list)); |
921 |
+- list_add(&pi_state->list, &new_owner->pi_state_list); |
922 |
+- pi_state->owner = new_owner; |
923 |
+- raw_spin_unlock(&new_owner->pi_lock); |
924 |
+- |
925 |
+- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); |
926 |
++ if (!ret) { |
927 |
++ /* |
928 |
++ * This is a point of no return; once we modified the uval |
929 |
++ * there is no going back and subsequent operations must |
930 |
++ * not fail. |
931 |
++ */ |
932 |
++ pi_state_update_owner(pi_state, new_owner); |
933 |
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); |
934 |
++ } |
935 |
+ |
936 |
+ out_unlock: |
937 |
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); |
938 |
+@@ -1853,6 +1920,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, |
939 |
+ * @key1: the from futex key |
940 |
+ * @key2: the to futex key |
941 |
+ * @ps: address to store the pi_state pointer |
942 |
++ * @exiting: Pointer to store the task pointer of the owner task |
943 |
++ * which is in the middle of exiting |
944 |
+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) |
945 |
+ * |
946 |
+ * Try and get the lock on behalf of the top waiter if we can do it atomically. |
947 |
+@@ -1860,16 +1929,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, |
948 |
+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. |
949 |
+ * hb1 and hb2 must be held by the caller. |
950 |
+ * |
951 |
++ * @exiting is only set when the return value is -EBUSY. If so, this holds |
952 |
++ * a refcount on the exiting task on return and the caller needs to drop it |
953 |
++ * after waiting for the exit to complete. |
954 |
++ * |
955 |
+ * Return: |
956 |
+ * - 0 - failed to acquire the lock atomically; |
957 |
+ * - >0 - acquired the lock, return value is vpid of the top_waiter |
958 |
+ * - <0 - error |
959 |
+ */ |
960 |
+-static int futex_proxy_trylock_atomic(u32 __user *pifutex, |
961 |
+- struct futex_hash_bucket *hb1, |
962 |
+- struct futex_hash_bucket *hb2, |
963 |
+- union futex_key *key1, union futex_key *key2, |
964 |
+- struct futex_pi_state **ps, int set_waiters) |
965 |
++static int |
966 |
++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, |
967 |
++ struct futex_hash_bucket *hb2, union futex_key *key1, |
968 |
++ union futex_key *key2, struct futex_pi_state **ps, |
969 |
++ struct task_struct **exiting, int set_waiters) |
970 |
+ { |
971 |
+ struct futex_q *top_waiter = NULL; |
972 |
+ u32 curval; |
973 |
+@@ -1906,7 +1979,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, |
974 |
+ */ |
975 |
+ vpid = task_pid_vnr(top_waiter->task); |
976 |
+ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, |
977 |
+- set_waiters); |
978 |
++ exiting, set_waiters); |
979 |
+ if (ret == 1) { |
980 |
+ requeue_pi_wake_futex(top_waiter, key2, hb2); |
981 |
+ return vpid; |
982 |
+@@ -2035,6 +2108,8 @@ retry_private: |
983 |
+ } |
984 |
+ |
985 |
+ if (requeue_pi && (task_count - nr_wake < nr_requeue)) { |
986 |
++ struct task_struct *exiting = NULL; |
987 |
++ |
988 |
+ /* |
989 |
+ * Attempt to acquire uaddr2 and wake the top waiter. If we |
990 |
+ * intend to requeue waiters, force setting the FUTEX_WAITERS |
991 |
+@@ -2042,7 +2117,8 @@ retry_private: |
992 |
+ * faults rather in the requeue loop below. |
993 |
+ */ |
994 |
+ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, |
995 |
+- &key2, &pi_state, nr_requeue); |
996 |
++ &key2, &pi_state, |
997 |
++ &exiting, nr_requeue); |
998 |
+ |
999 |
+ /* |
1000 |
+ * At this point the top_waiter has either taken uaddr2 or is |
1001 |
+@@ -2069,7 +2145,8 @@ retry_private: |
1002 |
+ * If that call succeeds then we have pi_state and an |
1003 |
+ * initial refcount on it. |
1004 |
+ */ |
1005 |
+- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); |
1006 |
++ ret = lookup_pi_state(uaddr2, ret, hb2, &key2, |
1007 |
++ &pi_state, &exiting); |
1008 |
+ } |
1009 |
+ |
1010 |
+ switch (ret) { |
1011 |
+@@ -2087,17 +2164,24 @@ retry_private: |
1012 |
+ if (!ret) |
1013 |
+ goto retry; |
1014 |
+ goto out; |
1015 |
++ case -EBUSY: |
1016 |
+ case -EAGAIN: |
1017 |
+ /* |
1018 |
+ * Two reasons for this: |
1019 |
+- * - Owner is exiting and we just wait for the |
1020 |
++ * - EBUSY: Owner is exiting and we just wait for the |
1021 |
+ * exit to complete. |
1022 |
+- * - The user space value changed. |
1023 |
++ * - EAGAIN: The user space value changed. |
1024 |
+ */ |
1025 |
+ double_unlock_hb(hb1, hb2); |
1026 |
+ hb_waiters_dec(hb2); |
1027 |
+ put_futex_key(&key2); |
1028 |
+ put_futex_key(&key1); |
1029 |
++ /* |
1030 |
++ * Handle the case where the owner is in the middle of |
1031 |
++ * exiting. Wait for the exit to complete otherwise |
1032 |
++ * this task might loop forever, aka. live lock. |
1033 |
++ */ |
1034 |
++ wait_for_owner_exiting(ret, exiting); |
1035 |
+ cond_resched(); |
1036 |
+ goto retry; |
1037 |
+ default: |
1038 |
+@@ -2362,18 +2446,13 @@ static void unqueue_me_pi(struct futex_q *q) |
1039 |
+ spin_unlock(q->lock_ptr); |
1040 |
+ } |
1041 |
+ |
1042 |
+-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, |
1043 |
+- struct task_struct *argowner) |
1044 |
++static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, |
1045 |
++ struct task_struct *argowner) |
1046 |
+ { |
1047 |
++ u32 uval, uninitialized_var(curval), newval, newtid; |
1048 |
+ struct futex_pi_state *pi_state = q->pi_state; |
1049 |
+- u32 uval, uninitialized_var(curval), newval; |
1050 |
+ struct task_struct *oldowner, *newowner; |
1051 |
+- u32 newtid; |
1052 |
+- int ret, err = 0; |
1053 |
+- |
1054 |
+- lockdep_assert_held(q->lock_ptr); |
1055 |
+- |
1056 |
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
1057 |
++ int err = 0; |
1058 |
+ |
1059 |
+ oldowner = pi_state->owner; |
1060 |
+ |
1061 |
+@@ -2407,14 +2486,12 @@ retry: |
1062 |
+ * We raced against a concurrent self; things are |
1063 |
+ * already fixed up. Nothing to do. |
1064 |
+ */ |
1065 |
+- ret = 0; |
1066 |
+- goto out_unlock; |
1067 |
++ return 0; |
1068 |
+ } |
1069 |
+ |
1070 |
+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { |
1071 |
+- /* We got the lock after all, nothing to fix. */ |
1072 |
+- ret = 0; |
1073 |
+- goto out_unlock; |
1074 |
++ /* We got the lock. pi_state is correct. Tell caller. */ |
1075 |
++ return 1; |
1076 |
+ } |
1077 |
+ |
1078 |
+ /* |
1079 |
+@@ -2441,8 +2518,7 @@ retry: |
1080 |
+ * We raced against a concurrent self; things are |
1081 |
+ * already fixed up. Nothing to do. |
1082 |
+ */ |
1083 |
+- ret = 0; |
1084 |
+- goto out_unlock; |
1085 |
++ return 1; |
1086 |
+ } |
1087 |
+ newowner = argowner; |
1088 |
+ } |
1089 |
+@@ -2472,22 +2548,9 @@ retry: |
1090 |
+ * We fixed up user space. Now we need to fix the pi_state |
1091 |
+ * itself. |
1092 |
+ */ |
1093 |
+- if (pi_state->owner != NULL) { |
1094 |
+- raw_spin_lock(&pi_state->owner->pi_lock); |
1095 |
+- WARN_ON(list_empty(&pi_state->list)); |
1096 |
+- list_del_init(&pi_state->list); |
1097 |
+- raw_spin_unlock(&pi_state->owner->pi_lock); |
1098 |
+- } |
1099 |
++ pi_state_update_owner(pi_state, newowner); |
1100 |
+ |
1101 |
+- pi_state->owner = newowner; |
1102 |
+- |
1103 |
+- raw_spin_lock(&newowner->pi_lock); |
1104 |
+- WARN_ON(!list_empty(&pi_state->list)); |
1105 |
+- list_add(&pi_state->list, &newowner->pi_state_list); |
1106 |
+- raw_spin_unlock(&newowner->pi_lock); |
1107 |
+- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); |
1108 |
+- |
1109 |
+- return 0; |
1110 |
++ return argowner == current; |
1111 |
+ |
1112 |
+ /* |
1113 |
+ * In order to reschedule or handle a page fault, we need to drop the |
1114 |
+@@ -2508,17 +2571,16 @@ handle_err: |
1115 |
+ |
1116 |
+ switch (err) { |
1117 |
+ case -EFAULT: |
1118 |
+- ret = fault_in_user_writeable(uaddr); |
1119 |
++ err = fault_in_user_writeable(uaddr); |
1120 |
+ break; |
1121 |
+ |
1122 |
+ case -EAGAIN: |
1123 |
+ cond_resched(); |
1124 |
+- ret = 0; |
1125 |
++ err = 0; |
1126 |
+ break; |
1127 |
+ |
1128 |
+ default: |
1129 |
+ WARN_ON_ONCE(1); |
1130 |
+- ret = err; |
1131 |
+ break; |
1132 |
+ } |
1133 |
+ |
1134 |
+@@ -2528,17 +2590,44 @@ handle_err: |
1135 |
+ /* |
1136 |
+ * Check if someone else fixed it for us: |
1137 |
+ */ |
1138 |
+- if (pi_state->owner != oldowner) { |
1139 |
+- ret = 0; |
1140 |
+- goto out_unlock; |
1141 |
+- } |
1142 |
++ if (pi_state->owner != oldowner) |
1143 |
++ return argowner == current; |
1144 |
+ |
1145 |
+- if (ret) |
1146 |
+- goto out_unlock; |
1147 |
++ /* Retry if err was -EAGAIN or the fault in succeeded */ |
1148 |
++ if (!err) |
1149 |
++ goto retry; |
1150 |
+ |
1151 |
+- goto retry; |
1152 |
++ /* |
1153 |
++ * fault_in_user_writeable() failed so user state is immutable. At |
1154 |
++ * best we can make the kernel state consistent but user state will |
1155 |
++ * be most likely hosed and any subsequent unlock operation will be |
1156 |
++ * rejected due to PI futex rule [10]. |
1157 |
++ * |
1158 |
++ * Ensure that the rtmutex owner is also the pi_state owner despite |
1159 |
++ * the user space value claiming something different. There is no |
1160 |
++ * point in unlocking the rtmutex if current is the owner as it |
1161 |
++ * would need to wait until the next waiter has taken the rtmutex |
1162 |
++ * to guarantee consistent state. Keep it simple. Userspace asked |
1163 |
++ * for this wreckaged state. |
1164 |
++ * |
1165 |
++ * The rtmutex has an owner - either current or some other |
1166 |
++ * task. See the EAGAIN loop above. |
1167 |
++ */ |
1168 |
++ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); |
1169 |
+ |
1170 |
+-out_unlock: |
1171 |
++ return err; |
1172 |
++} |
1173 |
++ |
1174 |
++static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, |
1175 |
++ struct task_struct *argowner) |
1176 |
++{ |
1177 |
++ struct futex_pi_state *pi_state = q->pi_state; |
1178 |
++ int ret; |
1179 |
++ |
1180 |
++ lockdep_assert_held(q->lock_ptr); |
1181 |
++ |
1182 |
++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
1183 |
++ ret = __fixup_pi_state_owner(uaddr, q, argowner); |
1184 |
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); |
1185 |
+ return ret; |
1186 |
+ } |
1187 |
+@@ -2562,8 +2651,6 @@ static long futex_wait_restart(struct restart_block *restart); |
1188 |
+ */ |
1189 |
+ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) |
1190 |
+ { |
1191 |
+- int ret = 0; |
1192 |
+- |
1193 |
+ if (locked) { |
1194 |
+ /* |
1195 |
+ * Got the lock. We might not be the anticipated owner if we |
1196 |
+@@ -2574,8 +2661,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) |
1197 |
+ * stable state, anything else needs more attention. |
1198 |
+ */ |
1199 |
+ if (q->pi_state->owner != current) |
1200 |
+- ret = fixup_pi_state_owner(uaddr, q, current); |
1201 |
+- goto out; |
1202 |
++ return fixup_pi_state_owner(uaddr, q, current); |
1203 |
++ return 1; |
1204 |
+ } |
1205 |
+ |
1206 |
+ /* |
1207 |
+@@ -2586,24 +2673,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) |
1208 |
+ * Another speculative read; pi_state->owner == current is unstable |
1209 |
+ * but needs our attention. |
1210 |
+ */ |
1211 |
+- if (q->pi_state->owner == current) { |
1212 |
+- ret = fixup_pi_state_owner(uaddr, q, NULL); |
1213 |
+- goto out; |
1214 |
+- } |
1215 |
++ if (q->pi_state->owner == current) |
1216 |
++ return fixup_pi_state_owner(uaddr, q, NULL); |
1217 |
+ |
1218 |
+ /* |
1219 |
+ * Paranoia check. If we did not take the lock, then we should not be |
1220 |
+- * the owner of the rt_mutex. |
1221 |
++ * the owner of the rt_mutex. Warn and establish consistent state. |
1222 |
+ */ |
1223 |
+- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { |
1224 |
+- printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " |
1225 |
+- "pi-state %p\n", ret, |
1226 |
+- q->pi_state->pi_mutex.owner, |
1227 |
+- q->pi_state->owner); |
1228 |
+- } |
1229 |
++ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) |
1230 |
++ return fixup_pi_state_owner(uaddr, q, current); |
1231 |
+ |
1232 |
+-out: |
1233 |
+- return ret ? ret : locked; |
1234 |
++ return 0; |
1235 |
+ } |
1236 |
+ |
1237 |
+ /** |
1238 |
+@@ -2824,7 +2904,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, |
1239 |
+ ktime_t *time, int trylock) |
1240 |
+ { |
1241 |
+ struct hrtimer_sleeper timeout, *to = NULL; |
1242 |
+- struct futex_pi_state *pi_state = NULL; |
1243 |
++ struct task_struct *exiting = NULL; |
1244 |
+ struct rt_mutex_waiter rt_waiter; |
1245 |
+ struct futex_hash_bucket *hb; |
1246 |
+ struct futex_q q = futex_q_init; |
1247 |
+@@ -2852,7 +2932,8 @@ retry: |
1248 |
+ retry_private: |
1249 |
+ hb = queue_lock(&q); |
1250 |
+ |
1251 |
+- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); |
1252 |
++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, |
1253 |
++ &exiting, 0); |
1254 |
+ if (unlikely(ret)) { |
1255 |
+ /* |
1256 |
+ * Atomic work succeeded and we got the lock, |
1257 |
+@@ -2865,15 +2946,22 @@ retry_private: |
1258 |
+ goto out_unlock_put_key; |
1259 |
+ case -EFAULT: |
1260 |
+ goto uaddr_faulted; |
1261 |
++ case -EBUSY: |
1262 |
+ case -EAGAIN: |
1263 |
+ /* |
1264 |
+ * Two reasons for this: |
1265 |
+- * - Task is exiting and we just wait for the |
1266 |
++ * - EBUSY: Task is exiting and we just wait for the |
1267 |
+ * exit to complete. |
1268 |
+- * - The user space value changed. |
1269 |
++ * - EAGAIN: The user space value changed. |
1270 |
+ */ |
1271 |
+ queue_unlock(hb); |
1272 |
+ put_futex_key(&q.key); |
1273 |
++ /* |
1274 |
++ * Handle the case where the owner is in the middle of |
1275 |
++ * exiting. Wait for the exit to complete otherwise |
1276 |
++ * this task might loop forever, aka. live lock. |
1277 |
++ */ |
1278 |
++ wait_for_owner_exiting(ret, exiting); |
1279 |
+ cond_resched(); |
1280 |
+ goto retry; |
1281 |
+ default: |
1282 |
+@@ -2958,23 +3046,9 @@ no_block: |
1283 |
+ if (res) |
1284 |
+ ret = (res < 0) ? res : 0; |
1285 |
+ |
1286 |
+- /* |
1287 |
+- * If fixup_owner() faulted and was unable to handle the fault, unlock |
1288 |
+- * it and return the fault to userspace. |
1289 |
+- */ |
1290 |
+- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { |
1291 |
+- pi_state = q.pi_state; |
1292 |
+- get_pi_state(pi_state); |
1293 |
+- } |
1294 |
+- |
1295 |
+ /* Unqueue and drop the lock */ |
1296 |
+ unqueue_me_pi(&q); |
1297 |
+ |
1298 |
+- if (pi_state) { |
1299 |
+- rt_mutex_futex_unlock(&pi_state->pi_mutex); |
1300 |
+- put_pi_state(pi_state); |
1301 |
+- } |
1302 |
+- |
1303 |
+ goto out_put_key; |
1304 |
+ |
1305 |
+ out_unlock_put_key: |
1306 |
+@@ -3240,7 +3314,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ u32 __user *uaddr2)
+ {
+ struct hrtimer_sleeper timeout, *to = NULL;
+- struct futex_pi_state *pi_state = NULL;
+ struct rt_mutex_waiter rt_waiter;
+ struct futex_hash_bucket *hb;
+ union futex_key key2 = FUTEX_KEY_INIT;
+@@ -3325,16 +3398,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ if (q.pi_state && (q.pi_state->owner != current)) {
+ spin_lock(q.lock_ptr);
+ ret = fixup_pi_state_owner(uaddr2, &q, current);
+- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
+- pi_state = q.pi_state;
+- get_pi_state(pi_state);
+- }
+ /*
+ * Drop the reference to the pi state which
+ * the requeue_pi() code acquired for us.
+ */
+ put_pi_state(q.pi_state);
+ spin_unlock(q.lock_ptr);
++ /*
++ * Adjust the return value. It's either -EFAULT or
++ * success (1) but the caller expects 0 for success.
++ */
++ ret = ret < 0 ? ret : 0;
+ }
+ } else {
+ struct rt_mutex *pi_mutex;
+@@ -3365,25 +3439,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+ if (res)
+ ret = (res < 0) ? res : 0;
+
+- /*
+- * If fixup_pi_state_owner() faulted and was unable to handle
+- * the fault, unlock the rt_mutex and return the fault to
+- * userspace.
+- */
+- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
+- pi_state = q.pi_state;
+- get_pi_state(pi_state);
+- }
+-
+ /* Unqueue and drop the lock. */
+ unqueue_me_pi(&q);
+ }
+
+- if (pi_state) {
+- rt_mutex_futex_unlock(&pi_state->pi_mutex);
+- put_pi_state(pi_state);
+- }
+-
+ if (ret == -EINTR) {
+ /*
+ * We've already been requeued, but cannot restart by calling
+@@ -3625,7 +3684,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+-void exit_robust_list(struct task_struct *curr)
++static void exit_robust_list(struct task_struct *curr)
+ {
+ struct robust_list_head __user *head = curr->robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
+@@ -3690,6 +3749,114 @@ void exit_robust_list(struct task_struct *curr)
+ }
+ }
+
++static void futex_cleanup(struct task_struct *tsk)
++{
++ if (unlikely(tsk->robust_list)) {
++ exit_robust_list(tsk);
++ tsk->robust_list = NULL;
++ }
++
++#ifdef CONFIG_COMPAT
++ if (unlikely(tsk->compat_robust_list)) {
++ compat_exit_robust_list(tsk);
++ tsk->compat_robust_list = NULL;
++ }
++#endif
++
++ if (unlikely(!list_empty(&tsk->pi_state_list)))
++ exit_pi_state_list(tsk);
++}
++
++/**
++ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
++ * @tsk: task to set the state on
++ *
++ * Set the futex exit state of the task lockless. The futex waiter code
++ * observes that state when a task is exiting and loops until the task has
++ * actually finished the futex cleanup. The worst case for this is that the
++ * waiter runs through the wait loop until the state becomes visible.
++ *
++ * This is called from the recursive fault handling path in do_exit().
++ *
++ * This is best effort. Either the futex exit code has run already or
++ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
++ * take it over. If not, the problem is pushed back to user space. If the
++ * futex exit code did not run yet, then an already queued waiter might
++ * block forever, but there is nothing which can be done about that.
++ */
++void futex_exit_recursive(struct task_struct *tsk)
++{
++ /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
++ if (tsk->futex_state == FUTEX_STATE_EXITING)
++ mutex_unlock(&tsk->futex_exit_mutex);
++ tsk->futex_state = FUTEX_STATE_DEAD;
++}
++
++static void futex_cleanup_begin(struct task_struct *tsk)
++{
++ /*
++ * Prevent various race issues against a concurrent incoming waiter
++ * including live locks by forcing the waiter to block on
++ * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
++ * attach_to_pi_owner().
++ */
++ mutex_lock(&tsk->futex_exit_mutex);
++
++ /*
++ * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
++ *
++ * This ensures that all subsequent checks of tsk->futex_state in
++ * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
++ * tsk->pi_lock held.
++ *
++ * It guarantees also that a pi_state which was queued right before
++ * the state change under tsk->pi_lock by a concurrent waiter must
++ * be observed in exit_pi_state_list().
++ */
++ raw_spin_lock_irq(&tsk->pi_lock);
++ tsk->futex_state = FUTEX_STATE_EXITING;
++ raw_spin_unlock_irq(&tsk->pi_lock);
++}
++
++static void futex_cleanup_end(struct task_struct *tsk, int state)
++{
++ /*
++ * Lockless store. The only side effect is that an observer might
++ * take another loop until it becomes visible.
++ */
++ tsk->futex_state = state;
++ /*
++ * Drop the exit protection. This unblocks waiters which observed
++ * FUTEX_STATE_EXITING to reevaluate the state.
++ */
++ mutex_unlock(&tsk->futex_exit_mutex);
++}
++
++void futex_exec_release(struct task_struct *tsk)
++{
++ /*
++ * The state handling is done for consistency, but in the case of
++ * exec() there is no way to prevent further damage as the PID stays
++ * the same. But for the unlikely and arguably buggy case that a
++ * futex is held on exec(), this provides at least as much state
++ * consistency protection which is possible.
++ */
++ futex_cleanup_begin(tsk);
++ futex_cleanup(tsk);
++ /*
++ * Reset the state to FUTEX_STATE_OK. The task is alive and about
++ * exec a new binary.
++ */
++ futex_cleanup_end(tsk, FUTEX_STATE_OK);
++}
++
++void futex_exit_release(struct task_struct *tsk)
++{
++ futex_cleanup_begin(tsk);
++ futex_cleanup(tsk);
++ futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
++}
++
+ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ u32 __user *uaddr2, u32 val2, u32 val3)
+ {
+@@ -3817,7 +3984,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+-void compat_exit_robust_list(struct task_struct *curr)
++static void compat_exit_robust_list(struct task_struct *curr)
+ {
+ struct compat_robust_list_head __user *head = curr->compat_robust_list;
+ struct robust_list __user *entry, *next_entry, *pending;
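
The futex hunks above add a small handshake: futex_cleanup_begin() takes futex_exit_mutex and publishes FUTEX_STATE_EXITING, futex_cleanup() tears down the robust list and pi state, and futex_cleanup_end() publishes the final state and drops the mutex, so a waiter that observes FUTEX_STATE_EXITING just blocks on the mutex and retries. A minimal stand-alone sketch of that pattern, using pthreads and C11 atomics in place of the kernel primitives (all names here are illustrative, not the kernel API):

/*
 * Sketch of the exit handshake, not the kernel implementation: the
 * "exiting" side publishes EXITING while holding the mutex, cleans up,
 * publishes the final state and unlocks; a waiter that sees EXITING
 * blocks on the mutex until cleanup is done, then re-reads the state.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum { STATE_OK, STATE_EXITING, STATE_DEAD };

static pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
static _Atomic int task_state = STATE_OK;

static void *exiting_task(void *unused)
{
	pthread_mutex_lock(&exit_mutex);	/* futex_cleanup_begin() */
	task_state = STATE_EXITING;
	/* ... robust list / pi state cleanup would run here ... */
	task_state = STATE_DEAD;		/* futex_cleanup_end() */
	pthread_mutex_unlock(&exit_mutex);
	return NULL;
}

static void *waiter(void *unused)
{
	for (;;) {
		int s = task_state;
		if (s == STATE_OK)
			break;	/* attach to the live owner and proceed */
		if (s == STATE_DEAD)
			break;	/* owner died: take over or bail out */
		/* STATE_EXITING: wait for cleanup to finish, then retry */
		pthread_mutex_lock(&exit_mutex);
		pthread_mutex_unlock(&exit_mutex);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, exiting_task, NULL);
	pthread_create(&b, NULL, waiter, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("final state: %d\n", (int)task_state);
	return 0;
}

Compared with the kernel code this drops pi_lock and the recursive-fault case (futex_exit_recursive()); it only shows why a waiter can never see the task half-cleaned: either it races ahead of EXITING being set, or it blocks until the state is final.
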
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 9562aaa2afdce..a5ec4f68527e5 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1719,8 +1719,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ * possible because it belongs to the pi_state which is about to be freed
+ * and it is not longer visible to other tasks.
+ */
+-void rt_mutex_proxy_unlock(struct rt_mutex *lock,
+- struct task_struct *proxy_owner)
++void rt_mutex_proxy_unlock(struct rt_mutex *lock)
+ {
+ debug_rt_mutex_proxy_unlock(lock);
+ rt_mutex_set_owner(lock, NULL);
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index d1d62f942be22..ca6fb489007b6 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner);
+-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
+- struct task_struct *proxy_owner);
++extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
+ extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+ extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 87ce9736043da..360129e475407 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -4393,6 +4393,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return;
++ /* prevent another thread from changing buffer sizes */
++ mutex_lock(&buffer->mutex);
+
+ atomic_inc(&buffer->resize_disabled);
+ atomic_inc(&cpu_buffer->record_disabled);
+@@ -4416,6 +4418,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
+
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&buffer->resize_disabled);
++
++ mutex_unlock(&buffer->mutex);
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
+
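
The ring buffer hunk serializes ring_buffer_reset_cpu() against resizing by taking buffer->mutex, as the added comment says. A reduced, stand-alone model of the invariant (invented struct and function names; pthreads instead of the kernel's buffer->mutex, and a flat byte array instead of per-CPU page lists):

/*
 * Reduced model, not the ftrace code: one mutex covers both the
 * structural change (resize) and the wipe (reset), so neither can
 * observe the other's intermediate state.
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

struct ring {
	pthread_mutex_t mutex;	/* serializes resize vs. reset */
	unsigned char *pages;
	size_t nr_pages;
};

static int ring_resize(struct ring *r, size_t nr_pages)
{
	pthread_mutex_lock(&r->mutex);
	unsigned char *p = realloc(r->pages, nr_pages * PAGE_SIZE);
	if (!p) {
		pthread_mutex_unlock(&r->mutex);
		return -1;
	}
	r->pages = p;
	r->nr_pages = nr_pages;
	pthread_mutex_unlock(&r->mutex);
	return 0;
}

static void ring_reset(struct ring *r)
{
	/*
	 * Without the lock, a concurrent ring_resize() could free and
	 * replace r->pages while this memset() is still writing to it.
	 */
	pthread_mutex_lock(&r->mutex);
	memset(r->pages, 0, r->nr_pages * PAGE_SIZE);
	pthread_mutex_unlock(&r->mutex);
}

int main(void)
{
	struct ring r = { .pages = NULL, .nr_pages = 0 };

	pthread_mutex_init(&r.mutex, NULL);
	if (ring_resize(&r, 4) == 0)
		ring_reset(&r);
	free(r.pages);
	pthread_mutex_destroy(&r.mutex);
	return 0;
}
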
+diff --git a/mm/slub.c b/mm/slub.c
+index 02295fa61583c..eac80b0516fe8 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -5766,10 +5766,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
+
+ s->kobj.kset = kset;
+ err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
+- if (err) {
+- kobject_put(&s->kobj);
++ if (err)
+ goto out;
+- }
+
+ err = sysfs_create_group(&s->kobj, &slab_attr_group);
+ if (err)
+diff --git a/tools/build/Makefile b/tools/build/Makefile
+index 727050c40f096..8a55378e8b7ce 100644
+--- a/tools/build/Makefile
++++ b/tools/build/Makefile
+@@ -15,10 +15,6 @@ endef
+ $(call allow-override,CC,$(CROSS_COMPILE)gcc)
+ $(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+-HOSTCC ?= gcc
+-HOSTLD ?= ld
+-HOSTAR ?= ar
+-
+ export HOSTCC HOSTLD HOSTAR
+
+ ifeq ($(V),1)
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index baa92279c137e..15f32f67cf340 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -7,15 +7,6 @@ ARCH := x86
+ endif
+
+ # always use the host compiler
+-ifneq ($(LLVM),)
+-HOSTAR ?= llvm-ar
+-HOSTCC ?= clang
+-HOSTLD ?= ld.lld
+-else
+-HOSTAR ?= ar
+-HOSTCC ?= gcc
+-HOSTLD ?= ld
+-endif
+ AR = $(HOSTAR)
+ CC = $(HOSTCC)
+ LD = $(HOSTLD)
+diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
+index 0be4116953790..678aa7feb84d0 100644
+--- a/tools/perf/Makefile.perf
++++ b/tools/perf/Makefile.perf
+@@ -148,10 +148,6 @@ endef
+
+ LD += $(EXTRA_LDFLAGS)
+
+-HOSTCC ?= gcc
+-HOSTLD ?= ld
+-HOSTAR ?= ar
+-
+ PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+ LLVM_CONFIG ?= llvm-config
+
+diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
+index fc116c060b98d..32ff7baf39df4 100644
+--- a/tools/power/acpi/Makefile.config
++++ b/tools/power/acpi/Makefile.config
+@@ -57,7 +57,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
+ CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
+ CROSS_COMPILE ?= $(CROSS)
+ LD = $(CC)
+-HOSTCC = gcc
+
+ # check if compiler option is supported
+ cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
+index 8fc6b1ca47dca..42dbe05b18077 100644
+--- a/tools/scripts/Makefile.include
++++ b/tools/scripts/Makefile.include
+@@ -60,6 +60,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
+ $(call allow-override,CXX,$(CROSS_COMPILE)g++)
+ $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
++ifneq ($(LLVM),)
++HOSTAR ?= llvm-ar
++HOSTCC ?= clang
++HOSTLD ?= ld.lld
++else
++HOSTAR ?= ar
++HOSTCC ?= gcc
++HOSTLD ?= ld
++endif
++
+ ifeq ($(CC_NO_CLANG), 1)
+ EXTRA_WARNINGS += -Wstrict-aliasing=3
+ endif