1 |
commit: 9b355777092f37c5c68d40d8401b5bdbf48b3a11 |
2 |
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Jun 25 19:45:57 2022 +0000 |
4 |
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org> |
5 |
CommitDate: Sat Jun 25 19:45:57 2022 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=9b355777 |
7 |
|
8 |
Linux patch 5.4.201 |
9 |
|
10 |
Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org> |
11 |
|
12 |
0000_README | 4 + |
13 |
1200_linux-5.4.201.patch | 431 +++++++++++++++++++++++++++++++++++++++++++++++ |
14 |
2 files changed, 435 insertions(+) |
15 |
|
16 |
diff --git a/0000_README b/0000_README |
17 |
index c037210c..bbfa7b3c 100644 |
18 |
--- a/0000_README |
19 |
+++ b/0000_README |
20 |
@@ -843,6 +843,10 @@ Patch: 1199_linux-5.4.200.patch |
21 |
From: http://www.kernel.org |
22 |
Desc: Linux 5.4.200 |
23 |
|
24 |
+Patch: 1200_linux-5.4.201.patch |
25 |
+From: http://www.kernel.org |
26 |
+Desc: Linux 5.4.201 |
27 |
+ |
28 |
Patch: 1500_XATTR_USER_PREFIX.patch |
29 |
From: https://bugs.gentoo.org/show_bug.cgi?id=470644 |
30 |
Desc: Support for namespace user.pax.* on tmpfs. |
31 |
|
32 |
diff --git a/1200_linux-5.4.201.patch b/1200_linux-5.4.201.patch |
33 |
new file mode 100644 |
34 |
index 00000000..43053d56 |
35 |
--- /dev/null |
36 |
+++ b/1200_linux-5.4.201.patch |
37 |
@@ -0,0 +1,431 @@ |
38 |
+diff --git a/Documentation/hwmon/hwmon-kernel-api.rst b/Documentation/hwmon/hwmon-kernel-api.rst |
39 |
+index 23f27fe78e379..c41eb61081036 100644 |
40 |
+--- a/Documentation/hwmon/hwmon-kernel-api.rst |
41 |
++++ b/Documentation/hwmon/hwmon-kernel-api.rst |
42 |
+@@ -72,7 +72,7 @@ hwmon_device_register_with_info is the most comprehensive and preferred means |
43 |
+ to register a hardware monitoring device. It creates the standard sysfs |
44 |
+ attributes in the hardware monitoring core, letting the driver focus on reading |
45 |
+ from and writing to the chip instead of having to bother with sysfs attributes. |
46 |
+-The parent device parameter as well as the chip parameter must not be NULL. Its |
47 |
++The parent device parameter cannot be NULL with non-NULL chip info. Its |
48 |
+ parameters are described in more detail below. |
49 |
+ |
50 |
+ devm_hwmon_device_register_with_info is similar to |
51 |
+diff --git a/Makefile b/Makefile |
52 |
+index 32da9117e9d76..75be5870cc55f 100644 |
53 |
+--- a/Makefile |
54 |
++++ b/Makefile |
55 |
+@@ -1,7 +1,7 @@ |
56 |
+ # SPDX-License-Identifier: GPL-2.0 |
57 |
+ VERSION = 5 |
58 |
+ PATCHLEVEL = 4 |
59 |
+-SUBLEVEL = 200 |
60 |
++SUBLEVEL = 201 |
61 |
+ EXTRAVERSION = |
62 |
+ NAME = Kleptomaniac Octopus |
63 |
+ |
64 |
+diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S |
65 |
+index db767b072601e..7b054c67acd81 100644 |
66 |
+--- a/arch/arm64/mm/cache.S |
67 |
++++ b/arch/arm64/mm/cache.S |
68 |
+@@ -228,8 +228,6 @@ ENDPIPROC(__dma_flush_area) |
69 |
+ * - dir - DMA direction |
70 |
+ */ |
71 |
+ ENTRY(__dma_map_area) |
72 |
+- cmp w2, #DMA_FROM_DEVICE |
73 |
+- b.eq __dma_inv_area |
74 |
+ b __dma_clean_area |
75 |
+ ENDPIPROC(__dma_map_area) |
76 |
+ |
77 |
+diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c |
78 |
+index 4438c00acb656..28ca07360e970 100644 |
79 |
+--- a/arch/s390/mm/pgtable.c |
80 |
++++ b/arch/s390/mm/pgtable.c |
81 |
+@@ -716,7 +716,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
82 |
+ pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; |
83 |
+ ptev = pte_val(*ptep); |
84 |
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) |
85 |
+- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); |
86 |
++ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); |
87 |
+ pgste_set_unlock(ptep, pgste); |
88 |
+ preempt_enable(); |
89 |
+ } |
90 |
+diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c |
91 |
+index c73b93b9bb87d..a2175394cd253 100644 |
92 |
+--- a/drivers/hwmon/hwmon.c |
93 |
++++ b/drivers/hwmon/hwmon.c |
94 |
+@@ -715,12 +715,11 @@ EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups); |
95 |
+ |
96 |
+ /** |
97 |
+ * hwmon_device_register_with_info - register w/ hwmon |
98 |
+- * @dev: the parent device (mandatory) |
99 |
+- * @name: hwmon name attribute (mandatory) |
100 |
+- * @drvdata: driver data to attach to created device (optional) |
101 |
+- * @chip: pointer to hwmon chip information (mandatory) |
102 |
++ * @dev: the parent device |
103 |
++ * @name: hwmon name attribute |
104 |
++ * @drvdata: driver data to attach to created device |
105 |
++ * @chip: pointer to hwmon chip information |
106 |
+ * @extra_groups: pointer to list of additional non-standard attribute groups |
107 |
+- * (optional) |
108 |
+ * |
109 |
+ * hwmon_device_unregister() must be called when the device is no |
110 |
+ * longer needed. |
111 |
+@@ -733,10 +732,13 @@ hwmon_device_register_with_info(struct device *dev, const char *name, |
112 |
+ const struct hwmon_chip_info *chip, |
113 |
+ const struct attribute_group **extra_groups) |
114 |
+ { |
115 |
+- if (!dev || !name || !chip) |
116 |
++ if (!name) |
117 |
++ return ERR_PTR(-EINVAL); |
118 |
++ |
119 |
++ if (chip && (!chip->ops || !chip->ops->is_visible || !chip->info)) |
120 |
+ return ERR_PTR(-EINVAL); |
121 |
+ |
122 |
+- if (!chip->ops || !chip->ops->is_visible || !chip->info) |
123 |
++ if (chip && !dev) |
124 |
+ return ERR_PTR(-EINVAL); |
125 |
+ |
126 |
+ return __hwmon_device_register(dev, name, drvdata, chip, extra_groups); |
127 |
+diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c |
128 |
+index 06b382304d926..81bc36a43b32d 100644 |
129 |
+--- a/drivers/md/dm-table.c |
130 |
++++ b/drivers/md/dm-table.c |
131 |
+@@ -872,8 +872,7 @@ EXPORT_SYMBOL(dm_consume_args); |
132 |
+ static bool __table_type_bio_based(enum dm_queue_mode table_type) |
133 |
+ { |
134 |
+ return (table_type == DM_TYPE_BIO_BASED || |
135 |
+- table_type == DM_TYPE_DAX_BIO_BASED || |
136 |
+- table_type == DM_TYPE_NVME_BIO_BASED); |
137 |
++ table_type == DM_TYPE_DAX_BIO_BASED); |
138 |
+ } |
139 |
+ |
140 |
+ static bool __table_type_request_based(enum dm_queue_mode table_type) |
141 |
+@@ -929,8 +928,6 @@ bool dm_table_supports_dax(struct dm_table *t, |
142 |
+ return true; |
143 |
+ } |
144 |
+ |
145 |
+-static bool dm_table_does_not_support_partial_completion(struct dm_table *t); |
146 |
+- |
147 |
+ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, |
148 |
+ sector_t start, sector_t len, void *data) |
149 |
+ { |
150 |
+@@ -960,7 +957,6 @@ static int dm_table_determine_type(struct dm_table *t) |
151 |
+ goto verify_bio_based; |
152 |
+ } |
153 |
+ BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED); |
154 |
+- BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED); |
155 |
+ goto verify_rq_based; |
156 |
+ } |
157 |
+ |
158 |
+@@ -999,15 +995,6 @@ verify_bio_based: |
159 |
+ if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) || |
160 |
+ (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { |
161 |
+ t->type = DM_TYPE_DAX_BIO_BASED; |
162 |
+- } else { |
163 |
+- /* Check if upgrading to NVMe bio-based is valid or required */ |
164 |
+- tgt = dm_table_get_immutable_target(t); |
165 |
+- if (tgt && !tgt->max_io_len && dm_table_does_not_support_partial_completion(t)) { |
166 |
+- t->type = DM_TYPE_NVME_BIO_BASED; |
167 |
+- goto verify_rq_based; /* must be stacked directly on NVMe (blk-mq) */ |
168 |
+- } else if (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED) { |
169 |
+- t->type = DM_TYPE_NVME_BIO_BASED; |
170 |
+- } |
171 |
+ } |
172 |
+ return 0; |
173 |
+ } |
174 |
+@@ -1024,8 +1011,7 @@ verify_rq_based: |
175 |
+ * (e.g. request completion process for partial completion.) |
176 |
+ */ |
177 |
+ if (t->num_targets > 1) { |
178 |
+- DMERR("%s DM doesn't support multiple targets", |
179 |
+- t->type == DM_TYPE_NVME_BIO_BASED ? "nvme bio-based" : "request-based"); |
180 |
++ DMERR("request-based DM doesn't support multiple targets"); |
181 |
+ return -EINVAL; |
182 |
+ } |
183 |
+ |
184 |
+@@ -1714,20 +1700,6 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, |
185 |
+ return q && !blk_queue_add_random(q); |
186 |
+ } |
187 |
+ |
188 |
+-static int device_is_partial_completion(struct dm_target *ti, struct dm_dev *dev, |
189 |
+- sector_t start, sector_t len, void *data) |
190 |
+-{ |
191 |
+- char b[BDEVNAME_SIZE]; |
192 |
+- |
193 |
+- /* For now, NVMe devices are the only devices of this class */ |
194 |
+- return (strncmp(bdevname(dev->bdev, b), "nvme", 4) != 0); |
195 |
+-} |
196 |
+- |
197 |
+-static bool dm_table_does_not_support_partial_completion(struct dm_table *t) |
198 |
+-{ |
199 |
+- return !dm_table_any_dev_attr(t, device_is_partial_completion, NULL); |
200 |
+-} |
201 |
+- |
202 |
+ static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, |
203 |
+ sector_t start, sector_t len, void *data) |
204 |
+ { |
205 |
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c |
206 |
+index 37b8bb4d80f0f..77e28f77c59f4 100644 |
207 |
+--- a/drivers/md/dm.c |
208 |
++++ b/drivers/md/dm.c |
209 |
+@@ -1000,7 +1000,7 @@ static void clone_endio(struct bio *bio) |
210 |
+ struct mapped_device *md = tio->io->md; |
211 |
+ dm_endio_fn endio = tio->ti->type->end_io; |
212 |
+ |
213 |
+- if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { |
214 |
++ if (unlikely(error == BLK_STS_TARGET)) { |
215 |
+ if (bio_op(bio) == REQ_OP_DISCARD && |
216 |
+ !bio->bi_disk->queue->limits.max_discard_sectors) |
217 |
+ disable_discard(md); |
218 |
+@@ -1325,7 +1325,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) |
219 |
+ sector = clone->bi_iter.bi_sector; |
220 |
+ |
221 |
+ if (unlikely(swap_bios_limit(ti, clone))) { |
222 |
+- struct mapped_device *md = io->md; |
223 |
+ int latch = get_swap_bios(); |
224 |
+ if (unlikely(latch != md->swap_bios)) |
225 |
+ __set_swap_bios_limit(md, latch); |
226 |
+@@ -1340,24 +1339,17 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) |
227 |
+ /* the bio has been remapped so dispatch it */ |
228 |
+ trace_block_bio_remap(clone->bi_disk->queue, clone, |
229 |
+ bio_dev(io->orig_bio), sector); |
230 |
+- if (md->type == DM_TYPE_NVME_BIO_BASED) |
231 |
+- ret = direct_make_request(clone); |
232 |
+- else |
233 |
+- ret = generic_make_request(clone); |
234 |
++ ret = generic_make_request(clone); |
235 |
+ break; |
236 |
+ case DM_MAPIO_KILL: |
237 |
+- if (unlikely(swap_bios_limit(ti, clone))) { |
238 |
+- struct mapped_device *md = io->md; |
239 |
++ if (unlikely(swap_bios_limit(ti, clone))) |
240 |
+ up(&md->swap_bios_semaphore); |
241 |
+- } |
242 |
+ free_tio(tio); |
243 |
+ dec_pending(io, BLK_STS_IOERR); |
244 |
+ break; |
245 |
+ case DM_MAPIO_REQUEUE: |
246 |
+- if (unlikely(swap_bios_limit(ti, clone))) { |
247 |
+- struct mapped_device *md = io->md; |
248 |
++ if (unlikely(swap_bios_limit(ti, clone))) |
249 |
+ up(&md->swap_bios_semaphore); |
250 |
+- } |
251 |
+ free_tio(tio); |
252 |
+ dec_pending(io, BLK_STS_DM_REQUEUE); |
253 |
+ break; |
254 |
+@@ -1732,51 +1724,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, |
255 |
+ return ret; |
256 |
+ } |
257 |
+ |
258 |
+-/* |
259 |
+- * Optimized variant of __split_and_process_bio that leverages the |
260 |
+- * fact that targets that use it do _not_ have a need to split bios. |
261 |
+- */ |
262 |
+-static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, |
263 |
+- struct bio *bio, struct dm_target *ti) |
264 |
+-{ |
265 |
+- struct clone_info ci; |
266 |
+- blk_qc_t ret = BLK_QC_T_NONE; |
267 |
+- int error = 0; |
268 |
+- |
269 |
+- init_clone_info(&ci, md, map, bio); |
270 |
+- |
271 |
+- if (bio->bi_opf & REQ_PREFLUSH) { |
272 |
+- struct bio flush_bio; |
273 |
+- |
274 |
+- /* |
275 |
+- * Use an on-stack bio for this, it's safe since we don't |
276 |
+- * need to reference it after submit. It's just used as |
277 |
+- * the basis for the clone(s). |
278 |
+- */ |
279 |
+- bio_init(&flush_bio, NULL, 0); |
280 |
+- flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; |
281 |
+- ci.bio = &flush_bio; |
282 |
+- ci.sector_count = 0; |
283 |
+- error = __send_empty_flush(&ci); |
284 |
+- bio_uninit(ci.bio); |
285 |
+- /* dec_pending submits any data associated with flush */ |
286 |
+- } else { |
287 |
+- struct dm_target_io *tio; |
288 |
+- |
289 |
+- ci.bio = bio; |
290 |
+- ci.sector_count = bio_sectors(bio); |
291 |
+- if (__process_abnormal_io(&ci, ti, &error)) |
292 |
+- goto out; |
293 |
+- |
294 |
+- tio = alloc_tio(&ci, ti, 0, GFP_NOIO); |
295 |
+- ret = __clone_and_map_simple_bio(&ci, tio, NULL); |
296 |
+- } |
297 |
+-out: |
298 |
+- /* drop the extra reference count */ |
299 |
+- dec_pending(ci.io, errno_to_blk_status(error)); |
300 |
+- return ret; |
301 |
+-} |
302 |
+- |
303 |
+ static blk_qc_t dm_process_bio(struct mapped_device *md, |
304 |
+ struct dm_table *map, struct bio *bio) |
305 |
+ { |
306 |
+@@ -1807,8 +1754,6 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, |
307 |
+ /* regular IO is split by __split_and_process_bio */ |
308 |
+ } |
309 |
+ |
310 |
+- if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) |
311 |
+- return __process_bio(md, map, bio, ti); |
312 |
+ return __split_and_process_bio(md, map, bio); |
313 |
+ } |
314 |
+ |
315 |
+@@ -2200,12 +2145,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, |
316 |
+ if (request_based) |
317 |
+ dm_stop_queue(q); |
318 |
+ |
319 |
+- if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) { |
320 |
++ if (request_based) { |
321 |
+ /* |
322 |
+- * Leverage the fact that request-based DM targets and |
323 |
+- * NVMe bio based targets are immutable singletons |
324 |
+- * - used to optimize both dm_request_fn and dm_mq_queue_rq; |
325 |
+- * and __process_bio. |
326 |
++ * Leverage the fact that request-based DM targets are |
327 |
++ * immutable singletons - used to optimize dm_mq_queue_rq. |
328 |
+ */ |
329 |
+ md->immutable_target = dm_table_get_immutable_target(t); |
330 |
+ } |
331 |
+@@ -2334,7 +2277,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) |
332 |
+ break; |
333 |
+ case DM_TYPE_BIO_BASED: |
334 |
+ case DM_TYPE_DAX_BIO_BASED: |
335 |
+- case DM_TYPE_NVME_BIO_BASED: |
336 |
+ dm_init_congested_fn(md); |
337 |
+ break; |
338 |
+ case DM_TYPE_NONE: |
339 |
+@@ -3070,7 +3012,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu |
340 |
+ switch (type) { |
341 |
+ case DM_TYPE_BIO_BASED: |
342 |
+ case DM_TYPE_DAX_BIO_BASED: |
343 |
+- case DM_TYPE_NVME_BIO_BASED: |
344 |
+ pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); |
345 |
+ front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); |
346 |
+ io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio); |
347 |
+diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c |
348 |
+index 271bd08f4a255..3f053b11e2cee 100644 |
349 |
+--- a/drivers/usb/gadget/function/u_ether.c |
350 |
++++ b/drivers/usb/gadget/function/u_ether.c |
351 |
+@@ -772,9 +772,13 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, |
352 |
+ dev->qmult = qmult; |
353 |
+ snprintf(net->name, sizeof(net->name), "%s%%d", netname); |
354 |
+ |
355 |
+- if (get_ether_addr(dev_addr, net->dev_addr)) |
356 |
++ if (get_ether_addr(dev_addr, net->dev_addr)) { |
357 |
++ net->addr_assign_type = NET_ADDR_RANDOM; |
358 |
+ dev_warn(&g->dev, |
359 |
+ "using random %s ethernet address\n", "self"); |
360 |
++ } else { |
361 |
++ net->addr_assign_type = NET_ADDR_SET; |
362 |
++ } |
363 |
+ if (get_ether_addr(host_addr, dev->host_mac)) |
364 |
+ dev_warn(&g->dev, |
365 |
+ "using random %s ethernet address\n", "host"); |
366 |
+@@ -831,6 +835,9 @@ struct net_device *gether_setup_name_default(const char *netname) |
367 |
+ INIT_LIST_HEAD(&dev->tx_reqs); |
368 |
+ INIT_LIST_HEAD(&dev->rx_reqs); |
369 |
+ |
370 |
++ /* by default we always have a random MAC address */ |
371 |
++ net->addr_assign_type = NET_ADDR_RANDOM; |
372 |
++ |
373 |
+ skb_queue_head_init(&dev->rx_frames); |
374 |
+ |
375 |
+ /* network device setup */ |
376 |
+@@ -868,7 +875,6 @@ int gether_register_netdev(struct net_device *net) |
377 |
+ g = dev->gadget; |
378 |
+ |
379 |
+ memcpy(net->dev_addr, dev->dev_mac, ETH_ALEN); |
380 |
+- net->addr_assign_type = NET_ADDR_RANDOM; |
381 |
+ |
382 |
+ status = register_netdev(net); |
383 |
+ if (status < 0) { |
384 |
+@@ -908,6 +914,7 @@ int gether_set_dev_addr(struct net_device *net, const char *dev_addr) |
385 |
+ if (get_ether_addr(dev_addr, new_addr)) |
386 |
+ return -EINVAL; |
387 |
+ memcpy(dev->dev_mac, new_addr, ETH_ALEN); |
388 |
++ net->addr_assign_type = NET_ADDR_SET; |
389 |
+ return 0; |
390 |
+ } |
391 |
+ EXPORT_SYMBOL_GPL(gether_set_dev_addr); |
392 |
+diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h |
393 |
+index a53d7d2c2d95c..60631f3abddbd 100644 |
394 |
+--- a/include/linux/device-mapper.h |
395 |
++++ b/include/linux/device-mapper.h |
396 |
+@@ -28,7 +28,6 @@ enum dm_queue_mode { |
397 |
+ DM_TYPE_BIO_BASED = 1, |
398 |
+ DM_TYPE_REQUEST_BASED = 2, |
399 |
+ DM_TYPE_DAX_BIO_BASED = 3, |
400 |
+- DM_TYPE_NVME_BIO_BASED = 4, |
401 |
+ }; |
402 |
+ |
403 |
+ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; |
404 |
+diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c |
405 |
+index 959f4f0c85460..d9bee15e36a50 100644 |
406 |
+--- a/net/ipv4/inet_hashtables.c |
407 |
++++ b/net/ipv4/inet_hashtables.c |
408 |
+@@ -675,12 +675,14 @@ EXPORT_SYMBOL_GPL(inet_unhash); |
409 |
+ * Note that we use 32bit integers (vs RFC 'short integers') |
410 |
+ * because 2^16 is not a multiple of num_ephemeral and this |
411 |
+ * property might be used by clever attacker. |
412 |
+- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, |
413 |
+- * we use 256 instead to really give more isolation and |
414 |
+- * privacy, this only consumes 1 KB of kernel memory. |
415 |
++ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though |
416 |
++ * attacks were since demonstrated, thus we use 65536 instead to really |
417 |
++ * give more isolation and privacy, at the expense of 256kB of kernel |
418 |
++ * memory. |
419 |
+ */ |
420 |
+-#define INET_TABLE_PERTURB_SHIFT 8 |
421 |
+-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; |
422 |
++#define INET_TABLE_PERTURB_SHIFT 16 |
423 |
++#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) |
424 |
++static u32 *table_perturb; |
425 |
+ |
426 |
+ int __inet_hash_connect(struct inet_timewait_death_row *death_row, |
427 |
+ struct sock *sk, u64 port_offset, |
428 |
+@@ -723,10 +725,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, |
429 |
+ if (likely(remaining > 1)) |
430 |
+ remaining &= ~1U; |
431 |
+ |
432 |
+- net_get_random_once(table_perturb, sizeof(table_perturb)); |
433 |
+- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); |
434 |
++ net_get_random_once(table_perturb, |
435 |
++ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); |
436 |
++ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); |
437 |
+ |
438 |
+- offset = READ_ONCE(table_perturb[index]) + port_offset; |
439 |
++ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); |
440 |
+ offset %= remaining; |
441 |
+ |
442 |
+ /* In first pass we try ports of @low parity. |
443 |
+@@ -782,6 +785,12 @@ next_port: |
444 |
+ return -EADDRNOTAVAIL; |
445 |
+ |
446 |
+ ok: |
447 |
++ /* Here we want to add a little bit of randomness to the next source |
448 |
++ * port that will be chosen. We use a max() with a random here so that |
449 |
++ * on low contention the randomness is maximal and on high contention |
450 |
++ * it may be inexistent. |
451 |
++ */ |
452 |
++ i = max_t(int, i, (prandom_u32() & 7) * 2); |
453 |
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); |
454 |
+ |
455 |
+ /* Head lock still held and bh's disabled */ |
456 |
+@@ -855,6 +864,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, |
457 |
+ low_limit, |
458 |
+ high_limit); |
459 |
+ init_hashinfo_lhash2(h); |
460 |
++ |
461 |
++ /* this one is used for source ports of outgoing connections */ |
462 |
++ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE, |
463 |
++ sizeof(*table_perturb), GFP_KERNEL); |
464 |
++ if (!table_perturb) |
465 |
++ panic("TCP: failed to alloc table_perturb"); |
466 |
+ } |
467 |
+ |
468 |
+ int inet_hashinfo2_init_mod(struct inet_hashinfo *h) |