commit: 32d8aab8c0070a58fbb2a4f1d9cda28915ec17c2
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Feb 8 15:20:49 2019 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Feb 8 15:20:49 2019 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=32d8aab8

proj/linux-patches: Linux patch 4.4.174

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README              |    4 +
 1173_linux-4.4.174.patch | 3075 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 3079 insertions(+)

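The patch below converts inet fragment handling to the rhashtable API and,
as part of that, introduces rhashtable_lookup_get_insert_key(), which
inet_frag_create() relies on: the helper returns NULL when the new object
was inserted, the already-hashed object when the key clashes, and an
ERR_PTR() on failure. A rough sketch of that calling convention is shown
here for orientation only -- the entry type and names are hypothetical and
not part of the patch, and the rhashtable_params used must supply
obj_hashfn and obj_cmpfn (as the lowpan_rhash_params added below do):

	/* Hypothetical caller -- only the return-value convention is taken
	 * from the rhashtable_lookup_get_insert_key() kerneldoc added by
	 * this patch.
	 */
	struct demo_entry {
		struct rhash_head node;	/* linkage hashed into the table */
		u32 key;		/* consumed by obj_hashfn/obj_cmpfn */
	};

	static struct demo_entry *demo_get_or_insert(struct rhashtable *ht,
						     struct demo_entry *new,
						     const struct rhashtable_params params)
	{
		void *old;

		old = rhashtable_lookup_get_insert_key(ht, &new->key,
						       &new->node, params);
		if (IS_ERR(old))
			return NULL;	/* insertion failed, e.g. -ENOMEM */
		if (old)
			return old;	/* entry with this key already hashed */
		return new;		/* NULL return: "new" was inserted */
	}

This is the same pattern inet_frag_create() uses to detect that another
CPU inserted the same fragment queue first, dropping its own copy in that
case.
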
diff --git a/0000_README b/0000_README
index b00cafe..e836b73 100644
--- a/0000_README
+++ b/0000_README
@@ -735,6 +735,10 @@ Patch: 1172_linux-4.4.173.patch
 From: http://www.kernel.org
 Desc: Linux 4.4.173
 
+Patch: 1173_linux-4.4.174.patch
+From: http://www.kernel.org
+Desc: Linux 4.4.174
+
 Patch: 1500_XATTR_USER_PREFIX.patch
 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1173_linux-4.4.174.patch b/1173_linux-4.4.174.patch
new file mode 100644
index 0000000..3060cab
--- /dev/null
+++ b/1173_linux-4.4.174.patch
@@ -0,0 +1,3075 @@
+diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
+index 2ea4c45cf1c8..7c229f59016f 100644
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -112,14 +112,11 @@ min_adv_mss - INTEGER
+
+ IP Fragmentation:
+
+-ipfrag_high_thresh - INTEGER
+- Maximum memory used to reassemble IP fragments. When
+- ipfrag_high_thresh bytes of memory is allocated for this purpose,
+- the fragment handler will toss packets until ipfrag_low_thresh
+- is reached. This also serves as a maximum limit to namespaces
+- different from the initial one.
+-
+-ipfrag_low_thresh - INTEGER
++ipfrag_high_thresh - LONG INTEGER
++ Maximum memory used to reassemble IP fragments.
++
++ipfrag_low_thresh - LONG INTEGER
++ (Obsolete since linux-4.17)
+ Maximum memory used to reassemble IP fragments before the kernel
+ begins to remove incomplete fragment queues to free up resources.
+ The kernel still accepts new fragments for defragmentation.
+diff --git a/Makefile b/Makefile
+index db7665e32da8..1fa281069379 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 173
++SUBLEVEL = 174
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+
+diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
+index e50b31d18462..e97cdfd6cba9 100644
+--- a/include/linux/rhashtable.h
++++ b/include/linux/rhashtable.h
+@@ -133,23 +133,23 @@ struct rhashtable_params {
+ /**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+- * @nelems: Number of elements in table
+ * @key_len: Key length for hashfn
+ * @elasticity: Maximum chain length before rehash
+ * @p: Configuration parameters
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
++ * @nelems: Number of elements in table
+ */
+ struct rhashtable {
+ struct bucket_table __rcu *tbl;
+- atomic_t nelems;
+ unsigned int key_len;
+ unsigned int elasticity;
+ struct rhashtable_params p;
+ struct work_struct run_work;
+ struct mutex mutex;
+ spinlock_t lock;
++ atomic_t nelems;
+ };
+
+ /**
+@@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht,
+ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+- struct bucket_table *old_tbl);
++ struct bucket_table *old_tbl,
++ void **data);
+ int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
+
+ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
+@@ -514,18 +515,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
+ return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
+ }
+
+-/**
+- * rhashtable_lookup_fast - search hash table, inlined version
+- * @ht: hash table
+- * @key: the pointer to the key
+- * @params: hash table parameters
+- *
+- * Computes the hash value for the key and traverses the bucket chain looking
+- * for a entry with an identical key. The first matching entry is returned.
+- *
+- * Returns the first entry on which the compare function returned true.
+- */
+-static inline void *rhashtable_lookup_fast(
++/* Internal function, do not use. */
++static inline struct rhash_head *__rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+ {
+@@ -537,8 +528,6 @@ static inline void *rhashtable_lookup_fast(
+ struct rhash_head *he;
+ unsigned int hash;
+
+- rcu_read_lock();
+-
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ restart:
+ hash = rht_key_hashfn(ht, tbl, key, params);
+@@ -547,8 +536,7 @@ restart:
+ params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+ rhashtable_compare(&arg, rht_obj(ht, he)))
+ continue;
+- rcu_read_unlock();
+- return rht_obj(ht, he);
++ return he;
+ }
+
+ /* Ensure we see any new tables. */
+@@ -557,13 +545,64 @@ restart:
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(tbl))
+ goto restart;
+- rcu_read_unlock();
+
+ return NULL;
+ }
+
+-/* Internal function, please use rhashtable_insert_fast() instead */
+-static inline int __rhashtable_insert_fast(
++/**
++ * rhashtable_lookup - search hash table
++ * @ht: hash table
++ * @key: the pointer to the key
++ * @params: hash table parameters
++ *
++ * Computes the hash value for the key and traverses the bucket chain looking
++ * for a entry with an identical key. The first matching entry is returned.
++ *
++ * This must only be called under the RCU read lock.
++ *
++ * Returns the first entry on which the compare function returned true.
++ */
++static inline void *rhashtable_lookup(
++ struct rhashtable *ht, const void *key,
++ const struct rhashtable_params params)
++{
++ struct rhash_head *he = __rhashtable_lookup(ht, key, params);
++
++ return he ? rht_obj(ht, he) : NULL;
++}
++
++/**
++ * rhashtable_lookup_fast - search hash table, without RCU read lock
++ * @ht: hash table
++ * @key: the pointer to the key
++ * @params: hash table parameters
++ *
++ * Computes the hash value for the key and traverses the bucket chain looking
++ * for a entry with an identical key. The first matching entry is returned.
++ *
++ * Only use this function when you have other mechanisms guaranteeing
++ * that the object won't go away after the RCU read lock is released.
++ *
++ * Returns the first entry on which the compare function returned true.
++ */
++static inline void *rhashtable_lookup_fast(
++ struct rhashtable *ht, const void *key,
++ const struct rhashtable_params params)
++{
++ void *obj;
++
++ rcu_read_lock();
++ obj = rhashtable_lookup(ht, key, params);
++ rcu_read_unlock();
++
++ return obj;
++}
++
++/* Internal function, please use rhashtable_insert_fast() instead. This
++ * function returns the existing element already in hashes in there is a clash,
++ * otherwise it returns an error via ERR_PTR().
++ */
++static inline void *__rhashtable_insert_fast(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+ {
+@@ -576,6 +615,7 @@ static inline int __rhashtable_insert_fast(
+ spinlock_t *lock;
+ unsigned int elasticity;
+ unsigned int hash;
++ void *data = NULL;
+ int err;
+
+ restart:
+@@ -600,11 +640,14 @@ restart:
+
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(new_tbl)) {
+- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
++ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
+ if (!IS_ERR_OR_NULL(tbl))
+ goto slow_path;
+
+ err = PTR_ERR(tbl);
++ if (err == -EEXIST)
++ err = 0;
++
+ goto out;
+ }
+
+@@ -618,25 +661,25 @@ slow_path:
+ err = rhashtable_insert_rehash(ht, tbl);
+ rcu_read_unlock();
+ if (err)
+- return err;
++ return ERR_PTR(err);
+
+ goto restart;
+ }
+
+- err = -EEXIST;
++ err = 0;
+ elasticity = ht->elasticity;
+ rht_for_each(head, tbl, hash) {
+ if (key &&
+ unlikely(!(params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+- rhashtable_compare(&arg, rht_obj(ht, head)))))
++ rhashtable_compare(&arg, rht_obj(ht, head))))) {
++ data = rht_obj(ht, head);
+ goto out;
++ }
+ if (!--elasticity)
+ goto slow_path;
+ }
+
+- err = 0;
+-
+ head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+ RCU_INIT_POINTER(obj->next, head);
+@@ -651,7 +694,7 @@ out:
+ spin_unlock_bh(lock);
+ rcu_read_unlock();
+
+- return err;
++ return err ? ERR_PTR(err) : data;
+ }
+
+ /**
+@@ -674,7 +717,13 @@ static inline int rhashtable_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+ {
+- return __rhashtable_insert_fast(ht, NULL, obj, params);
++ void *ret;
++
++ ret = __rhashtable_insert_fast(ht, NULL, obj, params);
++ if (IS_ERR(ret))
++ return PTR_ERR(ret);
++
++ return ret == NULL ? 0 : -EEXIST;
+ }
+
+ /**
+@@ -703,11 +752,15 @@ static inline int rhashtable_lookup_insert_fast(
+ const struct rhashtable_params params)
+ {
+ const char *key = rht_obj(ht, obj);
++ void *ret;
+
+ BUG_ON(ht->p.obj_hashfn);
+
+- return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
+- params);
++ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
++ if (IS_ERR(ret))
++ return PTR_ERR(ret);
++
++ return ret == NULL ? 0 : -EEXIST;
+ }
+
+ /**
+@@ -735,6 +788,32 @@ static inline int rhashtable_lookup_insert_fast(
+ static inline int rhashtable_lookup_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
++{
++ void *ret;
++
++ BUG_ON(!ht->p.obj_hashfn || !key);
++
++ ret = __rhashtable_insert_fast(ht, key, obj, params);
++ if (IS_ERR(ret))
++ return PTR_ERR(ret);
++
++ return ret == NULL ? 0 : -EEXIST;
++}
++
++/**
++ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
++ * @ht: hash table
++ * @obj: pointer to hash head inside object
++ * @params: hash table parameters
++ * @data: pointer to element data already in hashes
++ *
++ * Just like rhashtable_lookup_insert_key(), but this function returns the
++ * object if it exists, NULL if it does not and the insertion was successful,
++ * and an ERR_PTR otherwise.
++ */
++static inline void *rhashtable_lookup_get_insert_key(
++ struct rhashtable *ht, const void *key, struct rhash_head *obj,
++ const struct rhashtable_params params)
+ {
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 6d39d81d3c38..502787c29ce9 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -556,9 +556,14 @@ struct sk_buff {
+ struct skb_mstamp skb_mstamp;
+ };
+ };
+- struct rb_node rbnode; /* used in netem & tcp stack */
++ struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
+ };
+- struct sock *sk;
++
++ union {
++ struct sock *sk;
++ int ip_defrag_offset;
++ };
++
+ struct net_device *dev;
+
+ /*
+@@ -2273,7 +2278,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
+ kfree_skb(skb);
+ }
+
+-void skb_rbtree_purge(struct rb_root *root);
++unsigned int skb_rbtree_purge(struct rb_root *root);
+
+ void *netdev_alloc_frag(unsigned int fragsz);
+
+@@ -2791,6 +2796,7 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
+ return skb->data;
+ }
+
++int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
+ /**
+ * pskb_trim_rcsum - trim received skb and update checksum
+ * @skb: buffer to trim
+@@ -2805,9 +2811,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+ {
+ if (likely(len >= skb->len))
+ return 0;
+- if (skb->ip_summed == CHECKSUM_COMPLETE)
+- skb->ip_summed = CHECKSUM_NONE;
+- return __pskb_trim(skb, len);
++ return pskb_trim_rcsum_slow(skb, len);
+ }
+
+ #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index c26a6e4dc306..6260ec146142 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -1,13 +1,19 @@
+ #ifndef __NET_FRAG_H__
+ #define __NET_FRAG_H__
+
++#include <linux/rhashtable.h>
++
+ struct netns_frags {
+- /* Keep atomic mem on separate cachelines in structs that include it */
+- atomic_t mem ____cacheline_aligned_in_smp;
+ /* sysctls */
++ long high_thresh;
++ long low_thresh;
+ int timeout;
+- int high_thresh;
+- int low_thresh;
++ struct inet_frags *f;
++
++ struct rhashtable rhashtable ____cacheline_aligned_in_smp;
++
++ /* Keep atomic mem on separate cachelines in structs that include it */
++ atomic_long_t mem ____cacheline_aligned_in_smp;
+ };
+
+ /**
+@@ -23,74 +29,68 @@ enum {
+ INET_FRAG_COMPLETE = BIT(2),
+ };
+
++struct frag_v4_compare_key {
++ __be32 saddr;
++ __be32 daddr;
++ u32 user;
++ u32 vif;
++ __be16 id;
++ u16 protocol;
++};
++
++struct frag_v6_compare_key {
++ struct in6_addr saddr;
++ struct in6_addr daddr;
++ u32 user;
++ __be32 id;
++ u32 iif;
++};
++
+ /**
+ * struct inet_frag_queue - fragment queue
+ *
+- * @lock: spinlock protecting the queue
++ * @node: rhash node
++ * @key: keys identifying this frag.
+ * @timer: queue expiration timer
+- * @list: hash bucket list
++ * @lock: spinlock protecting this frag
+ * @refcnt: reference count of the queue
+ * @fragments: received fragments head
++ * @rb_fragments: received fragments rb-tree root
+ * @fragments_tail: received fragments tail
++ * @last_run_head: the head of the last "run". see ip_fragment.c
+ * @stamp: timestamp of the last received fragment
+ * @len: total length of the original datagram
+ * @meat: length of received fragments so far
+ * @flags: fragment queue flags
+ * @max_size: maximum received fragment size
+ * @net: namespace that this frag belongs to
+- * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
++ * @rcu: rcu head for freeing deferall
+ */
+ struct inet_frag_queue {
+- spinlock_t lock;
++ struct rhash_head node;
++ union {
++ struct frag_v4_compare_key v4;
++ struct frag_v6_compare_key v6;
++ } key;
+ struct timer_list timer;
+- struct hlist_node list;
++ spinlock_t lock;
+ atomic_t refcnt;
+- struct sk_buff *fragments;
++ struct sk_buff *fragments; /* Used in IPv6. */
++ struct rb_root rb_fragments; /* Used in IPv4. */
+ struct sk_buff *fragments_tail;
++ struct sk_buff *last_run_head;
+ ktime_t stamp;
+ int len;
+ int meat;
+ __u8 flags;
+ u16 max_size;
+- struct netns_frags *net;
+- struct hlist_node list_evictor;
+-};
+-
+-#define INETFRAGS_HASHSZ 1024
+-
+-/* averaged:
+- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
+- * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
+- * struct frag_queue))
+- */
+-#define INETFRAGS_MAXDEPTH 128
+-
+-struct inet_frag_bucket {
+- struct hlist_head chain;
+- spinlock_t chain_lock;
++ struct netns_frags *net;
++ struct rcu_head rcu;
+ };
+
+ struct inet_frags {
+- struct inet_frag_bucket hash[INETFRAGS_HASHSZ];
+-
+- struct work_struct frags_work;
+- unsigned int next_bucket;
+- unsigned long last_rebuild_jiffies;
+- bool rebuild;
+-
+- /* The first call to hashfn is responsible to initialize
+- * rnd. This is best done with net_get_random_once.
+- *
+- * rnd_seqlock is used to let hash insertion detect
+- * when it needs to re-lookup the hash chain to use.
+- */
+- u32 rnd;
+- seqlock_t rnd_seqlock;
+ int qsize;
+
+- unsigned int (*hashfn)(const struct inet_frag_queue *);
+- bool (*match)(const struct inet_frag_queue *q,
+- const void *arg);
+ void (*constructor)(struct inet_frag_queue *q,
+ const void *arg);
+ void (*destructor)(struct inet_frag_queue *);
+@@ -98,56 +98,47 @@ struct inet_frags {
+ void (*frag_expire)(unsigned long data);
+ struct kmem_cache *frags_cachep;
+ const char *frags_cache_name;
++ struct rhashtable_params rhash_params;
+ };
+
+ int inet_frags_init(struct inet_frags *);
+ void inet_frags_fini(struct inet_frags *);
+
+-static inline void inet_frags_init_net(struct netns_frags *nf)
++static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+- atomic_set(&nf->mem, 0);
++ atomic_long_set(&nf->mem, 0);
++ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
+ }
+-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
++void inet_frags_exit_net(struct netns_frags *nf);
+
+-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
+-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+- struct inet_frags *f, void *key, unsigned int hash);
++void inet_frag_kill(struct inet_frag_queue *q);
++void inet_frag_destroy(struct inet_frag_queue *q);
++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+
+-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+- const char *prefix);
++/* Free all skbs in the queue; return the sum of their truesizes. */
++unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
+-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
++static inline void inet_frag_put(struct inet_frag_queue *q)
+ {
+ if (atomic_dec_and_test(&q->refcnt))
+- inet_frag_destroy(q, f);
+-}
+-
+-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
+-{
+- return !hlist_unhashed(&q->list_evictor);
++ inet_frag_destroy(q);
+ }
+
+ /* Memory Tracking Functions. */
+
+-static inline int frag_mem_limit(struct netns_frags *nf)
+-{
+- return atomic_read(&nf->mem);
+-}
+-
+-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
++static inline long frag_mem_limit(const struct netns_frags *nf)
+ {
+- atomic_sub(i, &nf->mem);
++ return atomic_long_read(&nf->mem);
+ }
+
+-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
++static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
+ {
+- atomic_add(i, &nf->mem);
++ atomic_long_sub(val, &nf->mem);
+ }
+
+-static inline int sum_frag_mem_limit(struct netns_frags *nf)
++static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
+ {
+- return atomic_read(&nf->mem);
++ atomic_long_add(val, &nf->mem);
+ }
+
+ /* RFC 3168 support :
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 0530bcdbc212..7b968927477d 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -524,7 +524,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
+ return skb;
+ }
+ #endif
+-int ip_frag_mem(struct net *net);
+
+ /*
+ * Functions provided by ip_forward.c
+diff --git a/include/net/ipv6.h b/include/net/ipv6.h
+index 0e01d570fa22..c07cf9596b6f 100644
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -320,13 +320,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+ idev->cnf.accept_ra;
+ }
+
+-#if IS_ENABLED(CONFIG_IPV6)
+-static inline int ip6_frag_mem(struct net *net)
+-{
+- return sum_frag_mem_limit(&net->ipv6.frags);
+-}
+-#endif
+-
+ #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
+ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */
+ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */
+@@ -505,17 +498,8 @@ enum ip6_defrag_users {
+ __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+ };
+
+-struct ip6_create_arg {
+- __be32 id;
+- u32 user;
+- const struct in6_addr *src;
+- const struct in6_addr *dst;
+- int iif;
+- u8 ecn;
+-};
+-
+ void ip6_frag_init(struct inet_frag_queue *q, const void *a);
+-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
++extern const struct rhashtable_params ip6_rhash_params;
+
+ /*
+ * Equivalent of ipv4 struct ip
+@@ -523,19 +507,13 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+ struct frag_queue {
+ struct inet_frag_queue q;
+
+- __be32 id; /* fragment id */
+- u32 user;
+- struct in6_addr saddr;
+- struct in6_addr daddr;
+-
+ int iif;
+ unsigned int csum;
+ __u16 nhoffset;
+ u8 ecn;
+ };
+
+-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+- struct inet_frags *frags);
++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
+
+ static inline bool ipv6_addr_any(const struct in6_addr *a)
+ {
+diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
+index 25a9ad8bcef1..9de808ebce05 100644
+--- a/include/uapi/linux/snmp.h
++++ b/include/uapi/linux/snmp.h
+@@ -55,6 +55,7 @@ enum
+ IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */
+ IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */
+ IPSTATS_MIB_CEPKTS, /* InCEPkts */
++ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */
+ __IPSTATS_MIB_MAX
+ };
+
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 8a62cbfe1f2f..4e886ccd40db 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -3817,7 +3817,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
+ continue;
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ pr_cont(" %d-%c%c%c", cpu,
+- "O."[cpu_online(cpu)],
++ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
+ }
+diff --git a/lib/rhashtable.c b/lib/rhashtable.c
+index 37ea94b636a3..7bb8649429bf 100644
+--- a/lib/rhashtable.c
++++ b/lib/rhashtable.c
+@@ -250,8 +250,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
+ if (!new_tbl)
+ return 0;
+
+- for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
++ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+ rhashtable_rehash_chain(ht, old_hash);
++ cond_resched();
++ }
+
+ /* Publish the new table pointer. */
+ rcu_assign_pointer(ht->tbl, new_tbl);
+@@ -441,7 +443,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
+ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+- struct bucket_table *tbl)
++ struct bucket_table *tbl,
++ void **data)
+ {
+ struct rhash_head *head;
+ unsigned int hash;
+@@ -452,8 +455,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+
+ err = -EEXIST;
+- if (key && rhashtable_lookup_fast(ht, key, ht->p))
+- goto exit;
++ if (key) {
++ *data = rhashtable_lookup_fast(ht, key, ht->p);
++ if (*data)
++ goto exit;
++ }
+
+ err = -E2BIG;
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+@@ -838,6 +844,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+
++ cond_resched();
+ for (pos = rht_dereference(tbl->buckets[i], ht),
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 8a57bbaf7452..fea7c24e99d0 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -1502,6 +1502,21 @@ done:
+ }
+ EXPORT_SYMBOL(___pskb_trim);
+
++/* Note : use pskb_trim_rcsum() instead of calling this directly
++ */
++int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
++{
++ if (skb->ip_summed == CHECKSUM_COMPLETE) {
++ int delta = skb->len - len;
++
++ skb->csum = csum_block_sub(skb->csum,
++ skb_checksum(skb, len, delta, 0),
++ len);
++ }
++ return __pskb_trim(skb, len);
++}
++EXPORT_SYMBOL(pskb_trim_rcsum_slow);
++
+ /**
+ * __pskb_pull_tail - advance tail of skb header
+ * @skb: buffer to reallocate
+@@ -2380,23 +2395,27 @@ EXPORT_SYMBOL(skb_queue_purge);
+ /**
+ * skb_rbtree_purge - empty a skb rbtree
+ * @root: root of the rbtree to empty
++ * Return value: the sum of truesizes of all purged skbs.
+ *
+ * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ * the list and one reference dropped. This function does not take
+ * any lock. Synchronization should be handled by the caller (e.g., TCP
+ * out-of-order queue is protected by the socket lock).
+ */
+-void skb_rbtree_purge(struct rb_root *root)
++unsigned int skb_rbtree_purge(struct rb_root *root)
+ {
+ struct rb_node *p = rb_first(root);
++ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
++ sum += skb->truesize;
+ kfree_skb(skb);
+ }
++ return sum;
+ }
+
+ /**
+diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
+index b4e17a7c0df0..fdbebe51446f 100644
+--- a/net/ieee802154/6lowpan/6lowpan_i.h
++++ b/net/ieee802154/6lowpan/6lowpan_i.h
+@@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result;
+ #define LOWPAN_DISPATCH_FRAG1 0xc0
+ #define LOWPAN_DISPATCH_FRAGN 0xe0
+
+-struct lowpan_create_arg {
++struct frag_lowpan_compare_key {
+ u16 tag;
+ u16 d_size;
+- const struct ieee802154_addr *src;
+- const struct ieee802154_addr *dst;
++ struct ieee802154_addr src;
++ struct ieee802154_addr dst;
+ };
+
+-/* Equivalent of ipv4 struct ip
++/* Equivalent of ipv4 struct ipq
+ */
+ struct lowpan_frag_queue {
+ struct inet_frag_queue q;
+-
+- u16 tag;
+- u16 d_size;
+- struct ieee802154_addr saddr;
+- struct ieee802154_addr daddr;
+ };
+
+-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
+-{
+- switch (a->mode) {
+- case IEEE802154_ADDR_LONG:
+- return (((__force u64)a->extended_addr) >> 32) ^
+- (((__force u64)a->extended_addr) & 0xffffffff);
+- case IEEE802154_ADDR_SHORT:
+- return (__force u32)(a->short_addr);
+- default:
+- return 0;
+- }
+-}
+-
+ /* private device info */
+ struct lowpan_dev_info {
+ struct net_device *wdev; /* wpan device ptr */
+diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
+index 12e8cf4bda9f..6183730d38db 100644
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
+ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
+ struct sk_buff *prev, struct net_device *ldev);
+
+-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
+- const struct ieee802154_addr *saddr,
+- const struct ieee802154_addr *daddr)
+-{
+- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
+- return jhash_3words(ieee802154_addr_hash(saddr),
+- ieee802154_addr_hash(daddr),
+- (__force u32)(tag + (d_size << 16)),
+- lowpan_frags.rnd);
+-}
+-
+-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
+-{
+- const struct lowpan_frag_queue *fq;
+-
+- fq = container_of(q, struct lowpan_frag_queue, q);
+- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
+-}
+-
+-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
+-{
+- const struct lowpan_frag_queue *fq;
+- const struct lowpan_create_arg *arg = a;
+-
+- fq = container_of(q, struct lowpan_frag_queue, q);
+- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
+- ieee802154_addr_equal(&fq->saddr, arg->src) &&
+- ieee802154_addr_equal(&fq->daddr, arg->dst);
+-}
+-
+ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
+ {
+- const struct lowpan_create_arg *arg = a;
++ const struct frag_lowpan_compare_key *key = a;
+ struct lowpan_frag_queue *fq;
+
+ fq = container_of(q, struct lowpan_frag_queue, q);
+
+- fq->tag = arg->tag;
+- fq->d_size = arg->d_size;
+- fq->saddr = *arg->src;
+- fq->daddr = *arg->dst;
++ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
++ memcpy(&q->key, key, sizeof(*key));
+ }
+
+ static void lowpan_frag_expire(unsigned long data)
+@@ -93,10 +61,10 @@ static void lowpan_frag_expire(unsigned long data)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+ goto out;
+
+- inet_frag_kill(&fq->q, &lowpan_frags);
++ inet_frag_kill(&fq->q);
+ out:
+ spin_unlock(&fq->q.lock);
+- inet_frag_put(&fq->q, &lowpan_frags);
++ inet_frag_put(&fq->q);
+ }
+
+ static inline struct lowpan_frag_queue *
+@@ -104,25 +72,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
+ const struct ieee802154_addr *src,
+ const struct ieee802154_addr *dst)
+ {
+- struct inet_frag_queue *q;
+- struct lowpan_create_arg arg;
+- unsigned int hash;
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
++ struct frag_lowpan_compare_key key = {};
++ struct inet_frag_queue *q;
+
+- arg.tag = cb->d_tag;
+- arg.d_size = cb->d_size;
+- arg.src = src;
+- arg.dst = dst;
+-
+- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
++ key.tag = cb->d_tag;
++ key.d_size = cb->d_size;
++ key.src = *src;
++ key.dst = *dst;
+
+- q = inet_frag_find(&ieee802154_lowpan->frags,
+- &lowpan_frags, &arg, hash);
+- if (IS_ERR_OR_NULL(q)) {
+- inet_frag_maybe_warn_overflow(q, pr_fmt());
++ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
++ if (!q)
+ return NULL;
+- }
++
+ return container_of(q, struct lowpan_frag_queue, q);
+ }
+
+@@ -229,7 +192,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
+ struct sk_buff *fp, *head = fq->q.fragments;
+ int sum_truesize;
+
+- inet_frag_kill(&fq->q, &lowpan_frags);
++ inet_frag_kill(&fq->q);
+
+ /* Make the one we just received the head. */
+ if (prev) {
+@@ -408,7 +371,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ struct lowpan_frag_queue *fq;
+ struct net *net = dev_net(skb->dev);
+ struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
+- struct ieee802154_hdr hdr;
++ struct ieee802154_hdr hdr = {};
+ int err;
+
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
+@@ -437,7 +400,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
+ ret = lowpan_frag_queue(fq, skb, frag_type);
+ spin_unlock(&fq->q.lock);
+
+- inet_frag_put(&fq->q, &lowpan_frags);
++ inet_frag_put(&fq->q);
+ return ret;
+ }
+
+@@ -447,24 +410,22 @@ err:
+ }
+
+ #ifdef CONFIG_SYSCTL
+-static int zero;
+
+ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ {
+ .procname = "6lowpanfrag_high_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.high_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
+ },
+ {
+ .procname = "6lowpanfrag_low_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.low_thresh,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+- .proc_handler = proc_dointvec_minmax,
+- .extra1 = &zero,
++ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
+ },
+ {
+@@ -580,14 +541,20 @@ static int __net_init lowpan_frags_init_net(struct net *net)
+ {
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
++ int res;
+
+ ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
++ ieee802154_lowpan->frags.f = &lowpan_frags;
+
+- inet_frags_init_net(&ieee802154_lowpan->frags);
+-
+- return lowpan_frags_ns_sysctl_register(net);
++ res = inet_frags_init_net(&ieee802154_lowpan->frags);
++ if (res < 0)
++ return res;
++ res = lowpan_frags_ns_sysctl_register(net);
++ if (res < 0)
++ inet_frags_exit_net(&ieee802154_lowpan->frags);
++ return res;
+ }
+
+ static void __net_exit lowpan_frags_exit_net(struct net *net)
+@@ -596,7 +563,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
+ net_ieee802154_lowpan(net);
+
+ lowpan_frags_ns_sysctl_unregister(net);
+- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
++ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ }
+
+ static struct pernet_operations lowpan_frags_ops = {
+@@ -604,33 +571,64 @@ static struct pernet_operations lowpan_frags_ops = {
+ .exit = lowpan_frags_exit_net,
+ };
+
+-int __init lowpan_net_frag_init(void)
++static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
+ {
+- int ret;
++ return jhash2(data,
++ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
++}
+
+- ret = lowpan_frags_sysctl_register();
+- if (ret)
+- return ret;
++static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
++{
++ const struct inet_frag_queue *fq = data;
+
+- ret = register_pernet_subsys(&lowpan_frags_ops);
+- if (ret)
+- goto err_pernet;
++ return jhash2((const u32 *)&fq->key,
++ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
++}
++
++static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
++{
++ const struct frag_lowpan_compare_key *key = arg->key;
++ const struct inet_frag_queue *fq = ptr;
++
++ return !!memcmp(&fq->key, key, sizeof(*key));
++}
++
++static const struct rhashtable_params lowpan_rhash_params = {
++ .head_offset = offsetof(struct inet_frag_queue, node),
++ .hashfn = lowpan_key_hashfn,
++ .obj_hashfn = lowpan_obj_hashfn,
++ .obj_cmpfn = lowpan_obj_cmpfn,
++ .automatic_shrinking = true,
++};
++
++int __init lowpan_net_frag_init(void)
++{
++ int ret;
+
+- lowpan_frags.hashfn = lowpan_hashfn;
+ lowpan_frags.constructor = lowpan_frag_init;
+ lowpan_frags.destructor = NULL;
+ lowpan_frags.skb_free = NULL;
+ lowpan_frags.qsize = sizeof(struct frag_queue);
+- lowpan_frags.match = lowpan_frag_match;
+ lowpan_frags.frag_expire = lowpan_frag_expire;
+ lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
++ lowpan_frags.rhash_params = lowpan_rhash_params;
+ ret = inet_frags_init(&lowpan_frags);
+ if (ret)
+- goto err_pernet;
++ goto out;
+
++ ret = lowpan_frags_sysctl_register();
++ if (ret)
++ goto err_sysctl;
++
++ ret = register_pernet_subsys(&lowpan_frags_ops);
++ if (ret)
++ goto err_pernet;
++out:
+ return ret;
+ err_pernet:
+ lowpan_frags_sysctl_unregister();
++err_sysctl:
++ inet_frags_fini(&lowpan_frags);
+ return ret;
+ }
+
1125 |
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c |
1126 |
+index b2001b20e029..c03e5f5859e1 100644 |
1127 |
+--- a/net/ipv4/inet_fragment.c |
1128 |
++++ b/net/ipv4/inet_fragment.c |
1129 |
+@@ -25,12 +25,6 @@ |
1130 |
+ #include <net/inet_frag.h> |
1131 |
+ #include <net/inet_ecn.h> |
1132 |
+ |
1133 |
+-#define INETFRAGS_EVICT_BUCKETS 128 |
1134 |
+-#define INETFRAGS_EVICT_MAX 512 |
1135 |
+- |
1136 |
+-/* don't rebuild inetfrag table with new secret more often than this */ |
1137 |
+-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) |
1138 |
+- |
1139 |
+ /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements |
1140 |
+ * Value : 0xff if frame should be dropped. |
1141 |
+ * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field |
1142 |
+@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = { |
1143 |
+ }; |
1144 |
+ EXPORT_SYMBOL(ip_frag_ecn_table); |
1145 |
+ |
1146 |
+-static unsigned int |
1147 |
+-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) |
1148 |
+-{ |
1149 |
+- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); |
1150 |
+-} |
1151 |
+- |
1152 |
+-static bool inet_frag_may_rebuild(struct inet_frags *f) |
1153 |
+-{ |
1154 |
+- return time_after(jiffies, |
1155 |
+- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); |
1156 |
+-} |
1157 |
+- |
1158 |
+-static void inet_frag_secret_rebuild(struct inet_frags *f) |
1159 |
+-{ |
1160 |
+- int i; |
1161 |
+- |
1162 |
+- write_seqlock_bh(&f->rnd_seqlock); |
1163 |
+- |
1164 |
+- if (!inet_frag_may_rebuild(f)) |
1165 |
+- goto out; |
1166 |
+- |
1167 |
+- get_random_bytes(&f->rnd, sizeof(u32)); |
1168 |
+- |
1169 |
+- for (i = 0; i < INETFRAGS_HASHSZ; i++) { |
1170 |
+- struct inet_frag_bucket *hb; |
1171 |
+- struct inet_frag_queue *q; |
1172 |
+- struct hlist_node *n; |
1173 |
+- |
1174 |
+- hb = &f->hash[i]; |
1175 |
+- spin_lock(&hb->chain_lock); |
1176 |
+- |
1177 |
+- hlist_for_each_entry_safe(q, n, &hb->chain, list) { |
1178 |
+- unsigned int hval = inet_frag_hashfn(f, q); |
1179 |
+- |
1180 |
+- if (hval != i) { |
1181 |
+- struct inet_frag_bucket *hb_dest; |
1182 |
+- |
1183 |
+- hlist_del(&q->list); |
1184 |
+- |
1185 |
+- /* Relink to new hash chain. */ |
1186 |
+- hb_dest = &f->hash[hval]; |
1187 |
+- |
1188 |
+- /* This is the only place where we take |
1189 |
+- * another chain_lock while already holding |
1190 |
+- * one. As this will not run concurrently, |
1191 |
+- * we cannot deadlock on hb_dest lock below, if its |
1192 |
+- * already locked it will be released soon since |
1193 |
+- * other caller cannot be waiting for hb lock |
1194 |
+- * that we've taken above. |
1195 |
+- */ |
1196 |
+- spin_lock_nested(&hb_dest->chain_lock, |
1197 |
+- SINGLE_DEPTH_NESTING); |
1198 |
+- hlist_add_head(&q->list, &hb_dest->chain); |
1199 |
+- spin_unlock(&hb_dest->chain_lock); |
1200 |
+- } |
1201 |
+- } |
1202 |
+- spin_unlock(&hb->chain_lock); |
1203 |
+- } |
1204 |
+- |
1205 |
+- f->rebuild = false; |
1206 |
+- f->last_rebuild_jiffies = jiffies; |
1207 |
+-out: |
1208 |
+- write_sequnlock_bh(&f->rnd_seqlock); |
1209 |
+-} |
1210 |
+- |
1211 |
+-static bool inet_fragq_should_evict(const struct inet_frag_queue *q) |
1212 |
+-{ |
1213 |
+- if (!hlist_unhashed(&q->list_evictor)) |
1214 |
+- return false; |
1215 |
+- |
1216 |
+- return q->net->low_thresh == 0 || |
1217 |
+- frag_mem_limit(q->net) >= q->net->low_thresh; |
1218 |
+-} |
1219 |
+- |
1220 |
+-static unsigned int |
1221 |
+-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) |
1222 |
+-{ |
1223 |
+- struct inet_frag_queue *fq; |
1224 |
+- struct hlist_node *n; |
1225 |
+- unsigned int evicted = 0; |
1226 |
+- HLIST_HEAD(expired); |
1227 |
+- |
1228 |
+- spin_lock(&hb->chain_lock); |
1229 |
+- |
1230 |
+- hlist_for_each_entry_safe(fq, n, &hb->chain, list) { |
1231 |
+- if (!inet_fragq_should_evict(fq)) |
1232 |
+- continue; |
1233 |
+- |
1234 |
+- if (!del_timer(&fq->timer)) |
1235 |
+- continue; |
1236 |
+- |
1237 |
+- hlist_add_head(&fq->list_evictor, &expired); |
1238 |
+- ++evicted; |
1239 |
+- } |
1240 |
+- |
1241 |
+- spin_unlock(&hb->chain_lock); |
1242 |
+- |
1243 |
+- hlist_for_each_entry_safe(fq, n, &expired, list_evictor) |
1244 |
+- f->frag_expire((unsigned long) fq); |
1245 |
+- |
1246 |
+- return evicted; |
1247 |
+-} |
1248 |
+- |
1249 |
+-static void inet_frag_worker(struct work_struct *work) |
1250 |
+-{ |
1251 |
+- unsigned int budget = INETFRAGS_EVICT_BUCKETS; |
1252 |
+- unsigned int i, evicted = 0; |
1253 |
+- struct inet_frags *f; |
1254 |
+- |
1255 |
+- f = container_of(work, struct inet_frags, frags_work); |
1256 |
+- |
1257 |
+- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); |
1258 |
+- |
1259 |
+- local_bh_disable(); |
1260 |
+- |
1261 |
+- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { |
1262 |
+- evicted += inet_evict_bucket(f, &f->hash[i]); |
1263 |
+- i = (i + 1) & (INETFRAGS_HASHSZ - 1); |
1264 |
+- if (evicted > INETFRAGS_EVICT_MAX) |
1265 |
+- break; |
1266 |
+- } |
1267 |
+- |
1268 |
+- f->next_bucket = i; |
1269 |
+- |
1270 |
+- local_bh_enable(); |
1271 |
+- |
1272 |
+- if (f->rebuild && inet_frag_may_rebuild(f)) |
1273 |
+- inet_frag_secret_rebuild(f); |
1274 |
+-} |
1275 |
+- |
1276 |
+-static void inet_frag_schedule_worker(struct inet_frags *f) |
1277 |
+-{ |
1278 |
+- if (unlikely(!work_pending(&f->frags_work))) |
1279 |
+- schedule_work(&f->frags_work); |
1280 |
+-} |
1281 |
+- |
1282 |
+ int inet_frags_init(struct inet_frags *f) |
1283 |
+ { |
1284 |
+- int i; |
1285 |
+- |
1286 |
+- INIT_WORK(&f->frags_work, inet_frag_worker); |
1287 |
+- |
1288 |
+- for (i = 0; i < INETFRAGS_HASHSZ; i++) { |
1289 |
+- struct inet_frag_bucket *hb = &f->hash[i]; |
1290 |
+- |
1291 |
+- spin_lock_init(&hb->chain_lock); |
1292 |
+- INIT_HLIST_HEAD(&hb->chain); |
1293 |
+- } |
1294 |
+- |
1295 |
+- seqlock_init(&f->rnd_seqlock); |
1296 |
+- f->last_rebuild_jiffies = 0; |
1297 |
+ f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, |
1298 |
+ NULL); |
1299 |
+ if (!f->frags_cachep) |
1300 |
+@@ -214,73 +59,53 @@ EXPORT_SYMBOL(inet_frags_init); |
1301 |
+ |
1302 |
+ void inet_frags_fini(struct inet_frags *f) |
1303 |
+ { |
1304 |
+- cancel_work_sync(&f->frags_work); |
1305 |
++ /* We must wait that all inet_frag_destroy_rcu() have completed. */ |
1306 |
++ rcu_barrier(); |
1307 |
++ |
1308 |
+ kmem_cache_destroy(f->frags_cachep); |
1309 |
++ f->frags_cachep = NULL; |
1310 |
+ } |
1311 |
+ EXPORT_SYMBOL(inet_frags_fini); |
1312 |
+ |
1313 |
+-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) |
1314 |
++static void inet_frags_free_cb(void *ptr, void *arg) |
1315 |
+ { |
1316 |
+- unsigned int seq; |
1317 |
+- int i; |
1318 |
++ struct inet_frag_queue *fq = ptr; |
1319 |
+ |
1320 |
+- nf->low_thresh = 0; |
1321 |
+- |
1322 |
+-evict_again: |
1323 |
+- local_bh_disable(); |
1324 |
+- seq = read_seqbegin(&f->rnd_seqlock); |
1325 |
+- |
1326 |
+- for (i = 0; i < INETFRAGS_HASHSZ ; i++) |
1327 |
+- inet_evict_bucket(f, &f->hash[i]); |
1328 |
+- |
1329 |
+- local_bh_enable(); |
1330 |
+- cond_resched(); |
1331 |
+- |
1332 |
+- if (read_seqretry(&f->rnd_seqlock, seq) || |
1333 |
+- sum_frag_mem_limit(nf)) |
1334 |
+- goto evict_again; |
1335 |
+-} |
1336 |
+-EXPORT_SYMBOL(inet_frags_exit_net); |
1337 |
+- |
1338 |
+-static struct inet_frag_bucket * |
1339 |
+-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f) |
1340 |
+-__acquires(hb->chain_lock) |
1341 |
+-{ |
1342 |
+- struct inet_frag_bucket *hb; |
1343 |
+- unsigned int seq, hash; |
1344 |
+- |
1345 |
+- restart: |
1346 |
+- seq = read_seqbegin(&f->rnd_seqlock); |
1347 |
+- |
1348 |
+- hash = inet_frag_hashfn(f, fq); |
1349 |
+- hb = &f->hash[hash]; |
1350 |
++ /* If we can not cancel the timer, it means this frag_queue |
1351 |
++ * is already disappearing, we have nothing to do. |
1352 |
++ * Otherwise, we own a refcount until the end of this function. |
1353 |
++ */ |
1354 |
++ if (!del_timer(&fq->timer)) |
1355 |
++ return; |
1356 |
+ |
1357 |
+- spin_lock(&hb->chain_lock); |
1358 |
+- if (read_seqretry(&f->rnd_seqlock, seq)) { |
1359 |
+- spin_unlock(&hb->chain_lock); |
1360 |
+- goto restart; |
1361 |
++ spin_lock_bh(&fq->lock); |
1362 |
++ if (!(fq->flags & INET_FRAG_COMPLETE)) { |
1363 |
++ fq->flags |= INET_FRAG_COMPLETE; |
1364 |
++ atomic_dec(&fq->refcnt); |
1365 |
+ } |
1366 |
++ spin_unlock_bh(&fq->lock); |
1367 |
+ |
1368 |
+- return hb; |
1369 |
++ inet_frag_put(fq); |
1370 |
+ } |
1371 |
+ |
1372 |
+-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) |
1373 |
++void inet_frags_exit_net(struct netns_frags *nf) |
1374 |
+ { |
1375 |
+- struct inet_frag_bucket *hb; |
1376 |
++ nf->high_thresh = 0; /* prevent creation of new frags */ |
1377 |
+ |
1378 |
+- hb = get_frag_bucket_locked(fq, f); |
1379 |
+- hlist_del(&fq->list); |
1380 |
+- fq->flags |= INET_FRAG_COMPLETE; |
1381 |
+- spin_unlock(&hb->chain_lock); |
1382 |
++ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); |
1383 |
+ } |
1384 |
++EXPORT_SYMBOL(inet_frags_exit_net); |
1385 |
+ |
1386 |
+-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) |
1387 |
++void inet_frag_kill(struct inet_frag_queue *fq) |
1388 |
+ { |
1389 |
+ if (del_timer(&fq->timer)) |
1390 |
+ atomic_dec(&fq->refcnt); |
1391 |
+ |
1392 |
+ if (!(fq->flags & INET_FRAG_COMPLETE)) { |
1393 |
+- fq_unlink(fq, f); |
1394 |
++ struct netns_frags *nf = fq->net; |
1395 |
++ |
1396 |
++ fq->flags |= INET_FRAG_COMPLETE; |
1397 |
++ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); |
1398 |
+ atomic_dec(&fq->refcnt); |
1399 |
+ } |
1400 |
+ } |
1401 |
+@@ -294,11 +119,23 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, |
1402 |
+ kfree_skb(skb); |
1403 |
+ } |
1404 |
+ |
1405 |
+-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) |
1406 |
++static void inet_frag_destroy_rcu(struct rcu_head *head) |
1407 |
++{ |
1408 |
++ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, |
1409 |
++ rcu); |
1410 |
++ struct inet_frags *f = q->net->f; |
1411 |
++ |
1412 |
++ if (f->destructor) |
1413 |
++ f->destructor(q); |
1414 |
++ kmem_cache_free(f->frags_cachep, q); |
1415 |
++} |
1416 |
++ |
1417 |
++void inet_frag_destroy(struct inet_frag_queue *q) |
1418 |
+ { |
1419 |
+ struct sk_buff *fp; |
1420 |
+ struct netns_frags *nf; |
1421 |
+ unsigned int sum, sum_truesize = 0; |
1422 |
++ struct inet_frags *f; |
1423 |
+ |
1424 |
+ WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); |
1425 |
+ WARN_ON(del_timer(&q->timer) != 0); |
1426 |
+@@ -306,64 +143,35 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) |
1427 |
+ /* Release all fragment data. */ |
1428 |
+ fp = q->fragments; |
1429 |
+ nf = q->net; |
1430 |
+- while (fp) { |
1431 |
+- struct sk_buff *xp = fp->next; |
1432 |
+- |
1433 |
+- sum_truesize += fp->truesize; |
1434 |
+- frag_kfree_skb(nf, f, fp); |
1435 |
+- fp = xp; |
1436 |
++ f = nf->f; |
1437 |
++ if (fp) { |
1438 |
++ do { |
1439 |
++ struct sk_buff *xp = fp->next; |
1440 |
++ |
1441 |
++ sum_truesize += fp->truesize; |
1442 |
++ frag_kfree_skb(nf, f, fp); |
1443 |
++ fp = xp; |
1444 |
++ } while (fp); |
1445 |
++ } else { |
1446 |
++ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); |
1447 |
+ } |
1448 |
+ sum = sum_truesize + f->qsize; |
1449 |
+ |
1450 |
+- if (f->destructor) |
1451 |
+- f->destructor(q); |
1452 |
+- kmem_cache_free(f->frags_cachep, q); |
1453 |
++ call_rcu(&q->rcu, inet_frag_destroy_rcu); |
1454 |
+ |
1455 |
+ sub_frag_mem_limit(nf, sum); |
1456 |
+ } |
1457 |
+ EXPORT_SYMBOL(inet_frag_destroy); |
1458 |
+ |
1459 |
+-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, |
1460 |
+- struct inet_frag_queue *qp_in, |
1461 |
+- struct inet_frags *f, |
1462 |
+- void *arg) |
1463 |
+-{ |
1464 |
+- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); |
1465 |
+- struct inet_frag_queue *qp; |
1466 |
+- |
1467 |
+-#ifdef CONFIG_SMP |
1468 |
+- /* With SMP race we have to recheck hash table, because |
1469 |
+- * such entry could have been created on other cpu before |
1470 |
+- * we acquired hash bucket lock. |
1471 |
+- */ |
1472 |
+- hlist_for_each_entry(qp, &hb->chain, list) { |
1473 |
+- if (qp->net == nf && f->match(qp, arg)) { |
1474 |
+- atomic_inc(&qp->refcnt); |
1475 |
+- spin_unlock(&hb->chain_lock); |
1476 |
+- qp_in->flags |= INET_FRAG_COMPLETE; |
1477 |
+- inet_frag_put(qp_in, f); |
1478 |
+- return qp; |
1479 |
+- } |
1480 |
+- } |
1481 |
+-#endif |
1482 |
+- qp = qp_in; |
1483 |
+- if (!mod_timer(&qp->timer, jiffies + nf->timeout)) |
1484 |
+- atomic_inc(&qp->refcnt); |
1485 |
+- |
1486 |
+- atomic_inc(&qp->refcnt); |
1487 |
+- hlist_add_head(&qp->list, &hb->chain); |
1488 |
+- |
1489 |
+- spin_unlock(&hb->chain_lock); |
1490 |
+- |
1491 |
+- return qp; |
1492 |
+-} |
1493 |
+- |
1494 |
+ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, |
1495 |
+ struct inet_frags *f, |
1496 |
+ void *arg) |
1497 |
+ { |
1498 |
+ struct inet_frag_queue *q; |
1499 |
+ |
1500 |
++ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) |
1501 |
++ return NULL; |
1502 |
++ |
1503 |
+ q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); |
1504 |
+ if (!q) |
1505 |
+ return NULL; |
1506 |
+@@ -374,75 +182,52 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, |
1507 |
+ |
1508 |
+ setup_timer(&q->timer, f->frag_expire, (unsigned long)q); |
1509 |
+ spin_lock_init(&q->lock); |
1510 |
+- atomic_set(&q->refcnt, 1); |
1511 |
++ atomic_set(&q->refcnt, 3); |
1512 |
+ |
1513 |
+ return q; |
1514 |
+ } |
1515 |
+ |
1516 |
+ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, |
1517 |
+- struct inet_frags *f, |
1518 |
+- void *arg) |
1519 |
++ void *arg, |
1520 |
++ struct inet_frag_queue **prev) |
1521 |
+ { |
1522 |
++ struct inet_frags *f = nf->f; |
1523 |
+ struct inet_frag_queue *q; |
1524 |
+ |
1525 |
+ q = inet_frag_alloc(nf, f, arg); |
1526 |
+- if (!q) |
1527 |
+- return NULL; |
1528 |
+- |
1529 |
+- return inet_frag_intern(nf, q, f, arg); |
1530 |
+-} |
1531 |
+- |
1532 |
+-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, |
1533 |
+- struct inet_frags *f, void *key, |
1534 |
+- unsigned int hash) |
1535 |
+-{ |
1536 |
+- struct inet_frag_bucket *hb; |
1537 |
+- struct inet_frag_queue *q; |
1538 |
+- int depth = 0; |
1539 |
+- |
1540 |
+- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { |
1541 |
+- inet_frag_schedule_worker(f); |
1542 |
++ if (!q) { |
1543 |
++ *prev = ERR_PTR(-ENOMEM); |
1544 |
+ return NULL; |
1545 |
+ } |
1546 |
+- |
1547 |
+- if (frag_mem_limit(nf) > nf->low_thresh) |
1548 |
+- inet_frag_schedule_worker(f); |
1549 |
+- |
1550 |
+- hash &= (INETFRAGS_HASHSZ - 1); |
1551 |
+- hb = &f->hash[hash]; |
1552 |
+- |
1553 |
+- spin_lock(&hb->chain_lock); |
1554 |
+- hlist_for_each_entry(q, &hb->chain, list) { |
1555 |
+- if (q->net == nf && f->match(q, key)) { |
1556 |
+- atomic_inc(&q->refcnt); |
1557 |
+- spin_unlock(&hb->chain_lock); |
1558 |
+- return q; |
1559 |
+- } |
1560 |
+- depth++; |
1561 |
+- } |
1562 |
+- spin_unlock(&hb->chain_lock); |
1563 |
+- |
1564 |
+- if (depth <= INETFRAGS_MAXDEPTH) |
1565 |
+- return inet_frag_create(nf, f, key); |
1566 |
+- |
1567 |
+- if (inet_frag_may_rebuild(f)) { |
1568 |
+- if (!f->rebuild) |
1569 |
+- f->rebuild = true; |
1570 |
+- inet_frag_schedule_worker(f); |
1571 |
++ mod_timer(&q->timer, jiffies + nf->timeout); |
1572 |
++ |
1573 |
++ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, |
1574 |
++ &q->node, f->rhash_params); |
1575 |
++ if (*prev) { |
1576 |
++ q->flags |= INET_FRAG_COMPLETE; |
1577 |
++ inet_frag_kill(q); |
1578 |
++ inet_frag_destroy(q); |
1579 |
++ return NULL; |
1580 |
+ } |
1581 |
+- |
1582 |
+- return ERR_PTR(-ENOBUFS); |
1583 |
++ return q; |
1584 |
+ } |
1585 |
+-EXPORT_SYMBOL(inet_frag_find); |
1586 |
++EXPORT_SYMBOL(inet_frag_create); |
1587 |
+ |
1588 |
+-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, |
1589 |
+- const char *prefix) |
1590 |
++/* TODO: call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
1591 |
++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) |
1592 |
+ { |
1593 |
+- static const char msg[] = "inet_frag_find: Fragment hash bucket" |
1594 |
+- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) |
1595 |
+- ". Dropping fragment.\n"; |
1596 |
++ struct inet_frag_queue *fq = NULL, *prev; |
1597 |
+ |
1598 |
+- if (PTR_ERR(q) == -ENOBUFS) |
1599 |
+- net_dbg_ratelimited("%s%s", prefix, msg); |
1600 |
++ rcu_read_lock(); |
1601 |
++ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); |
1602 |
++ if (!prev) |
1603 |
++ fq = inet_frag_create(nf, key, &prev); |
1604 |
++ if (prev && !IS_ERR(prev)) { |
1605 |
++ fq = prev; |
1606 |
++ if (!atomic_inc_not_zero(&fq->refcnt)) |
1607 |
++ fq = NULL; |
1608 |
++ } |
1609 |
++ rcu_read_unlock(); |
1610 |
++ return fq; |
1611 |
+ } |
1612 |
+-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); |
1613 |
++EXPORT_SYMBOL(inet_frag_find); |
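
Editor's note: the rewrite of net/ipv4/inet_fragment.c above boils down to one pattern -- inet_frag_find() looks the key up under rcu_read_lock(), creates and inserts a queue on a miss, and defers to whichever entry won a concurrent insert. A minimal userspace sketch of that lookup-or-create shape, with a mutex and a single list standing in for rhashtable/RCU (struct queue and find_or_create are hypothetical names):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct queue { int key; int refcnt; struct queue *next; };

	static struct queue *table;	/* stand-in for nf->rhashtable */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static struct queue *find_or_create(int key)
	{
		struct queue *q;

		pthread_mutex_lock(&lock);
		for (q = table; q; q = q->next)	/* rhashtable_lookup() */
			if (q->key == key) {
				q->refcnt++;	/* atomic_inc_not_zero() */
				goto out;
			}
		q = calloc(1, sizeof(*q));	/* inet_frag_alloc() */
		if (q) {
			q->key = key;
			/* One ref for the table, one for the timer, one for
			 * the caller -- why inet_frag_alloc() starts at 3. */
			q->refcnt = 3;
			q->next = table;
			table = q;  /* rhashtable_lookup_get_insert_key() */
		}
	out:
		pthread_mutex_unlock(&lock);
		return q;
	}

	int main(void)
	{
		struct queue *a = find_or_create(42);
		struct queue *b = find_or_create(42);

		if (!a || !b)
			return 1;
		printf("same queue: %d, refcnt: %d\n", a == b, b->refcnt);
		return 0;
	}

What the mutex cannot show is the lost-race path: in the kernel, rhashtable_lookup_get_insert_key() hands back the winning entry, and the loser is immediately killed and destroyed.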
1614 |
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c |
1615 |
+index 72915658a6b1..9b09a9b5a4fe 100644 |
1616 |
+--- a/net/ipv4/ip_fragment.c |
1617 |
++++ b/net/ipv4/ip_fragment.c |
1618 |
+@@ -58,27 +58,64 @@ |
1619 |
+ static int sysctl_ipfrag_max_dist __read_mostly = 64; |
1620 |
+ static const char ip_frag_cache_name[] = "ip4-frags"; |
1621 |
+ |
1622 |
+-struct ipfrag_skb_cb |
1623 |
+-{ |
1624 |
++/* Use skb->cb to track consecutive/adjacent fragments coming at |
1625 |
++ * the end of the queue. Nodes in the rb-tree queue will |
1626 |
++ * contain "runs" of one or more adjacent fragments. |
1627 |
++ * |
1628 |
++ * Invariants: |
1629 |
++ * - next_frag is NULL at the tail of a "run"; |
1630 |
++ * - the head of a "run" has the sum of all fragment lengths in frag_run_len. |
1631 |
++ */ |
1632 |
++struct ipfrag_skb_cb { |
1633 |
+ struct inet_skb_parm h; |
1634 |
+- int offset; |
1635 |
++ struct sk_buff *next_frag; |
1636 |
++ int frag_run_len; |
1637 |
+ }; |
1638 |
+ |
1639 |
+-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) |
1640 |
++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) |
1641 |
++ |
1642 |
++static void ip4_frag_init_run(struct sk_buff *skb) |
1643 |
++{ |
1644 |
++ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); |
1645 |
++ |
1646 |
++ FRAG_CB(skb)->next_frag = NULL; |
1647 |
++ FRAG_CB(skb)->frag_run_len = skb->len; |
1648 |
++} |
1649 |
++ |
1650 |
++/* Append skb to the last "run". */ |
1651 |
++static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, |
1652 |
++ struct sk_buff *skb) |
1653 |
++{ |
1654 |
++ RB_CLEAR_NODE(&skb->rbnode); |
1655 |
++ FRAG_CB(skb)->next_frag = NULL; |
1656 |
++ |
1657 |
++ FRAG_CB(q->last_run_head)->frag_run_len += skb->len; |
1658 |
++ FRAG_CB(q->fragments_tail)->next_frag = skb; |
1659 |
++ q->fragments_tail = skb; |
1660 |
++} |
1661 |
++ |
1662 |
++/* Create a new "run" with the skb. */ |
1663 |
++static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) |
1664 |
++{ |
1665 |
++ if (q->last_run_head) |
1666 |
++ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, |
1667 |
++ &q->last_run_head->rbnode.rb_right); |
1668 |
++ else |
1669 |
++ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); |
1670 |
++ rb_insert_color(&skb->rbnode, &q->rb_fragments); |
1671 |
++ |
1672 |
++ ip4_frag_init_run(skb); |
1673 |
++ q->fragments_tail = skb; |
1674 |
++ q->last_run_head = skb; |
1675 |
++} |
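
Editor's note: the three helpers above are the whole "runs" maintenance -- adjacent fragments are chained through FRAG_CB(skb)->next_frag, only the head of each run sits in the rb-tree, and frag_run_len on the head sums the run. A tiny userspace model of the append case (struct frag and append_to_run are hypothetical names; the kernel keeps this state in skb->cb):

	#include <assert.h>

	struct frag { int offset, len, run_len; struct frag *next_frag; };

	/* Head of a run absorbs an exactly-adjacent follower, as in
	 * ip4_frag_append_to_last_run() above. */
	static void append_to_run(struct frag *head, struct frag *tail,
				  struct frag *f)
	{
		f->next_frag = NULL;
		head->run_len += f->len;
		tail->next_frag = f;
	}

	int main(void)
	{
		struct frag a = { .offset = 0,    .len = 1000, .run_len = 1000 };
		struct frag b = { .offset = 1000, .len = 1000 };

		append_to_run(&a, &a, &b);  /* b starts exactly where a ends */
		assert(a.run_len == 2000);  /* one tree node covers 0..2000 */
		return 0;
	}

With runs, a datagram received in order occupies a single tree node regardless of fragment count, which keeps rb-tree rebalancing off the common path.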
1676 |
+ |
1677 |
+ /* Describe an entry in the "incomplete datagrams" queue. */ |
1678 |
+ struct ipq { |
1679 |
+ struct inet_frag_queue q; |
1680 |
+ |
1681 |
+- u32 user; |
1682 |
+- __be32 saddr; |
1683 |
+- __be32 daddr; |
1684 |
+- __be16 id; |
1685 |
+- u8 protocol; |
1686 |
+ u8 ecn; /* RFC3168 support */ |
1687 |
+ u16 max_df_size; /* largest frag with DF set seen */ |
1688 |
+ int iif; |
1689 |
+- int vif; /* L3 master device index */ |
1690 |
+ unsigned int rid; |
1691 |
+ struct inet_peer *peer; |
1692 |
+ }; |
1693 |
+@@ -90,49 +127,9 @@ static u8 ip4_frag_ecn(u8 tos) |
1694 |
+ |
1695 |
+ static struct inet_frags ip4_frags; |
1696 |
+ |
1697 |
+-int ip_frag_mem(struct net *net) |
1698 |
+-{ |
1699 |
+- return sum_frag_mem_limit(&net->ipv4.frags); |
1700 |
+-} |
1701 |
+- |
1702 |
+-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, |
1703 |
+- struct net_device *dev); |
1704 |
+- |
1705 |
+-struct ip4_create_arg { |
1706 |
+- struct iphdr *iph; |
1707 |
+- u32 user; |
1708 |
+- int vif; |
1709 |
+-}; |
1710 |
++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, |
1711 |
++ struct sk_buff *prev_tail, struct net_device *dev); |
1712 |
+ |
1713 |
+-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) |
1714 |
+-{ |
1715 |
+- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); |
1716 |
+- return jhash_3words((__force u32)id << 16 | prot, |
1717 |
+- (__force u32)saddr, (__force u32)daddr, |
1718 |
+- ip4_frags.rnd); |
1719 |
+-} |
1720 |
+- |
1721 |
+-static unsigned int ip4_hashfn(const struct inet_frag_queue *q) |
1722 |
+-{ |
1723 |
+- const struct ipq *ipq; |
1724 |
+- |
1725 |
+- ipq = container_of(q, struct ipq, q); |
1726 |
+- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); |
1727 |
+-} |
1728 |
+- |
1729 |
+-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a) |
1730 |
+-{ |
1731 |
+- const struct ipq *qp; |
1732 |
+- const struct ip4_create_arg *arg = a; |
1733 |
+- |
1734 |
+- qp = container_of(q, struct ipq, q); |
1735 |
+- return qp->id == arg->iph->id && |
1736 |
+- qp->saddr == arg->iph->saddr && |
1737 |
+- qp->daddr == arg->iph->daddr && |
1738 |
+- qp->protocol == arg->iph->protocol && |
1739 |
+- qp->user == arg->user && |
1740 |
+- qp->vif == arg->vif; |
1741 |
+-} |
1742 |
+ |
1743 |
+ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) |
1744 |
+ { |
1745 |
+@@ -141,17 +138,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) |
1746 |
+ frags); |
1747 |
+ struct net *net = container_of(ipv4, struct net, ipv4); |
1748 |
+ |
1749 |
+- const struct ip4_create_arg *arg = a; |
1750 |
++ const struct frag_v4_compare_key *key = a; |
1751 |
+ |
1752 |
+- qp->protocol = arg->iph->protocol; |
1753 |
+- qp->id = arg->iph->id; |
1754 |
+- qp->ecn = ip4_frag_ecn(arg->iph->tos); |
1755 |
+- qp->saddr = arg->iph->saddr; |
1756 |
+- qp->daddr = arg->iph->daddr; |
1757 |
+- qp->vif = arg->vif; |
1758 |
+- qp->user = arg->user; |
1759 |
++ q->key.v4 = *key; |
1760 |
++ qp->ecn = 0; |
1761 |
+ qp->peer = sysctl_ipfrag_max_dist ? |
1762 |
+- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : |
1763 |
++ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : |
1764 |
+ NULL; |
1765 |
+ } |
1766 |
+ |
1767 |
+@@ -169,7 +161,7 @@ static void ip4_frag_free(struct inet_frag_queue *q) |
1768 |
+ |
1769 |
+ static void ipq_put(struct ipq *ipq) |
1770 |
+ { |
1771 |
+- inet_frag_put(&ipq->q, &ip4_frags); |
1772 |
++ inet_frag_put(&ipq->q); |
1773 |
+ } |
1774 |
+ |
1775 |
+ /* Kill ipq entry. It is not destroyed immediately, |
1776 |
+@@ -177,7 +169,7 @@ static void ipq_put(struct ipq *ipq) |
1777 |
+ */ |
1778 |
+ static void ipq_kill(struct ipq *ipq) |
1779 |
+ { |
1780 |
+- inet_frag_kill(&ipq->q, &ip4_frags); |
1781 |
++ inet_frag_kill(&ipq->q); |
1782 |
+ } |
1783 |
+ |
1784 |
+ static bool frag_expire_skip_icmp(u32 user) |
1785 |
+@@ -194,8 +186,11 @@ static bool frag_expire_skip_icmp(u32 user) |
1786 |
+ */ |
1787 |
+ static void ip_expire(unsigned long arg) |
1788 |
+ { |
1789 |
+- struct ipq *qp; |
1790 |
++ const struct iphdr *iph; |
1791 |
++ struct sk_buff *head = NULL; |
1792 |
+ struct net *net; |
1793 |
++ struct ipq *qp; |
1794 |
++ int err; |
1795 |
+ |
1796 |
+ qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); |
1797 |
+ net = container_of(qp->q.net, struct net, ipv4.frags); |
1798 |
+@@ -208,51 +203,65 @@ static void ip_expire(unsigned long arg) |
1799 |
+ |
1800 |
+ ipq_kill(qp); |
1801 |
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); |
1802 |
++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); |
1803 |
+ |
1804 |
+- if (!inet_frag_evicting(&qp->q)) { |
1805 |
+- struct sk_buff *clone, *head = qp->q.fragments; |
1806 |
+- const struct iphdr *iph; |
1807 |
+- int err; |
1808 |
+- |
1809 |
+- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); |
1810 |
++ if (!(qp->q.flags & INET_FRAG_FIRST_IN)) |
1811 |
++ goto out; |
1812 |
+ |
1813 |
+- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) |
1814 |
++ /* sk_buff::dev and sk_buff::rbnode share a union. So we
1815 |
++ * pull the head out of the tree in order to be able to |
1816 |
++ * deal with head->dev. |
1817 |
++ */ |
1818 |
++ if (qp->q.fragments) { |
1819 |
++ head = qp->q.fragments; |
1820 |
++ qp->q.fragments = head->next; |
1821 |
++ } else { |
1822 |
++ head = skb_rb_first(&qp->q.rb_fragments); |
1823 |
++ if (!head) |
1824 |
+ goto out; |
1825 |
++ if (FRAG_CB(head)->next_frag) |
1826 |
++ rb_replace_node(&head->rbnode, |
1827 |
++ &FRAG_CB(head)->next_frag->rbnode, |
1828 |
++ &qp->q.rb_fragments); |
1829 |
++ else |
1830 |
++ rb_erase(&head->rbnode, &qp->q.rb_fragments); |
1831 |
++ memset(&head->rbnode, 0, sizeof(head->rbnode)); |
1832 |
++ barrier(); |
1833 |
++ } |
1834 |
++ if (head == qp->q.fragments_tail) |
1835 |
++ qp->q.fragments_tail = NULL; |
1836 |
+ |
1837 |
+- head->dev = dev_get_by_index_rcu(net, qp->iif); |
1838 |
+- if (!head->dev) |
1839 |
+- goto out; |
1840 |
++ sub_frag_mem_limit(qp->q.net, head->truesize); |
1841 |
++ |
1842 |
++ head->dev = dev_get_by_index_rcu(net, qp->iif); |
1843 |
++ if (!head->dev) |
1844 |
++ goto out; |
1845 |
+ |
1846 |
+ |
1847 |
+- /* skb has no dst, perform route lookup again */ |
1848 |
+- iph = ip_hdr(head); |
1849 |
+- err = ip_route_input_noref(head, iph->daddr, iph->saddr, |
1850 |
++ /* skb has no dst, perform route lookup again */ |
1851 |
++ iph = ip_hdr(head); |
1852 |
++ err = ip_route_input_noref(head, iph->daddr, iph->saddr, |
1853 |
+ iph->tos, head->dev); |
1854 |
+- if (err) |
1855 |
+- goto out; |
1856 |
++ if (err) |
1857 |
++ goto out; |
1858 |
+ |
1859 |
+- /* Only an end host needs to send an ICMP |
1860 |
+- * "Fragment Reassembly Timeout" message, per RFC792. |
1861 |
+- */ |
1862 |
+- if (frag_expire_skip_icmp(qp->user) && |
1863 |
+- (skb_rtable(head)->rt_type != RTN_LOCAL)) |
1864 |
+- goto out; |
1865 |
++ /* Only an end host needs to send an ICMP |
1866 |
++ * "Fragment Reassembly Timeout" message, per RFC792. |
1867 |
++ */ |
1868 |
++ if (frag_expire_skip_icmp(qp->q.key.v4.user) && |
1869 |
++ (skb_rtable(head)->rt_type != RTN_LOCAL)) |
1870 |
++ goto out; |
1871 |
+ |
1872 |
+- clone = skb_clone(head, GFP_ATOMIC); |
1873 |
++ spin_unlock(&qp->q.lock); |
1874 |
++ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); |
1875 |
++ goto out_rcu_unlock; |
1876 |
+ |
1877 |
+- /* Send an ICMP "Fragment Reassembly Timeout" message. */ |
1878 |
+- if (clone) { |
1879 |
+- spin_unlock(&qp->q.lock); |
1880 |
+- icmp_send(clone, ICMP_TIME_EXCEEDED, |
1881 |
+- ICMP_EXC_FRAGTIME, 0); |
1882 |
+- consume_skb(clone); |
1883 |
+- goto out_rcu_unlock; |
1884 |
+- } |
1885 |
+- } |
1886 |
+ out: |
1887 |
+ spin_unlock(&qp->q.lock); |
1888 |
+ out_rcu_unlock: |
1889 |
+ rcu_read_unlock(); |
1890 |
++ if (head) |
1891 |
++ kfree_skb(head); |
1892 |
+ ipq_put(qp); |
1893 |
+ } |
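
Editor's note: the dance above -- rb_erase()/rb_replace_node(), memset() of rbnode, then a barrier() before touching head->dev -- exists because the two fields occupy the same bytes. A standalone illustration of the aliasing (the struct here is hypothetical; in the kernel it is the sk_buff union):

	#include <assert.h>

	struct node { struct node *left, *right; };

	struct buf {
		union {
			void *dev;		/* meaningful once off the tree */
			struct node rbnode;	/* meaningful while on the tree */
		};
	};

	int main(void)
	{
		struct buf b;
		struct node sentinel;

		b.rbnode.left = &sentinel;	/* "linked into the rb-tree" */
		b.dev = (void *)0x1;		/* this store aliases rbnode.left */
		assert(b.rbnode.left != &sentinel);
		return 0;
	}

Hence the head must be fully detached, and its rbnode cleared, before the route lookup populates head->dev.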
1894 |
+ |
1895 |
+@@ -262,21 +271,20 @@ out_rcu_unlock: |
1896 |
+ static struct ipq *ip_find(struct net *net, struct iphdr *iph, |
1897 |
+ u32 user, int vif) |
1898 |
+ { |
1899 |
++ struct frag_v4_compare_key key = { |
1900 |
++ .saddr = iph->saddr, |
1901 |
++ .daddr = iph->daddr, |
1902 |
++ .user = user, |
1903 |
++ .vif = vif, |
1904 |
++ .id = iph->id, |
1905 |
++ .protocol = iph->protocol, |
1906 |
++ }; |
1907 |
+ struct inet_frag_queue *q; |
1908 |
+- struct ip4_create_arg arg; |
1909 |
+- unsigned int hash; |
1910 |
+- |
1911 |
+- arg.iph = iph; |
1912 |
+- arg.user = user; |
1913 |
+- arg.vif = vif; |
1914 |
+- |
1915 |
+- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); |
1916 |
+ |
1917 |
+- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); |
1918 |
+- if (IS_ERR_OR_NULL(q)) { |
1919 |
+- inet_frag_maybe_warn_overflow(q, pr_fmt()); |
1920 |
++ q = inet_frag_find(&net->ipv4.frags, &key); |
1921 |
++ if (!q) |
1922 |
+ return NULL; |
1923 |
+- } |
1924 |
++ |
1925 |
+ return container_of(q, struct ipq, q); |
1926 |
+ } |
1927 |
+ |
1928 |
+@@ -296,7 +304,7 @@ static int ip_frag_too_far(struct ipq *qp) |
1929 |
+ end = atomic_inc_return(&peer->rid); |
1930 |
+ qp->rid = end; |
1931 |
+ |
1932 |
+- rc = qp->q.fragments && (end - start) > max; |
1933 |
++ rc = qp->q.fragments_tail && (end - start) > max; |
1934 |
+ |
1935 |
+ if (rc) { |
1936 |
+ struct net *net; |
1937 |
+@@ -310,7 +318,6 @@ static int ip_frag_too_far(struct ipq *qp) |
1938 |
+ |
1939 |
+ static int ip_frag_reinit(struct ipq *qp) |
1940 |
+ { |
1941 |
+- struct sk_buff *fp; |
1942 |
+ unsigned int sum_truesize = 0; |
1943 |
+ |
1944 |
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { |
1945 |
+@@ -318,21 +325,16 @@ static int ip_frag_reinit(struct ipq *qp) |
1946 |
+ return -ETIMEDOUT; |
1947 |
+ } |
1948 |
+ |
1949 |
+- fp = qp->q.fragments; |
1950 |
+- do { |
1951 |
+- struct sk_buff *xp = fp->next; |
1952 |
+- |
1953 |
+- sum_truesize += fp->truesize; |
1954 |
+- kfree_skb(fp); |
1955 |
+- fp = xp; |
1956 |
+- } while (fp); |
1957 |
++ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); |
1958 |
+ sub_frag_mem_limit(qp->q.net, sum_truesize); |
1959 |
+ |
1960 |
+ qp->q.flags = 0; |
1961 |
+ qp->q.len = 0; |
1962 |
+ qp->q.meat = 0; |
1963 |
+ qp->q.fragments = NULL; |
1964 |
++ qp->q.rb_fragments = RB_ROOT; |
1965 |
+ qp->q.fragments_tail = NULL; |
1966 |
++ qp->q.last_run_head = NULL; |
1967 |
+ qp->iif = 0; |
1968 |
+ qp->ecn = 0; |
1969 |
+ |
1970 |
+@@ -342,11 +344,13 @@ static int ip_frag_reinit(struct ipq *qp) |
1971 |
+ /* Add new segment to existing queue. */ |
1972 |
+ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) |
1973 |
+ { |
1974 |
+- struct sk_buff *prev, *next; |
1975 |
++ struct net *net = container_of(qp->q.net, struct net, ipv4.frags); |
1976 |
++ struct rb_node **rbn, *parent; |
1977 |
++ struct sk_buff *skb1, *prev_tail; |
1978 |
++ int ihl, end, skb1_run_end; |
1979 |
+ struct net_device *dev; |
1980 |
+ unsigned int fragsize; |
1981 |
+ int flags, offset; |
1982 |
+- int ihl, end; |
1983 |
+ int err = -ENOENT; |
1984 |
+ u8 ecn; |
1985 |
+ |
1986 |
+@@ -405,94 +409,68 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) |
1987 |
+ if (err) |
1988 |
+ goto err; |
1989 |
+ |
1990 |
+- /* Find out which fragments are in front and at the back of us |
1991 |
+- * in the chain of fragments so far. We must know where to put |
1992 |
+- * this fragment, right? |
1993 |
+- */ |
1994 |
+- prev = qp->q.fragments_tail; |
1995 |
+- if (!prev || FRAG_CB(prev)->offset < offset) { |
1996 |
+- next = NULL; |
1997 |
+- goto found; |
1998 |
+- } |
1999 |
+- prev = NULL; |
2000 |
+- for (next = qp->q.fragments; next != NULL; next = next->next) { |
2001 |
+- if (FRAG_CB(next)->offset >= offset) |
2002 |
+- break; /* bingo! */ |
2003 |
+- prev = next; |
2004 |
+- } |
2005 |
+- |
2006 |
+-found: |
2007 |
+- /* We found where to put this one. Check for overlap with |
2008 |
+- * preceding fragment, and, if needed, align things so that |
2009 |
+- * any overlaps are eliminated. |
2010 |
++ /* Note: skb->rbnode and skb->dev share the same location. */
2011 |
++ dev = skb->dev; |
2012 |
++ /* Makes sure the compiler won't do silly aliasing games */
2013 |
++ barrier(); |
2014 |
++ |
2015 |
++ /* RFC5722, Section 4, amended by Errata ID : 3089 |
2016 |
++ * When reassembling an IPv6 datagram, if |
2017 |
++ * one or more of its constituent fragments is determined to be an
2018 |
++ * overlapping fragment, the entire datagram (and any constituent |
2019 |
++ * fragments) MUST be silently discarded. |
2020 |
++ * |
2021 |
++ * We do the same here for IPv4 (and increment an snmp counter) but |
2022 |
++ * we do not want to drop the whole queue in response to a duplicate |
2023 |
++ * fragment. |
2024 |
+ */ |
2025 |
+- if (prev) { |
2026 |
+- int i = (FRAG_CB(prev)->offset + prev->len) - offset; |
2027 |
+- |
2028 |
+- if (i > 0) { |
2029 |
+- offset += i; |
2030 |
+- err = -EINVAL; |
2031 |
+- if (end <= offset) |
2032 |
+- goto err; |
2033 |
+- err = -ENOMEM; |
2034 |
+- if (!pskb_pull(skb, i)) |
2035 |
+- goto err; |
2036 |
+- if (skb->ip_summed != CHECKSUM_UNNECESSARY) |
2037 |
+- skb->ip_summed = CHECKSUM_NONE; |
2038 |
+- } |
2039 |
+- } |
2040 |
+ |
2041 |
+- err = -ENOMEM; |
2042 |
+- |
2043 |
+- while (next && FRAG_CB(next)->offset < end) { |
2044 |
+- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ |
2045 |
+- |
2046 |
+- if (i < next->len) { |
2047 |
+- /* Eat head of the next overlapped fragment |
2048 |
+- * and leave the loop. The next ones cannot overlap. |
2049 |
+- */ |
2050 |
+- if (!pskb_pull(next, i)) |
2051 |
+- goto err; |
2052 |
+- FRAG_CB(next)->offset += i; |
2053 |
+- qp->q.meat -= i; |
2054 |
+- if (next->ip_summed != CHECKSUM_UNNECESSARY) |
2055 |
+- next->ip_summed = CHECKSUM_NONE; |
2056 |
+- break; |
2057 |
+- } else { |
2058 |
+- struct sk_buff *free_it = next; |
2059 |
+- |
2060 |
+- /* Old fragment is completely overridden with |
2061 |
+- * new one drop it. |
2062 |
+- */ |
2063 |
+- next = next->next; |
2064 |
+- |
2065 |
+- if (prev) |
2066 |
+- prev->next = next; |
2067 |
++ err = -EINVAL; |
2068 |
++ /* Find out where to put this fragment. */ |
2069 |
++ prev_tail = qp->q.fragments_tail; |
2070 |
++ if (!prev_tail) |
2071 |
++ ip4_frag_create_run(&qp->q, skb); /* First fragment. */ |
2072 |
++ else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { |
2073 |
++ /* This is the common case: skb goes to the end. */ |
2074 |
++ /* Detect and discard overlaps. */ |
2075 |
++ if (offset < prev_tail->ip_defrag_offset + prev_tail->len) |
2076 |
++ goto discard_qp; |
2077 |
++ if (offset == prev_tail->ip_defrag_offset + prev_tail->len) |
2078 |
++ ip4_frag_append_to_last_run(&qp->q, skb); |
2079 |
++ else |
2080 |
++ ip4_frag_create_run(&qp->q, skb); |
2081 |
++ } else { |
2082 |
++ /* Binary search. Note that skb can become the first fragment, |
2083 |
++ * but not the last (covered above). |
2084 |
++ */ |
2085 |
++ rbn = &qp->q.rb_fragments.rb_node; |
2086 |
++ do { |
2087 |
++ parent = *rbn; |
2088 |
++ skb1 = rb_to_skb(parent); |
2089 |
++ skb1_run_end = skb1->ip_defrag_offset + |
2090 |
++ FRAG_CB(skb1)->frag_run_len; |
2091 |
++ if (end <= skb1->ip_defrag_offset) |
2092 |
++ rbn = &parent->rb_left; |
2093 |
++ else if (offset >= skb1_run_end) |
2094 |
++ rbn = &parent->rb_right; |
2095 |
++ else if (offset >= skb1->ip_defrag_offset && |
2096 |
++ end <= skb1_run_end) |
2097 |
++ goto err; /* No new data, potential duplicate */ |
2098 |
+ else |
2099 |
+- qp->q.fragments = next; |
2100 |
+- |
2101 |
+- qp->q.meat -= free_it->len; |
2102 |
+- sub_frag_mem_limit(qp->q.net, free_it->truesize); |
2103 |
+- kfree_skb(free_it); |
2104 |
+- } |
2105 |
++ goto discard_qp; /* Found an overlap */ |
2106 |
++ } while (*rbn); |
2107 |
++ /* Here we have parent properly set, and rbn pointing to |
2108 |
++ * one of its NULL left/right children. Insert skb. |
2109 |
++ */ |
2110 |
++ ip4_frag_init_run(skb); |
2111 |
++ rb_link_node(&skb->rbnode, parent, rbn); |
2112 |
++ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); |
2113 |
+ } |
2114 |
+ |
2115 |
+- FRAG_CB(skb)->offset = offset; |
2116 |
+- |
2117 |
+- /* Insert this fragment in the chain of fragments. */ |
2118 |
+- skb->next = next; |
2119 |
+- if (!next) |
2120 |
+- qp->q.fragments_tail = skb; |
2121 |
+- if (prev) |
2122 |
+- prev->next = skb; |
2123 |
+- else |
2124 |
+- qp->q.fragments = skb; |
2125 |
+- |
2126 |
+- dev = skb->dev; |
2127 |
+- if (dev) { |
2128 |
++ if (dev) |
2129 |
+ qp->iif = dev->ifindex; |
2130 |
+- skb->dev = NULL; |
2131 |
+- } |
2132 |
++ skb->ip_defrag_offset = offset; |
2133 |
++ |
2134 |
+ qp->q.stamp = skb->tstamp; |
2135 |
+ qp->q.meat += skb->len; |
2136 |
+ qp->ecn |= ecn; |
2137 |
+@@ -514,7 +492,7 @@ found: |
2138 |
+ unsigned long orefdst = skb->_skb_refdst; |
2139 |
+ |
2140 |
+ skb->_skb_refdst = 0UL; |
2141 |
+- err = ip_frag_reasm(qp, prev, dev); |
2142 |
++ err = ip_frag_reasm(qp, skb, prev_tail, dev); |
2143 |
+ skb->_skb_refdst = orefdst; |
2144 |
+ return err; |
2145 |
+ } |
2146 |
+@@ -522,20 +500,23 @@ found: |
2147 |
+ skb_dst_drop(skb); |
2148 |
+ return -EINPROGRESS; |
2149 |
+ |
2150 |
++discard_qp: |
2151 |
++ inet_frag_kill(&qp->q); |
2152 |
++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASM_OVERLAPS); |
2153 |
+ err: |
2154 |
+ kfree_skb(skb); |
2155 |
+ return err; |
2156 |
+ } |
2157 |
+ |
2158 |
+- |
2159 |
+ /* Build a new IP datagram from all its fragments. */ |
2160 |
+- |
2161 |
+-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, |
2162 |
+- struct net_device *dev) |
2163 |
++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, |
2164 |
++ struct sk_buff *prev_tail, struct net_device *dev) |
2165 |
+ { |
2166 |
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags); |
2167 |
+ struct iphdr *iph; |
2168 |
+- struct sk_buff *fp, *head = qp->q.fragments; |
2169 |
++ struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); |
2170 |
++ struct sk_buff **nextp; /* To build frag_list. */ |
2171 |
++ struct rb_node *rbn; |
2172 |
+ int len; |
2173 |
+ int ihlen; |
2174 |
+ int err; |
2175 |
+@@ -549,26 +530,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, |
2176 |
+ goto out_fail; |
2177 |
+ } |
2178 |
+ /* Make the one we just received the head. */ |
2179 |
+- if (prev) { |
2180 |
+- head = prev->next; |
2181 |
+- fp = skb_clone(head, GFP_ATOMIC); |
2182 |
++ if (head != skb) { |
2183 |
++ fp = skb_clone(skb, GFP_ATOMIC); |
2184 |
+ if (!fp) |
2185 |
+ goto out_nomem; |
2186 |
+- |
2187 |
+- fp->next = head->next; |
2188 |
+- if (!fp->next) |
2189 |
++ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; |
2190 |
++ if (RB_EMPTY_NODE(&skb->rbnode)) |
2191 |
++ FRAG_CB(prev_tail)->next_frag = fp; |
2192 |
++ else |
2193 |
++ rb_replace_node(&skb->rbnode, &fp->rbnode, |
2194 |
++ &qp->q.rb_fragments); |
2195 |
++ if (qp->q.fragments_tail == skb) |
2196 |
+ qp->q.fragments_tail = fp; |
2197 |
+- prev->next = fp; |
2198 |
+- |
2199 |
+- skb_morph(head, qp->q.fragments); |
2200 |
+- head->next = qp->q.fragments->next; |
2201 |
+- |
2202 |
+- consume_skb(qp->q.fragments); |
2203 |
+- qp->q.fragments = head; |
2204 |
++ skb_morph(skb, head); |
2205 |
++ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; |
2206 |
++ rb_replace_node(&head->rbnode, &skb->rbnode, |
2207 |
++ &qp->q.rb_fragments); |
2208 |
++ consume_skb(head); |
2209 |
++ head = skb; |
2210 |
+ } |
2211 |
+ |
2212 |
+- WARN_ON(!head); |
2213 |
+- WARN_ON(FRAG_CB(head)->offset != 0); |
2214 |
++ WARN_ON(head->ip_defrag_offset != 0); |
2215 |
+ |
2216 |
+ /* Allocate a new buffer for the datagram. */ |
2217 |
+ ihlen = ip_hdrlen(head); |
2218 |
+@@ -592,35 +574,61 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, |
2219 |
+ clone = alloc_skb(0, GFP_ATOMIC); |
2220 |
+ if (!clone) |
2221 |
+ goto out_nomem; |
2222 |
+- clone->next = head->next; |
2223 |
+- head->next = clone; |
2224 |
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; |
2225 |
+ skb_frag_list_init(head); |
2226 |
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++) |
2227 |
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]); |
2228 |
+ clone->len = clone->data_len = head->data_len - plen; |
2229 |
+- head->data_len -= clone->len; |
2230 |
+- head->len -= clone->len; |
2231 |
++ head->truesize += clone->truesize; |
2232 |
+ clone->csum = 0; |
2233 |
+ clone->ip_summed = head->ip_summed; |
2234 |
+ add_frag_mem_limit(qp->q.net, clone->truesize); |
2235 |
++ skb_shinfo(head)->frag_list = clone; |
2236 |
++ nextp = &clone->next; |
2237 |
++ } else { |
2238 |
++ nextp = &skb_shinfo(head)->frag_list; |
2239 |
+ } |
2240 |
+ |
2241 |
+- skb_shinfo(head)->frag_list = head->next; |
2242 |
+ skb_push(head, head->data - skb_network_header(head)); |
2243 |
+ |
2244 |
+- for (fp=head->next; fp; fp = fp->next) { |
2245 |
+- head->data_len += fp->len; |
2246 |
+- head->len += fp->len; |
2247 |
+- if (head->ip_summed != fp->ip_summed) |
2248 |
+- head->ip_summed = CHECKSUM_NONE; |
2249 |
+- else if (head->ip_summed == CHECKSUM_COMPLETE) |
2250 |
+- head->csum = csum_add(head->csum, fp->csum); |
2251 |
+- head->truesize += fp->truesize; |
2252 |
++ /* Traverse the tree in order, to build frag_list. */ |
2253 |
++ fp = FRAG_CB(head)->next_frag; |
2254 |
++ rbn = rb_next(&head->rbnode); |
2255 |
++ rb_erase(&head->rbnode, &qp->q.rb_fragments); |
2256 |
++ while (rbn || fp) { |
2257 |
++ /* fp points to the next sk_buff in the current run; |
2258 |
++ * rbn points to the next run. |
2259 |
++ */ |
2260 |
++ /* Go through the current run. */ |
2261 |
++ while (fp) { |
2262 |
++ *nextp = fp; |
2263 |
++ nextp = &fp->next; |
2264 |
++ fp->prev = NULL; |
2265 |
++ memset(&fp->rbnode, 0, sizeof(fp->rbnode)); |
2266 |
++ fp->sk = NULL; |
2267 |
++ head->data_len += fp->len; |
2268 |
++ head->len += fp->len; |
2269 |
++ if (head->ip_summed != fp->ip_summed) |
2270 |
++ head->ip_summed = CHECKSUM_NONE; |
2271 |
++ else if (head->ip_summed == CHECKSUM_COMPLETE) |
2272 |
++ head->csum = csum_add(head->csum, fp->csum); |
2273 |
++ head->truesize += fp->truesize; |
2274 |
++ fp = FRAG_CB(fp)->next_frag; |
2275 |
++ } |
2276 |
++ /* Move to the next run. */ |
2277 |
++ if (rbn) { |
2278 |
++ struct rb_node *rbnext = rb_next(rbn); |
2279 |
++ |
2280 |
++ fp = rb_to_skb(rbn); |
2281 |
++ rb_erase(rbn, &qp->q.rb_fragments); |
2282 |
++ rbn = rbnext; |
2283 |
++ } |
2284 |
+ } |
2285 |
+ sub_frag_mem_limit(qp->q.net, head->truesize); |
2286 |
+ |
2287 |
++ *nextp = NULL; |
2288 |
+ head->next = NULL; |
2289 |
++ head->prev = NULL; |
2290 |
+ head->dev = dev; |
2291 |
+ head->tstamp = qp->q.stamp; |
2292 |
+ IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); |
2293 |
+@@ -648,7 +656,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, |
2294 |
+ |
2295 |
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
2296 |
+ qp->q.fragments = NULL; |
2297 |
++ qp->q.rb_fragments = RB_ROOT; |
2298 |
+ qp->q.fragments_tail = NULL; |
2299 |
++ qp->q.last_run_head = NULL; |
2300 |
+ return 0; |
2301 |
+ |
2302 |
+ out_nomem: |
2303 |
+@@ -656,7 +666,7 @@ out_nomem: |
2304 |
+ err = -ENOMEM; |
2305 |
+ goto out_fail; |
2306 |
+ out_oversize: |
2307 |
+- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); |
2308 |
++ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr); |
2309 |
+ out_fail: |
2310 |
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); |
2311 |
+ return err; |
2312 |
+@@ -734,25 +744,46 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) |
2313 |
+ } |
2314 |
+ EXPORT_SYMBOL(ip_check_defrag); |
2315 |
+ |
2316 |
++unsigned int inet_frag_rbtree_purge(struct rb_root *root) |
2317 |
++{ |
2318 |
++ struct rb_node *p = rb_first(root); |
2319 |
++ unsigned int sum = 0; |
2320 |
++ |
2321 |
++ while (p) { |
2322 |
++ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); |
2323 |
++ |
2324 |
++ p = rb_next(p); |
2325 |
++ rb_erase(&skb->rbnode, root); |
2326 |
++ while (skb) { |
2327 |
++ struct sk_buff *next = FRAG_CB(skb)->next_frag; |
2328 |
++ |
2329 |
++ sum += skb->truesize; |
2330 |
++ kfree_skb(skb); |
2331 |
++ skb = next; |
2332 |
++ } |
2333 |
++ } |
2334 |
++ return sum; |
2335 |
++} |
2336 |
++EXPORT_SYMBOL(inet_frag_rbtree_purge); |
2337 |
++ |
2338 |
+ #ifdef CONFIG_SYSCTL |
2339 |
+-static int zero; |
2340 |
++static int dist_min; |
2341 |
+ |
2342 |
+ static struct ctl_table ip4_frags_ns_ctl_table[] = { |
2343 |
+ { |
2344 |
+ .procname = "ipfrag_high_thresh", |
2345 |
+ .data = &init_net.ipv4.frags.high_thresh, |
2346 |
+- .maxlen = sizeof(int), |
2347 |
++ .maxlen = sizeof(unsigned long), |
2348 |
+ .mode = 0644, |
2349 |
+- .proc_handler = proc_dointvec_minmax, |
2350 |
++ .proc_handler = proc_doulongvec_minmax, |
2351 |
+ .extra1 = &init_net.ipv4.frags.low_thresh |
2352 |
+ }, |
2353 |
+ { |
2354 |
+ .procname = "ipfrag_low_thresh", |
2355 |
+ .data = &init_net.ipv4.frags.low_thresh, |
2356 |
+- .maxlen = sizeof(int), |
2357 |
++ .maxlen = sizeof(unsigned long), |
2358 |
+ .mode = 0644, |
2359 |
+- .proc_handler = proc_dointvec_minmax, |
2360 |
+- .extra1 = &zero, |
2361 |
++ .proc_handler = proc_doulongvec_minmax, |
2362 |
+ .extra2 = &init_net.ipv4.frags.high_thresh |
2363 |
+ }, |
2364 |
+ { |
2365 |
+@@ -781,7 +812,7 @@ static struct ctl_table ip4_frags_ctl_table[] = { |
2366 |
+ .maxlen = sizeof(int), |
2367 |
+ .mode = 0644, |
2368 |
+ .proc_handler = proc_dointvec_minmax, |
2369 |
+- .extra1 = &zero |
2370 |
++ .extra1 = &dist_min, |
2371 |
+ }, |
2372 |
+ { } |
2373 |
+ }; |
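
Editor's note: with .maxlen widened to sizeof(unsigned long) and the handler switched to proc_doulongvec_minmax, the thresholds can exceed what a signed int holds on 64-bit; userspace consumption is unchanged. A small reader, assuming the standard procfs path (real for ipfrag_high_thresh; error handling abbreviated):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		char line[64];
		FILE *f = fopen("/proc/sys/net/ipv4/ipfrag_high_thresh", "r");

		if (!f || !fgets(line, sizeof(line), f))
			return 1;
		printf("ipfrag_high_thresh = %lu bytes\n",
		       strtoul(line, NULL, 10));
		fclose(f);
		return 0;
	}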
2374 |
+@@ -853,6 +884,8 @@ static void __init ip4_frags_ctl_register(void) |
2375 |
+ |
2376 |
+ static int __net_init ipv4_frags_init_net(struct net *net) |
2377 |
+ { |
2378 |
++ int res; |
2379 |
++ |
2380 |
+ /* Fragment cache limits. |
2381 |
+ * |
2382 |
+ * The fragment memory accounting code, (tries to) account for |
2383 |
+@@ -876,15 +909,21 @@ static int __net_init ipv4_frags_init_net(struct net *net) |
2384 |
+ */ |
2385 |
+ net->ipv4.frags.timeout = IP_FRAG_TIME; |
2386 |
+ |
2387 |
+- inet_frags_init_net(&net->ipv4.frags); |
2388 |
++ net->ipv4.frags.f = &ip4_frags; |
2389 |
+ |
2390 |
+- return ip4_frags_ns_ctl_register(net); |
2391 |
++ res = inet_frags_init_net(&net->ipv4.frags); |
2392 |
++ if (res < 0) |
2393 |
++ return res; |
2394 |
++ res = ip4_frags_ns_ctl_register(net); |
2395 |
++ if (res < 0) |
2396 |
++ inet_frags_exit_net(&net->ipv4.frags); |
2397 |
++ return res; |
2398 |
+ } |
2399 |
+ |
2400 |
+ static void __net_exit ipv4_frags_exit_net(struct net *net) |
2401 |
+ { |
2402 |
+ ip4_frags_ns_ctl_unregister(net); |
2403 |
+- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); |
2404 |
++ inet_frags_exit_net(&net->ipv4.frags); |
2405 |
+ } |
2406 |
+ |
2407 |
+ static struct pernet_operations ip4_frags_ops = { |
2408 |
+@@ -892,18 +931,50 @@ static struct pernet_operations ip4_frags_ops = { |
2409 |
+ .exit = ipv4_frags_exit_net, |
2410 |
+ }; |
2411 |
+ |
2412 |
++ |
2413 |
++static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed) |
2414 |
++{ |
2415 |
++ return jhash2(data, |
2416 |
++ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); |
2417 |
++} |
2418 |
++ |
2419 |
++static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed) |
2420 |
++{ |
2421 |
++ const struct inet_frag_queue *fq = data; |
2422 |
++ |
2423 |
++ return jhash2((const u32 *)&fq->key.v4, |
2424 |
++ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); |
2425 |
++} |
2426 |
++ |
2427 |
++static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) |
2428 |
++{ |
2429 |
++ const struct frag_v4_compare_key *key = arg->key; |
2430 |
++ const struct inet_frag_queue *fq = ptr; |
2431 |
++ |
2432 |
++ return !!memcmp(&fq->key, key, sizeof(*key)); |
2433 |
++} |
2434 |
++ |
2435 |
++static const struct rhashtable_params ip4_rhash_params = { |
2436 |
++ .head_offset = offsetof(struct inet_frag_queue, node), |
2437 |
++ .key_offset = offsetof(struct inet_frag_queue, key), |
2438 |
++ .key_len = sizeof(struct frag_v4_compare_key), |
2439 |
++ .hashfn = ip4_key_hashfn, |
2440 |
++ .obj_hashfn = ip4_obj_hashfn, |
2441 |
++ .obj_cmpfn = ip4_obj_cmpfn, |
2442 |
++ .automatic_shrinking = true, |
2443 |
++}; |
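
Editor's note: both hash functions feed the compare key to jhash2(), which walks the key as an array of u32. That only works if the key struct is a whole number of 32-bit words with no stray padding -- a property one could pin down at compile time (the field layout below mirrors the key initializer in ip_find() above; the check itself is a hypothetical addition):

	#include <stdint.h>

	struct frag_v4_compare_key {
		uint32_t saddr;		/* __be32 in the kernel */
		uint32_t daddr;
		uint32_t user;
		uint32_t vif;
		uint16_t id;		/* __be16 */
		uint16_t protocol;
	};

	_Static_assert(sizeof(struct frag_v4_compare_key) % sizeof(uint32_t) == 0,
		       "key must hash as whole u32 words");

The same word-multiple requirement holds for frag_v6_compare_key used by the IPv6 variant further down.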
2444 |
++ |
2445 |
+ void __init ipfrag_init(void) |
2446 |
+ { |
2447 |
+- ip4_frags_ctl_register(); |
2448 |
+- register_pernet_subsys(&ip4_frags_ops); |
2449 |
+- ip4_frags.hashfn = ip4_hashfn; |
2450 |
+ ip4_frags.constructor = ip4_frag_init; |
2451 |
+ ip4_frags.destructor = ip4_frag_free; |
2452 |
+ ip4_frags.skb_free = NULL; |
2453 |
+ ip4_frags.qsize = sizeof(struct ipq); |
2454 |
+- ip4_frags.match = ip4_frag_match; |
2455 |
+ ip4_frags.frag_expire = ip_expire; |
2456 |
+ ip4_frags.frags_cache_name = ip_frag_cache_name; |
2457 |
++ ip4_frags.rhash_params = ip4_rhash_params; |
2458 |
+ if (inet_frags_init(&ip4_frags)) |
2459 |
+ panic("IP: failed to allocate ip4_frags cache\n"); |
2460 |
++ ip4_frags_ctl_register(); |
2461 |
++ register_pernet_subsys(&ip4_frags_ops); |
2462 |
+ } |
2463 |
+diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c |
2464 |
+index 3abd9d7a3adf..b001ad668108 100644 |
2465 |
+--- a/net/ipv4/proc.c |
2466 |
++++ b/net/ipv4/proc.c |
2467 |
+@@ -52,7 +52,6 @@ |
2468 |
+ static int sockstat_seq_show(struct seq_file *seq, void *v) |
2469 |
+ { |
2470 |
+ struct net *net = seq->private; |
2471 |
+- unsigned int frag_mem; |
2472 |
+ int orphans, sockets; |
2473 |
+ |
2474 |
+ local_bh_disable(); |
2475 |
+@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) |
2476 |
+ sock_prot_inuse_get(net, &udplite_prot)); |
2477 |
+ seq_printf(seq, "RAW: inuse %d\n", |
2478 |
+ sock_prot_inuse_get(net, &raw_prot)); |
2479 |
+- frag_mem = ip_frag_mem(net); |
2480 |
+- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); |
2481 |
++ seq_printf(seq, "FRAG: inuse %u memory %lu\n", |
2482 |
++ atomic_read(&net->ipv4.frags.rhashtable.nelems), |
2483 |
++ frag_mem_limit(&net->ipv4.frags)); |
2484 |
+ return 0; |
2485 |
+ } |
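
Editor's note: the FRAG line changes meaning here -- inuse was previously a 0/1 flag (!!frag_mem) and is now the actual queue count from rhashtable.nelems, while memory becomes an unsigned long. A reader matching the new format (path and format string taken from the seq_printf above):

	#include <stdio.h>

	int main(void)
	{
		unsigned int inuse;
		unsigned long memory;
		char line[256];
		FILE *f = fopen("/proc/net/sockstat", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (sscanf(line, "FRAG: inuse %u memory %lu",
				   &inuse, &memory) == 2)
				printf("%u queues, %lu bytes\n", inuse, memory);
		fclose(f);
		return 0;
	}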
2486 |
+ |
2487 |
+@@ -132,6 +132,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { |
2488 |
+ SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), |
2489 |
+ SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), |
2490 |
+ SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), |
2491 |
++ SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS), |
2492 |
+ SNMP_MIB_SENTINEL |
2493 |
+ }; |
2494 |
+ |
2495 |
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c |
2496 |
+index 5a9ae56e7868..664c84e47bab 100644 |
2497 |
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c |
2498 |
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c |
2499 |
+@@ -64,7 +64,6 @@ struct nf_ct_frag6_skb_cb |
2500 |
+ static struct inet_frags nf_frags; |
2501 |
+ |
2502 |
+ #ifdef CONFIG_SYSCTL |
2503 |
+-static int zero; |
2504 |
+ |
2505 |
+ static struct ctl_table nf_ct_frag6_sysctl_table[] = { |
2506 |
+ { |
2507 |
+@@ -77,18 +76,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { |
2508 |
+ { |
2509 |
+ .procname = "nf_conntrack_frag6_low_thresh", |
2510 |
+ .data = &init_net.nf_frag.frags.low_thresh, |
2511 |
+- .maxlen = sizeof(unsigned int), |
2512 |
++ .maxlen = sizeof(unsigned long), |
2513 |
+ .mode = 0644, |
2514 |
+- .proc_handler = proc_dointvec_minmax, |
2515 |
+- .extra1 = &zero, |
2516 |
++ .proc_handler = proc_doulongvec_minmax, |
2517 |
+ .extra2 = &init_net.nf_frag.frags.high_thresh |
2518 |
+ }, |
2519 |
+ { |
2520 |
+ .procname = "nf_conntrack_frag6_high_thresh", |
2521 |
+ .data = &init_net.nf_frag.frags.high_thresh, |
2522 |
+- .maxlen = sizeof(unsigned int), |
2523 |
++ .maxlen = sizeof(unsigned long), |
2524 |
+ .mode = 0644, |
2525 |
+- .proc_handler = proc_dointvec_minmax, |
2526 |
++ .proc_handler = proc_doulongvec_minmax, |
2527 |
+ .extra1 = &init_net.nf_frag.frags.low_thresh |
2528 |
+ }, |
2529 |
+ { } |
2530 |
+@@ -153,23 +151,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) |
2531 |
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); |
2532 |
+ } |
2533 |
+ |
2534 |
+-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, |
2535 |
+- const struct in6_addr *daddr) |
2536 |
+-{ |
2537 |
+- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); |
2538 |
+- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), |
2539 |
+- (__force u32)id, nf_frags.rnd); |
2540 |
+-} |
2541 |
+- |
2542 |
+- |
2543 |
+-static unsigned int nf_hashfn(const struct inet_frag_queue *q) |
2544 |
+-{ |
2545 |
+- const struct frag_queue *nq; |
2546 |
+- |
2547 |
+- nq = container_of(q, struct frag_queue, q); |
2548 |
+- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); |
2549 |
+-} |
2550 |
+- |
2551 |
+ static void nf_skb_free(struct sk_buff *skb) |
2552 |
+ { |
2553 |
+ if (NFCT_FRAG6_CB(skb)->orig) |
2554 |
+@@ -184,34 +165,26 @@ static void nf_ct_frag6_expire(unsigned long data) |
2555 |
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); |
2556 |
+ net = container_of(fq->q.net, struct net, nf_frag.frags); |
2557 |
+ |
2558 |
+- ip6_expire_frag_queue(net, fq, &nf_frags); |
2559 |
++ ip6_expire_frag_queue(net, fq); |
2560 |
+ } |
2561 |
+ |
2562 |
+ /* Creation primitives. */ |
2563 |
+-static inline struct frag_queue *fq_find(struct net *net, __be32 id, |
2564 |
+- u32 user, struct in6_addr *src, |
2565 |
+- struct in6_addr *dst, int iif, u8 ecn) |
2566 |
++static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, |
2567 |
++ const struct ipv6hdr *hdr, int iif) |
2568 |
+ { |
2569 |
++ struct frag_v6_compare_key key = { |
2570 |
++ .id = id, |
2571 |
++ .saddr = hdr->saddr, |
2572 |
++ .daddr = hdr->daddr, |
2573 |
++ .user = user, |
2574 |
++ .iif = iif, |
2575 |
++ }; |
2576 |
+ struct inet_frag_queue *q; |
2577 |
+- struct ip6_create_arg arg; |
2578 |
+- unsigned int hash; |
2579 |
+- |
2580 |
+- arg.id = id; |
2581 |
+- arg.user = user; |
2582 |
+- arg.src = src; |
2583 |
+- arg.dst = dst; |
2584 |
+- arg.iif = iif; |
2585 |
+- arg.ecn = ecn; |
2586 |
+- |
2587 |
+- local_bh_disable(); |
2588 |
+- hash = nf_hash_frag(id, src, dst); |
2589 |
+- |
2590 |
+- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); |
2591 |
+- local_bh_enable(); |
2592 |
+- if (IS_ERR_OR_NULL(q)) { |
2593 |
+- inet_frag_maybe_warn_overflow(q, pr_fmt()); |
2594 |
++ |
2595 |
++ q = inet_frag_find(&net->nf_frag.frags, &key); |
2596 |
++ if (!q) |
2597 |
+ return NULL; |
2598 |
+- } |
2599 |
++ |
2600 |
+ return container_of(q, struct frag_queue, q); |
2601 |
+ } |
2602 |
+ |
2603 |
+@@ -362,7 +335,7 @@ found: |
2604 |
+ return 0; |
2605 |
+ |
2606 |
+ discard_fq: |
2607 |
+- inet_frag_kill(&fq->q, &nf_frags); |
2608 |
++ inet_frag_kill(&fq->q); |
2609 |
+ err: |
2610 |
+ return -1; |
2611 |
+ } |
2612 |
+@@ -383,7 +356,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) |
2613 |
+ int payload_len; |
2614 |
+ u8 ecn; |
2615 |
+ |
2616 |
+- inet_frag_kill(&fq->q, &nf_frags); |
2617 |
++ inet_frag_kill(&fq->q); |
2618 |
+ |
2619 |
+ WARN_ON(head == NULL); |
2620 |
+ WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); |
2621 |
+@@ -454,6 +427,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) |
2622 |
+ else if (head->ip_summed == CHECKSUM_COMPLETE) |
2623 |
+ head->csum = csum_add(head->csum, fp->csum); |
2624 |
+ head->truesize += fp->truesize; |
2625 |
++ fp->sk = NULL; |
2626 |
+ } |
2627 |
+ sub_frag_mem_limit(fq->q.net, head->truesize); |
2628 |
+ |
2629 |
+@@ -472,6 +446,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) |
2630 |
+ head->csum); |
2631 |
+ |
2632 |
+ fq->q.fragments = NULL; |
2633 |
++ fq->q.rb_fragments = RB_ROOT; |
2634 |
+ fq->q.fragments_tail = NULL; |
2635 |
+ |
2636 |
+ /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ |
2637 |
+@@ -601,9 +576,13 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use |
2638 |
+ hdr = ipv6_hdr(clone); |
2639 |
+ fhdr = (struct frag_hdr *)skb_transport_header(clone); |
2640 |
+ |
2641 |
++ if (clone->len - skb_network_offset(clone) < IPV6_MIN_MTU && |
2642 |
++ fhdr->frag_off & htons(IP6_MF)) |
2643 |
++ goto ret_orig; |
2644 |
++ |
2645 |
+ skb_orphan(skb); |
2646 |
+- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, |
2647 |
+- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); |
2648 |
++ fq = fq_find(net, fhdr->identification, user, hdr, |
2649 |
++ skb->dev ? skb->dev->ifindex : 0); |
2650 |
+ if (fq == NULL) { |
2651 |
+ pr_debug("Can't find and can't create new queue\n"); |
2652 |
+ goto ret_orig; |
2653 |
+@@ -614,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use |
2654 |
+ if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { |
2655 |
+ spin_unlock_bh(&fq->q.lock); |
2656 |
+ pr_debug("Can't insert skb to queue\n"); |
2657 |
+- inet_frag_put(&fq->q, &nf_frags); |
2658 |
++ inet_frag_put(&fq->q); |
2659 |
+ goto ret_orig; |
2660 |
+ } |
2661 |
+ |
2662 |
+@@ -626,7 +605,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use |
2663 |
+ } |
2664 |
+ spin_unlock_bh(&fq->q.lock); |
2665 |
+ |
2666 |
+- inet_frag_put(&fq->q, &nf_frags); |
2667 |
++ inet_frag_put(&fq->q); |
2668 |
+ return ret_skb; |
2669 |
+ |
2670 |
+ ret_orig: |
2671 |
+@@ -650,18 +629,26 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); |
2672 |
+ |
2673 |
+ static int nf_ct_net_init(struct net *net) |
2674 |
+ { |
2675 |
++ int res; |
2676 |
++ |
2677 |
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; |
2678 |
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; |
2679 |
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; |
2680 |
+- inet_frags_init_net(&net->nf_frag.frags); |
2681 |
+- |
2682 |
+- return nf_ct_frag6_sysctl_register(net); |
2683 |
++ net->nf_frag.frags.f = &nf_frags; |
2684 |
++ |
2685 |
++ res = inet_frags_init_net(&net->nf_frag.frags); |
2686 |
++ if (res < 0) |
2687 |
++ return res; |
2688 |
++ res = nf_ct_frag6_sysctl_register(net); |
2689 |
++ if (res < 0) |
2690 |
++ inet_frags_exit_net(&net->nf_frag.frags); |
2691 |
++ return res; |
2692 |
+ } |
2693 |
+ |
2694 |
+ static void nf_ct_net_exit(struct net *net) |
2695 |
+ { |
2696 |
+ nf_ct_frags6_sysctl_unregister(net); |
2697 |
+- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); |
2698 |
++ inet_frags_exit_net(&net->nf_frag.frags); |
2699 |
+ } |
2700 |
+ |
2701 |
+ static struct pernet_operations nf_ct_net_ops = { |
2702 |
+@@ -673,14 +660,13 @@ int nf_ct_frag6_init(void) |
2703 |
+ { |
2704 |
+ int ret = 0; |
2705 |
+ |
2706 |
+- nf_frags.hashfn = nf_hashfn; |
2707 |
+ nf_frags.constructor = ip6_frag_init; |
2708 |
+ nf_frags.destructor = NULL; |
2709 |
+ nf_frags.skb_free = nf_skb_free; |
2710 |
+ nf_frags.qsize = sizeof(struct frag_queue); |
2711 |
+- nf_frags.match = ip6_frag_match; |
2712 |
+ nf_frags.frag_expire = nf_ct_frag6_expire; |
2713 |
+ nf_frags.frags_cache_name = nf_frags_cache_name; |
2714 |
++ nf_frags.rhash_params = ip6_rhash_params; |
2715 |
+ ret = inet_frags_init(&nf_frags); |
2716 |
+ if (ret) |
2717 |
+ goto out; |
2718 |
+diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c |
2719 |
+index 679253d0af84..73e766e7bc37 100644 |
2720 |
+--- a/net/ipv6/proc.c |
2721 |
++++ b/net/ipv6/proc.c |
2722 |
+@@ -33,7 +33,6 @@ |
2723 |
+ static int sockstat6_seq_show(struct seq_file *seq, void *v) |
2724 |
+ { |
2725 |
+ struct net *net = seq->private; |
2726 |
+- unsigned int frag_mem = ip6_frag_mem(net); |
2727 |
+ |
2728 |
+ seq_printf(seq, "TCP6: inuse %d\n", |
2729 |
+ sock_prot_inuse_get(net, &tcpv6_prot)); |
2730 |
+@@ -43,7 +42,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) |
2731 |
+ sock_prot_inuse_get(net, &udplitev6_prot)); |
2732 |
+ seq_printf(seq, "RAW6: inuse %d\n", |
2733 |
+ sock_prot_inuse_get(net, &rawv6_prot)); |
2734 |
+- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem); |
2735 |
++ seq_printf(seq, "FRAG6: inuse %u memory %lu\n", |
2736 |
++ atomic_read(&net->ipv6.frags.rhashtable.nelems), |
2737 |
++ frag_mem_limit(&net->ipv6.frags)); |
2738 |
+ return 0; |
2739 |
+ } |
2740 |
+ |
2741 |
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c |
2742 |
+index 58f2139ebb5e..ec917f58d105 100644 |
2743 |
+--- a/net/ipv6/reassembly.c |
2744 |
++++ b/net/ipv6/reassembly.c |
2745 |
+@@ -79,94 +79,58 @@ static struct inet_frags ip6_frags; |
2746 |
+ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, |
2747 |
+ struct net_device *dev); |
2748 |
+ |
2749 |
+-/* |
2750 |
+- * callers should be careful not to use the hash value outside the ipfrag_lock |
2751 |
+- * as doing so could race with ipfrag_hash_rnd being recalculated. |
2752 |
+- */ |
2753 |
+-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, |
2754 |
+- const struct in6_addr *daddr) |
2755 |
+-{ |
2756 |
+- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); |
2757 |
+- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), |
2758 |
+- (__force u32)id, ip6_frags.rnd); |
2759 |
+-} |
2760 |
+- |
2761 |
+-static unsigned int ip6_hashfn(const struct inet_frag_queue *q) |
2762 |
+-{ |
2763 |
+- const struct frag_queue *fq; |
2764 |
+- |
2765 |
+- fq = container_of(q, struct frag_queue, q); |
2766 |
+- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); |
2767 |
+-} |
2768 |
+- |
2769 |
+-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) |
2770 |
+-{ |
2771 |
+- const struct frag_queue *fq; |
2772 |
+- const struct ip6_create_arg *arg = a; |
2773 |
+- |
2774 |
+- fq = container_of(q, struct frag_queue, q); |
2775 |
+- return fq->id == arg->id && |
2776 |
+- fq->user == arg->user && |
2777 |
+- ipv6_addr_equal(&fq->saddr, arg->src) && |
2778 |
+- ipv6_addr_equal(&fq->daddr, arg->dst) && |
2779 |
+- (arg->iif == fq->iif || |
2780 |
+- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | |
2781 |
+- IPV6_ADDR_LINKLOCAL))); |
2782 |
+-} |
2783 |
+-EXPORT_SYMBOL(ip6_frag_match); |
2784 |
+- |
2785 |
+ void ip6_frag_init(struct inet_frag_queue *q, const void *a) |
2786 |
+ { |
2787 |
+ struct frag_queue *fq = container_of(q, struct frag_queue, q); |
2788 |
+- const struct ip6_create_arg *arg = a; |
2789 |
++ const struct frag_v6_compare_key *key = a; |
2790 |
+ |
2791 |
+- fq->id = arg->id; |
2792 |
+- fq->user = arg->user; |
2793 |
+- fq->saddr = *arg->src; |
2794 |
+- fq->daddr = *arg->dst; |
2795 |
+- fq->ecn = arg->ecn; |
2796 |
++ q->key.v6 = *key; |
2797 |
++ fq->ecn = 0; |
2798 |
+ } |
2799 |
+ EXPORT_SYMBOL(ip6_frag_init); |
2800 |
+ |
2801 |
+-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, |
2802 |
+- struct inet_frags *frags) |
2803 |
++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) |
2804 |
+ { |
2805 |
+ struct net_device *dev = NULL; |
2806 |
++ struct sk_buff *head; |
2807 |
+ |
2808 |
++ rcu_read_lock(); |
2809 |
+ spin_lock(&fq->q.lock); |
2810 |
+ |
2811 |
+ if (fq->q.flags & INET_FRAG_COMPLETE) |
2812 |
+ goto out; |
2813 |
+ |
2814 |
+- inet_frag_kill(&fq->q, frags); |
2815 |
++ inet_frag_kill(&fq->q); |
2816 |
+ |
2817 |
+- rcu_read_lock(); |
2818 |
+ dev = dev_get_by_index_rcu(net, fq->iif); |
2819 |
+ if (!dev) |
2820 |
+- goto out_rcu_unlock; |
2821 |
++ goto out; |
2822 |
+ |
2823 |
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); |
2824 |
+- |
2825 |
+- if (inet_frag_evicting(&fq->q)) |
2826 |
+- goto out_rcu_unlock; |
2827 |
+- |
2828 |
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); |
2829 |
+ |
2830 |
+ /* Don't send error if the first segment did not arrive. */ |
2831 |
+- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) |
2832 |
+- goto out_rcu_unlock; |
2833 |
++ head = fq->q.fragments; |
2834 |
++ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) |
2835 |
++ goto out; |
2836 |
+ |
2837 |
+ /* But use as source device on which LAST ARRIVED |
2838 |
+ * segment was received. And do not use fq->dev |
2839 |
+ * pointer directly, the device might have already disappeared.
2840 |
+ */ |
2841 |
+- fq->q.fragments->dev = dev; |
2842 |
+- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); |
2843 |
+-out_rcu_unlock: |
2844 |
+- rcu_read_unlock(); |
2845 |
++ head->dev = dev; |
2846 |
++ skb_get(head); |
2847 |
++ spin_unlock(&fq->q.lock); |
2848 |
++ |
2849 |
++ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); |
2850 |
++ kfree_skb(head); |
2851 |
++ goto out_rcu_unlock; |
2852 |
++ |
2853 |
+ out: |
2854 |
+ spin_unlock(&fq->q.lock); |
2855 |
+- inet_frag_put(&fq->q, frags); |
2856 |
++out_rcu_unlock: |
2857 |
++ rcu_read_unlock(); |
2858 |
++ inet_frag_put(&fq->q); |
2859 |
+ } |
2860 |
+ EXPORT_SYMBOL(ip6_expire_frag_queue); |
2861 |
+ |
2862 |
+@@ -178,31 +142,29 @@ static void ip6_frag_expire(unsigned long data) |
2863 |
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); |
2864 |
+ net = container_of(fq->q.net, struct net, ipv6.frags); |
2865 |
+ |
2866 |
+- ip6_expire_frag_queue(net, fq, &ip6_frags); |
2867 |
++ ip6_expire_frag_queue(net, fq); |
2868 |
+ } |
2869 |
+ |
2870 |
+ static struct frag_queue * |
2871 |
+-fq_find(struct net *net, __be32 id, const struct in6_addr *src, |
2872 |
+- const struct in6_addr *dst, int iif, u8 ecn) |
2873 |
++fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) |
2874 |
+ { |
2875 |
++ struct frag_v6_compare_key key = { |
2876 |
++ .id = id, |
2877 |
++ .saddr = hdr->saddr, |
2878 |
++ .daddr = hdr->daddr, |
2879 |
++ .user = IP6_DEFRAG_LOCAL_DELIVER, |
2880 |
++ .iif = iif, |
2881 |
++ }; |
2882 |
+ struct inet_frag_queue *q; |
2883 |
+- struct ip6_create_arg arg; |
2884 |
+- unsigned int hash; |
2885 |
+ |
2886 |
+- arg.id = id; |
2887 |
+- arg.user = IP6_DEFRAG_LOCAL_DELIVER; |
2888 |
+- arg.src = src; |
2889 |
+- arg.dst = dst; |
2890 |
+- arg.iif = iif; |
2891 |
+- arg.ecn = ecn; |
2892 |
++ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | |
2893 |
++ IPV6_ADDR_LINKLOCAL))) |
2894 |
++ key.iif = 0; |
2895 |
+ |
2896 |
+- hash = inet6_hash_frag(id, src, dst); |
2897 |
+- |
2898 |
+- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); |
2899 |
+- if (IS_ERR_OR_NULL(q)) { |
2900 |
+- inet_frag_maybe_warn_overflow(q, pr_fmt()); |
2901 |
++ q = inet_frag_find(&net->ipv6.frags, &key); |
2902 |
++ if (!q) |
2903 |
+ return NULL; |
2904 |
+- } |
2905 |
++ |
2906 |
+ return container_of(q, struct frag_queue, q); |
2907 |
+ } |
2908 |
+ |
2909 |
+@@ -359,7 +321,7 @@ found: |
2910 |
+ return -1; |
2911 |
+ |
2912 |
+ discard_fq: |
2913 |
+- inet_frag_kill(&fq->q, &ip6_frags); |
2914 |
++ inet_frag_kill(&fq->q); |
2915 |
+ err: |
2916 |
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), |
2917 |
+ IPSTATS_MIB_REASMFAILS); |
2918 |
+@@ -386,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, |
2919 |
+ int sum_truesize; |
2920 |
+ u8 ecn; |
2921 |
+ |
2922 |
+- inet_frag_kill(&fq->q, &ip6_frags); |
2923 |
++ inet_frag_kill(&fq->q); |
2924 |
+ |
2925 |
+ ecn = ip_frag_ecn_table[fq->ecn]; |
2926 |
+ if (unlikely(ecn == 0xff)) |
2927 |
+@@ -503,6 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, |
2928 |
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); |
2929 |
+ rcu_read_unlock(); |
2930 |
+ fq->q.fragments = NULL; |
2931 |
++ fq->q.rb_fragments = RB_ROOT; |
2932 |
+ fq->q.fragments_tail = NULL; |
2933 |
+ return 1; |
2934 |
+ |
2935 |
+@@ -524,6 +487,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) |
2936 |
+ struct frag_queue *fq; |
2937 |
+ const struct ipv6hdr *hdr = ipv6_hdr(skb); |
2938 |
+ struct net *net = dev_net(skb_dst(skb)->dev); |
2939 |
++ int iif; |
2940 |
+ |
2941 |
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) |
2942 |
+ goto fail_hdr; |
2943 |
+@@ -552,17 +516,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb) |
2944 |
+ return 1; |
2945 |
+ } |
2946 |
+ |
2947 |
+- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, |
2948 |
+- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); |
2949 |
++ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && |
2950 |
++ fhdr->frag_off & htons(IP6_MF)) |
2951 |
++ goto fail_hdr; |
2952 |
++ |
2953 |
++ iif = skb->dev ? skb->dev->ifindex : 0; |
2954 |
++ fq = fq_find(net, fhdr->identification, hdr, iif); |
2955 |
+ if (fq) { |
2956 |
+ int ret; |
2957 |
+ |
2958 |
+ spin_lock(&fq->q.lock); |
2959 |
+ |
2960 |
++ fq->iif = iif; |
2961 |
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); |
2962 |
+ |
2963 |
+ spin_unlock(&fq->q.lock); |
2964 |
+- inet_frag_put(&fq->q, &ip6_frags); |
2965 |
++ inet_frag_put(&fq->q); |
2966 |
+ return ret; |
2967 |
+ } |
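
Editor's note: the new early check drops any non-final fragment shorter than IPV6_MIN_MTU (1280 bytes, the minimum link MTU every IPv6 link must support), since a legitimate sender has no reason to emit one. Distilled as a predicate (frag_too_small is a hypothetical name):

	#include <assert.h>
	#include <stdbool.h>

	#define IPV6_MIN_MTU 1280

	static bool frag_too_small(int pkt_len, bool more_fragments)
	{
		return pkt_len < IPV6_MIN_MTU && more_fragments;
	}

	int main(void)
	{
		assert(frag_too_small(1000, true));	/* dropped */
		assert(!frag_too_small(1000, false));	/* last fragment: fine */
		assert(!frag_too_small(1280, true));	/* full-size: fine */
		return 0;
	}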
2968 |
+ |
2969 |
+@@ -583,24 +552,22 @@ static const struct inet6_protocol frag_protocol = { |
2970 |
+ }; |
2971 |
+ |
2972 |
+ #ifdef CONFIG_SYSCTL |
2973 |
+-static int zero; |
2974 |
+ |
2975 |
+ static struct ctl_table ip6_frags_ns_ctl_table[] = { |
2976 |
+ { |
2977 |
+ .procname = "ip6frag_high_thresh", |
2978 |
+ .data = &init_net.ipv6.frags.high_thresh, |
2979 |
+- .maxlen = sizeof(int), |
2980 |
++ .maxlen = sizeof(unsigned long), |
2981 |
+ .mode = 0644, |
2982 |
+- .proc_handler = proc_dointvec_minmax, |
2983 |
++ .proc_handler = proc_doulongvec_minmax, |
2984 |
+ .extra1 = &init_net.ipv6.frags.low_thresh |
2985 |
+ }, |
2986 |
+ { |
2987 |
+ .procname = "ip6frag_low_thresh", |
2988 |
+ .data = &init_net.ipv6.frags.low_thresh, |
2989 |
+- .maxlen = sizeof(int), |
2990 |
++ .maxlen = sizeof(unsigned long), |
2991 |
+ .mode = 0644, |
2992 |
+- .proc_handler = proc_dointvec_minmax, |
2993 |
+- .extra1 = &zero, |
2994 |
++ .proc_handler = proc_doulongvec_minmax, |
2995 |
+ .extra2 = &init_net.ipv6.frags.high_thresh |
2996 |
+ }, |
2997 |
+ { |
2998 |
+@@ -708,19 +675,27 @@ static void ip6_frags_sysctl_unregister(void) |
2999 |
+ |
3000 |
+ static int __net_init ipv6_frags_init_net(struct net *net) |
3001 |
+ { |
3002 |
++ int res; |
3003 |
++ |
3004 |
+ net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; |
3005 |
+ net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; |
3006 |
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; |
3007 |
++ net->ipv6.frags.f = &ip6_frags; |
3008 |
+ |
3009 |
+- inet_frags_init_net(&net->ipv6.frags); |
3010 |
++ res = inet_frags_init_net(&net->ipv6.frags); |
3011 |
++ if (res < 0) |
3012 |
++ return res; |
3013 |
+ |
3014 |
+- return ip6_frags_ns_sysctl_register(net); |
3015 |
++ res = ip6_frags_ns_sysctl_register(net); |
3016 |
++ if (res < 0) |
3017 |
++ inet_frags_exit_net(&net->ipv6.frags); |
3018 |
++ return res; |
3019 |
+ } |
3020 |
+ |
3021 |
+ static void __net_exit ipv6_frags_exit_net(struct net *net) |
3022 |
+ { |
3023 |
+ ip6_frags_ns_sysctl_unregister(net); |
3024 |
+- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); |
3025 |
++ inet_frags_exit_net(&net->ipv6.frags); |
3026 |
+ } |
3027 |
+ |
3028 |
+ static struct pernet_operations ip6_frags_ops = { |
3029 |
+@@ -728,14 +703,55 @@ static struct pernet_operations ip6_frags_ops = { |
3030 |
+ .exit = ipv6_frags_exit_net, |
3031 |
+ }; |
3032 |
+ |
3033 |
++static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) |
3034 |
++{ |
3035 |
++ return jhash2(data, |
3036 |
++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); |
3037 |
++} |
3038 |
++ |
3039 |
++static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) |
3040 |
++{ |
3041 |
++ const struct inet_frag_queue *fq = data; |
3042 |
++ |
3043 |
++ return jhash2((const u32 *)&fq->key.v6, |
3044 |
++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); |
3045 |
++} |
3046 |
++ |
3047 |
++static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) |
3048 |
++{ |
3049 |
++ const struct frag_v6_compare_key *key = arg->key; |
3050 |
++ const struct inet_frag_queue *fq = ptr; |
3051 |
++ |
3052 |
++ return !!memcmp(&fq->key, key, sizeof(*key)); |
3053 |
++} |
3054 |
++ |
3055 |
++const struct rhashtable_params ip6_rhash_params = { |
3056 |
++ .head_offset = offsetof(struct inet_frag_queue, node), |
3057 |
++ .hashfn = ip6_key_hashfn, |
3058 |
++ .obj_hashfn = ip6_obj_hashfn, |
3059 |
++ .obj_cmpfn = ip6_obj_cmpfn, |
3060 |
++ .automatic_shrinking = true, |
3061 |
++}; |
3062 |
++EXPORT_SYMBOL(ip6_rhash_params); |
3063 |
++ |
3064 |
+ int __init ipv6_frag_init(void) |
3065 |
+ { |
3066 |
+ int ret; |
3067 |
+ |
3068 |
+- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); |
3069 |
++ ip6_frags.constructor = ip6_frag_init; |
3070 |
++ ip6_frags.destructor = NULL; |
3071 |
++ ip6_frags.qsize = sizeof(struct frag_queue); |
3072 |
++ ip6_frags.frag_expire = ip6_frag_expire; |
3073 |
++ ip6_frags.frags_cache_name = ip6_frag_cache_name; |
3074 |
++ ip6_frags.rhash_params = ip6_rhash_params; |
3075 |
++ ret = inet_frags_init(&ip6_frags); |
3076 |
+ if (ret) |
3077 |
+ goto out; |
3078 |
+ |
3079 |
++ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); |
3080 |
++ if (ret) |
3081 |
++ goto err_protocol; |
3082 |
++ |
3083 |
+ ret = ip6_frags_sysctl_register(); |
3084 |
+ if (ret) |
3085 |
+ goto err_sysctl; |
3086 |
+@@ -744,17 +760,6 @@ int __init ipv6_frag_init(void) |
3087 |
+ if (ret) |
3088 |
+ goto err_pernet; |
3089 |
+ |
3090 |
+- ip6_frags.hashfn = ip6_hashfn; |
3091 |
+- ip6_frags.constructor = ip6_frag_init; |
3092 |
+- ip6_frags.destructor = NULL; |
3093 |
+- ip6_frags.skb_free = NULL; |
3094 |
+- ip6_frags.qsize = sizeof(struct frag_queue); |
3095 |
+- ip6_frags.match = ip6_frag_match; |
3096 |
+- ip6_frags.frag_expire = ip6_frag_expire; |
3097 |
+- ip6_frags.frags_cache_name = ip6_frag_cache_name; |
3098 |
+- ret = inet_frags_init(&ip6_frags); |
3099 |
+- if (ret) |
3100 |
+- goto err_pernet; |
3101 |
+ out: |
3102 |
+ return ret; |
3103 |
+ |
3104 |
+@@ -762,6 +767,8 @@ err_pernet: |
3105 |
+ ip6_frags_sysctl_unregister(); |
3106 |
+ err_sysctl: |
3107 |
+ inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); |
3108 |
++err_protocol: |
3109 |
++ inet_frags_fini(&ip6_frags); |
3110 |
+ goto out; |
3111 |
+ } |
3112 |
+ |