commit: 206a5e2746ef7fe6e5960e2af948e1eedef7e208
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Wed Aug 3 14:12:37 2022 +0000
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Wed Aug 3 14:12:44 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=206a5e27

Linux patch 5.10.135

Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org>

 0000_README | 4 +
 1134_linux-5.10.135.patch | 2841 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 2845 insertions(+)

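For reference, genpatches entries like the one registered below are plain unified diffs with a/ and b/ prefixes, typically applied on top of the previous sublevel's source tree. A minimal sketch of that usage follows; the source path is an illustrative assumption, not part of this commit:

    # Hypothetical usage sketch: apply the incremental stable patch added below
    cd /usr/src/linux-5.10.134                          # assumed location of a 5.10.134 tree
    patch -p1 --dry-run < 1134_linux-5.10.135.patch     # verify it applies cleanly first
    patch -p1 < 1134_linux-5.10.135.patch               # SUBLEVEL becomes 135 via the Makefile hunk
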
diff --git a/0000_README b/0000_README
index 7292c57d..19bd6321 100644
--- a/0000_README
+++ b/0000_README
@@ -579,6 +579,10 @@ Patch: 1133_linux-5.10.134.patch
From: http://www.kernel.org
Desc: Linux 5.10.134

+Patch: 1134_linux-5.10.135.patch
+From: http://www.kernel.org
+Desc: Linux 5.10.135
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1134_linux-5.10.135.patch b/1134_linux-5.10.135.patch
new file mode 100644
index 00000000..435afe17
--- /dev/null
+++ b/1134_linux-5.10.135.patch
@@ -0,0 +1,2841 @@
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 1a58c580b2366..8b7c26d090459 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2873,6 +2873,7 @@
+ no_entry_flush [PPC]
+ no_uaccess_flush [PPC]
+ mmio_stale_data=off [X86]
++ retbleed=off [X86]
+
+ Exceptions:
+ This does not have any effect on
+@@ -2895,6 +2896,7 @@
+ mds=full,nosmt [X86]
+ tsx_async_abort=full,nosmt [X86]
+ mmio_stale_data=full,nosmt [X86]
++ retbleed=auto,nosmt [X86]
+
+ mminit_loglevel=
+ [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
+index 0b1f3235aa773..0158dff638873 100644
+--- a/Documentation/networking/ip-sysctl.rst
++++ b/Documentation/networking/ip-sysctl.rst
+@@ -2629,7 +2629,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+ Default: 4K
+
+ sctp_wmem - vector of 3 INTEGERs: min, default, max
+- Currently this tunable has no effect.
++ Only the first value ("min") is used, "default" and "max" are
++ ignored.
++
++ min: Minimum size of send buffer that can be used by SCTP sockets.
++ It is guaranteed to each SCTP socket (but not association) even
++ under moderate memory pressure.
++
++ Default: 4K
+
+ addr_scope_policy - INTEGER
+ Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+diff --git a/Makefile b/Makefile
+index 00dddc2ac804a..5f4dbcb433075 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 10
+-SUBLEVEL = 134
++SUBLEVEL = 135
+ EXTRAVERSION =
+ NAME = Dare mighty things
+
+diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
+index a81dda65c5762..45180a2cc47cb 100644
+--- a/arch/arm/include/asm/dma.h
++++ b/arch/arm/include/asm/dma.h
+@@ -10,7 +10,7 @@
+ #else
+ #define MAX_DMA_ADDRESS ({ \
+ extern phys_addr_t arm_dma_zone_size; \
+- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
++ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
+ (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
+ #endif
+
+diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
+index b99dd8e1c93f1..7ba6cf8261626 100644
+--- a/arch/arm/lib/xor-neon.c
++++ b/arch/arm/lib/xor-neon.c
+@@ -26,8 +26,9 @@ MODULE_LICENSE("GPL");
+ * While older versions of GCC do not generate incorrect code, they fail to
+ * recognize the parallel nature of these functions, and emit plain ARM code,
+ * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
++ *
++ * #warning This code requires at least version 4.6 of GCC
+ */
+-#warning This code requires at least version 4.6 of GCC
+ #endif
+
+ #pragma GCC diagnostic ignored "-Wunused-variable"
+diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
+index 2c6e1c6ecbe78..4120c428dc378 100644
+--- a/arch/s390/include/asm/archrandom.h
++++ b/arch/s390/include/asm/archrandom.h
+@@ -2,7 +2,7 @@
+ /*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+- * Copyright IBM Corp. 2017, 2020
++ * Copyright IBM Corp. 2017, 2022
+ *
+ * Author: Harald Freudenberger <freude@××××××.com>
+ *
+@@ -14,6 +14,7 @@
+ #ifdef CONFIG_ARCH_RANDOM
+
+ #include <linux/static_key.h>
++#include <linux/preempt.h>
+ #include <linux/atomic.h>
+ #include <asm/cpacf.h>
+
+@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
+
+ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available)) {
++ if (static_branch_likely(&s390_arch_random_available) &&
++ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+ atomic64_add(sizeof(*v), &s390_arch_random_counter);
+ return true;
+@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+
+ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available)) {
++ if (static_branch_likely(&s390_arch_random_available) &&
++ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+ atomic64_add(sizeof(*v), &s390_arch_random_counter);
+ return true;
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 7896b67dda420..2e5762faf7740 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1476,6 +1476,7 @@ static void __init spectre_v2_select_mitigation(void)
+ * enable IBRS around firmware calls.
+ */
+ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++ boot_cpu_has(X86_FEATURE_IBPB) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+
+diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
+index a918ca93e4f7d..df5897c90becc 100644
+--- a/drivers/edac/ghes_edac.c
++++ b/drivers/edac/ghes_edac.c
+@@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
+
+ dmi_memdev_name(handle, &bank, &device);
+
+- /* both strings must be non-zero */
+- if (bank && *bank && device && *device)
+- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
++ /*
++ * Set to a NULL string when both bank and device are zero. In this case,
++ * the label assigned by default will be preserved.
++ */
++ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
++ (bank && *bank) ? bank : "",
++ (bank && *bank && device && *device) ? " " : "",
++ (device && *device) ? device : "");
+ }
+
+ static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
+diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+index 92987daa5e17d..5e72e6cb2f840 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+@@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
+ goto out_free_dma;
+
+ for (i = 0; i < npages; i += max) {
+- args.end = start + (max << PAGE_SHIFT);
++ if (args.start + (max << PAGE_SHIFT) > end)
++ args.end = end;
++ else
++ args.end = args.start + (max << PAGE_SHIFT);
++
+ ret = migrate_vma_setup(&args);
+ if (ret)
+ goto out_free_pfns;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 11d4e3ba9af4c..1dad62ecb8a3a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1907,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+ * non-zero req_queue_pairs says that user requested a new
+ * queue count via ethtool's set_channels, so use this
+ * value for queues distribution across traffic classes
++ * We need at least one queue pair for the interface
++ * to be usable as we see in else statement.
+ */
+ if (vsi->req_queue_pairs > 0)
+ vsi->num_queue_pairs = vsi->req_queue_pairs;
+ else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ vsi->num_queue_pairs = pf->num_lan_msix;
++ else
++ vsi->num_queue_pairs = 1;
+ }
+
+ /* Number of queues per enabled TC */
+diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+index 060897eb9cabe..7f1bf71844bce 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+@@ -652,7 +652,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+ rx_desc = ICE_RX_DESC(rx_ring, i);
+
+ if (!(rx_desc->wb.status_error0 &
+- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
++ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
++ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+ continue;
+
+ rx_buf = &rx_ring->rx_buf[i];
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index aae79fdd51727..810f2bdb91645 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -5203,10 +5203,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
+ if (vsi->netdev) {
+ ice_set_rx_mode(vsi->netdev);
+
+- err = ice_vsi_vlan_setup(vsi);
++ if (vsi->type != ICE_VSI_LB) {
++ err = ice_vsi_vlan_setup(vsi);
+
+- if (err)
+- return err;
++ if (err)
++ return err;
++ }
+ }
+ ice_vsi_cfg_dcb_rings(vsi);
+
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 725b0f38813a9..a2b4e3befa591 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+
+ tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+ if (tx_queue && tx_queue->timestamping) {
++ /* This code invokes normal driver TX code which is always
++ * protected from softirqs when called from generic TX code,
++ * which in turn disables preemption. Look at __dev_queue_xmit
++ * which uses rcu_read_lock_bh disabling preemption for RCU
++ * plus disabling softirqs. We do not need RCU reader
++ * protection here.
++ *
++ * Although it is theoretically safe for current PTP TX/RX code
++ * running without disabling softirqs, there are three good
++ * reasond for doing so:
++ *
++ * 1) The code invoked is mainly implemented for non-PTP
++ * packets and it is always executed with softirqs
++ * disabled.
++ * 2) This being a single PTP packet, better to not
++ * interrupt its processing by softirqs which can lead
++ * to high latencies.
++ * 3) netdev_xmit_more checks preemption is disabled and
++ * triggers a BUG_ON if not.
++ */
++ local_bh_disable();
+ efx_enqueue_skb(tx_queue, skb);
++ local_bh_enable();
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 789a124809e3c..70c5905a916b9 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
+ #define DEFAULT_SEND_SCI true
+ #define DEFAULT_ENCRYPT false
+ #define DEFAULT_ENCODING_SA 0
++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
+
+ static bool send_sci(const struct macsec_secy *secy)
+ {
+@@ -1694,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_PN] &&
+- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -1750,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ }
+
+ pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
+- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
++ if (tb_sa[MACSEC_SA_ATTR_PN] &&
++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
+ rtnl_unlock();
+@@ -1766,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+@@ -1839,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+
+ cleanup:
+- kfree(rx_sa);
++ macsec_rxsa_put(rx_sa);
+ rtnl_unlock();
+ return err;
+ }
+@@ -1936,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2008,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+@@ -2082,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+
+ cleanup:
+ secy->operational = was_operational;
+- kfree(tx_sa);
++ macsec_txsa_put(tx_sa);
+ rtnl_unlock();
+ return err;
+ }
+@@ -2290,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -3737,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev,
+ secy->operational = tx_sa && tx_sa->active;
+ }
+
+- if (data[IFLA_MACSEC_WINDOW])
+- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+-
+ if (data[IFLA_MACSEC_ENCRYPT])
+ tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
+
+@@ -3785,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev,
+ }
+ }
+
++ if (data[IFLA_MACSEC_WINDOW]) {
++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
++
++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
++ * for XPN cipher suites */
++ if (secy->xpn &&
++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
++ return -EINVAL;
++ }
++
+ return 0;
+ }
+
+@@ -3814,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
+
+ ret = macsec_changelink_common(dev, data);
+ if (ret)
+- return ret;
++ goto cleanup;
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 291fa449993fb..45f295403cb55 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+ int can_low_power = 1;
+ if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+ can_low_power = 0;
++ of_node_put(np);
+ if (can_low_power) {
+ /* Enable automatic low-power */
+ sungem_phy_write(phy, 0x1c, 0x9002);
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 37178b078ee37..0a07c05a610d1 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -213,9 +213,15 @@ struct virtnet_info {
+ /* Packet virtio header size */
+ u8 hdr_len;
+
+- /* Work struct for refilling if we run low on memory. */
++ /* Work struct for delayed refilling if we run low on memory. */
+ struct delayed_work refill;
+
++ /* Is delayed refill enabled? */
++ bool refill_enabled;
++
++ /* The lock to synchronize the access to refill_enabled */
++ spinlock_t refill_lock;
++
+ /* Work struct for config space updates */
+ struct work_struct config_work;
+
+@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+ return p;
+ }
+
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = true;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = false;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+ struct virtqueue *vq)
+ {
+@@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+ }
+
+ if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+- schedule_delayed_work(&vi->refill, 0);
++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++ spin_lock(&vi->refill_lock);
++ if (vi->refill_enabled)
++ schedule_delayed_work(&vi->refill, 0);
++ spin_unlock(&vi->refill_lock);
++ }
+ }
+
+ u64_stats_update_begin(&rq->stats.syncp);
+@@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, err;
+
++ enable_delayed_refill(vi);
++
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs)
+ /* Make sure we have some buffers: if oom use wq. */
+@@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
++ /* Make sure NAPI doesn't schedule refill work */
++ disable_delayed_refill(vi);
+ /* Make sure refill_work doesn't re-enable napi! */
+ cancel_delayed_work_sync(&vi->refill);
+
+@@ -2390,6 +2418,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+
+ virtio_device_ready(vdev);
+
++ enable_delayed_refill(vi);
++
+ if (netif_running(vi->dev)) {
+ err = virtnet_open(vi->dev);
+ if (err)
+@@ -3092,6 +3122,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+ vdev->priv = vi;
+
+ INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++ spin_lock_init(&vi->refill_lock);
+
+ /* If we can receive ANY GSO packets, we must allocate large ones. */
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
+index 6bcc4a13ae6c7..cc772045d526f 100644
+--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
++++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
+@@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = {
+ { USB_DEVICE(0x2717, 0x4106) },
+ { USB_DEVICE(0x2955, 0x0001) },
+ { USB_DEVICE(0x2955, 0x1001) },
++ { USB_DEVICE(0x2955, 0x1003) },
+ { USB_DEVICE(0x2a5f, 0x1000) },
+ { USB_DEVICE(0x7392, 0x7710) },
+ { 0, }
+diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
+index 0f2430fb398db..576cc39077f32 100644
+--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
+@@ -107,9 +107,20 @@ out:
+ return ret;
+ }
+
++static bool phandle_exists(const struct device_node *np,
++ const char *phandle_name, int index)
++{
++ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
++
++ if (parse_np)
++ of_node_put(parse_np);
++
++ return parse_np != NULL;
++}
++
+ #define MAX_PROP_SIZE 32
+ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+- struct ufs_vreg **out_vreg)
++ struct ufs_vreg **out_vreg)
+ {
+ int ret = 0;
+ char prop_name[MAX_PROP_SIZE];
+@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+ }
+
+ snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
+- if (!of_parse_phandle(np, prop_name, 0)) {
++ if (!phandle_exists(np, prop_name, 0)) {
+ dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
+ __func__, prop_name);
+ goto out;
+diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
+index d563abc3e1364..914e991731300 100644
+--- a/fs/ntfs/attrib.c
++++ b/fs/ntfs/attrib.c
+@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
+ a = (ATTR_RECORD*)((u8*)ctx->attr +
+ le32_to_cpu(ctx->attr->length));
+ for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
+- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+- le32_to_cpu(ctx->mrec->bytes_allocated))
++ u8 *mrec_end = (u8 *)ctx->mrec +
++ le32_to_cpu(ctx->mrec->bytes_allocated);
++ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
++ a->name_length * sizeof(ntfschar);
++ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
++ name_end > mrec_end)
+ break;
+ ctx->attr = a;
+ if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
+diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
+index 7993d527edae9..0a8cd8e59a92c 100644
+--- a/fs/ocfs2/ocfs2.h
++++ b/fs/ocfs2/ocfs2.h
+@@ -279,7 +279,6 @@ enum ocfs2_mount_options
+ OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
+ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
+ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
+- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
+ };
+
+ #define OCFS2_OSB_SOFT_RO 0x0001
+@@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
+
+ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
+ {
+- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+ }
+
+ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
+diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
+index 4da0e4b1e79bf..8caeceeaeda7c 100644
+--- a/fs/ocfs2/slot_map.c
++++ b/fs/ocfs2/slot_map.c
+@@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
+ int i, ret = -ENOSPC;
+
+ if ((preferred >= 0) && (preferred < si->si_num_slots)) {
+- if (!si->si_slots[preferred].sl_valid ||
+- !si->si_slots[preferred].sl_node_num) {
++ if (!si->si_slots[preferred].sl_valid) {
+ ret = preferred;
+ goto out;
+ }
+ }
+
+ for(i = 0; i < si->si_num_slots; i++) {
+- if (!si->si_slots[i].sl_valid ||
+- !si->si_slots[i].sl_node_num) {
++ if (!si->si_slots[i].sl_valid) {
+ ret = i;
+ break;
+ }
+@@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
+ spin_lock(&osb->osb_lock);
+ ocfs2_update_slot_info(si);
+
+- if (ocfs2_mount_local(osb))
+- /* use slot 0 directly in local mode */
+- slot = 0;
+- else {
+- /* search for ourselves first and take the slot if it already
+- * exists. Perhaps we need to mark this in a variable for our
+- * own journal recovery? Possibly not, though we certainly
+- * need to warn to the user */
+- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++ /* search for ourselves first and take the slot if it already
++ * exists. Perhaps we need to mark this in a variable for our
++ * own journal recovery? Possibly not, though we certainly
++ * need to warn to the user */
++ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++ if (slot < 0) {
++ /* if no slot yet, then just take 1st available
++ * one. */
++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (slot < 0) {
+- /* if no slot yet, then just take 1st available
+- * one. */
+- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+- if (slot < 0) {
+- spin_unlock(&osb->osb_lock);
+- mlog(ML_ERROR, "no free slots available!\n");
+- status = -EINVAL;
+- goto bail;
+- }
+- } else
+- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
+- "already allocated to this node!\n",
+- slot, osb->dev_str);
+- }
++ spin_unlock(&osb->osb_lock);
++ mlog(ML_ERROR, "no free slots available!\n");
++ status = -EINVAL;
++ goto bail;
++ }
++ } else
++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
++ "allocated to this node!\n", slot, osb->dev_str);
+
+ ocfs2_set_slot(si, slot, osb->node_num);
+ osb->slot_num = slot;
+diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
+index 477ad05a34ea2..c0e5f1bad499f 100644
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -175,7 +175,6 @@ enum {
+ Opt_dir_resv_level,
+ Opt_journal_async_commit,
+ Opt_err_cont,
+- Opt_nocluster,
+ Opt_err,
+ };
+
+@@ -209,7 +208,6 @@ static const match_table_t tokens = {
+ {Opt_dir_resv_level, "dir_resv_level=%u"},
+ {Opt_journal_async_commit, "journal_async_commit"},
+ {Opt_err_cont, "errors=continue"},
+- {Opt_nocluster, "nocluster"},
+ {Opt_err, NULL}
+ };
+
+@@ -621,13 +619,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
+ goto out;
+ }
+
+- tmp = OCFS2_MOUNT_NOCLUSTER;
+- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+- ret = -EINVAL;
+- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
+- goto out;
+- }
+-
+ tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE;
+ if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+@@ -868,7 +859,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
+ }
+
+ if (ocfs2_userspace_stack(osb) &&
+- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+ strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
+ OCFS2_STACK_LABEL_LEN)) {
+ mlog(ML_ERROR,
+@@ -1149,11 +1139,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+ osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
+ "ordered");
+
+- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
+- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
+- "without cluster aware mode.\n", osb->dev_str);
+-
+ atomic_set(&osb->vol_state, VOLUME_MOUNTED);
+ wake_up(&osb->osb_mount_event);
+
+@@ -1460,9 +1445,6 @@ static int ocfs2_parse_options(struct super_block *sb,
+ case Opt_journal_async_commit:
+ mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ break;
+- case Opt_nocluster:
+- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
+- break;
+ default:
+ mlog(ML_ERROR,
+ "Unrecognized mount option \"%s\" "
+@@ -1574,9 +1556,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
+ if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+ seq_printf(s, ",journal_async_commit");
+
+- if (opts & OCFS2_MOUNT_NOCLUSTER)
+- seq_printf(s, ",nocluster");
+-
+ return 0;
+ }
+
+diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
+index 8bd00da6d2a40..2f46ef3800aa2 100644
+--- a/fs/xfs/libxfs/xfs_log_format.h
++++ b/fs/xfs/libxfs/xfs_log_format.h
+@@ -414,7 +414,16 @@ struct xfs_log_dinode {
+ /* start of the extended dinode, writable fields */
+ uint32_t di_crc; /* CRC of the inode */
+ uint64_t di_changecount; /* number of attribute changes */
+- xfs_lsn_t di_lsn; /* flush sequence */
++
++ /*
++ * The LSN we write to this field during formatting is not a reflection
++ * of the current on-disk LSN. It should never be used for recovery
++ * sequencing, nor should it be recovered into the on-disk inode at all.
++ * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
++ * for details.
++ */
++ xfs_lsn_t di_lsn;
++
+ uint64_t di_flags2; /* more random flags */
+ uint32_t di_cowextsize; /* basic cow extent size for file */
+ uint8_t di_pad2[12]; /* more padding for future expansion */
+diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
+index 397d94775440d..1ce06173c2f55 100644
+--- a/fs/xfs/libxfs/xfs_types.h
++++ b/fs/xfs/libxfs/xfs_types.h
+@@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
+ typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */
+
+ typedef int64_t xfs_lsn_t; /* log sequence number */
++typedef int64_t xfs_csn_t; /* CIL sequence number */
+
+ typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
+ typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
+diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
+index 8c6e26d62ef28..a3d5ecccfc2cc 100644
+--- a/fs/xfs/xfs_buf_item.c
++++ b/fs/xfs/xfs_buf_item.c
+@@ -393,17 +393,8 @@ xfs_buf_item_pin(
+ }
+
+ /*
+- * This is called to unpin the buffer associated with the buf log
+- * item which was previously pinned with a call to xfs_buf_item_pin().
+- *
+- * Also drop the reference to the buf item for the current transaction.
+- * If the XFS_BLI_STALE flag is set and we are the last reference,
+- * then free up the buf log item and unlock the buffer.
+- *
+- * If the remove flag is set we are called from uncommit in the
+- * forced-shutdown path. If that is true and the reference count on
+- * the log item is going to drop to zero we need to free the item's
+- * descriptor in the transaction.
++ * This is called to unpin the buffer associated with the buf log item which
++ * was previously pinned with a call to xfs_buf_item_pin().
+ */
+ STATIC void
+ xfs_buf_item_unpin(
+@@ -420,38 +411,35 @@ xfs_buf_item_unpin(
+
+ trace_xfs_buf_item_unpin(bip);
+
++ /*
++ * Drop the bli ref associated with the pin and grab the hold required
++ * for the I/O simulation failure in the abort case. We have to do this
++ * before the pin count drops because the AIL doesn't acquire a bli
++ * reference. Therefore if the refcount drops to zero, the bli could
++ * still be AIL resident and the buffer submitted for I/O (and freed on
++ * completion) at any point before we return. This can be removed once
++ * the AIL properly holds a reference on the bli.
++ */
+ freed = atomic_dec_and_test(&bip->bli_refcount);
+-
++ if (freed && !stale && remove)
++ xfs_buf_hold(bp);
+ if (atomic_dec_and_test(&bp->b_pin_count))
+ wake_up_all(&bp->b_waiters);
+
+- if (freed && stale) {
++ /* nothing to do but drop the pin count if the bli is active */
++ if (!freed)
++ return;
++
++ if (stale) {
+ ASSERT(bip->bli_flags & XFS_BLI_STALE);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_flags & XBF_STALE);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
++ ASSERT(list_empty(&lip->li_trans));
++ ASSERT(!bp->b_transp);
+
+ trace_xfs_buf_item_unpin_stale(bip);
+
+- if (remove) {
+- /*
+- * If we are in a transaction context, we have to
+- * remove the log item from the transaction as we are
+- * about to release our reference to the buffer. If we
+- * don't, the unlock that occurs later in
+- * xfs_trans_uncommit() will try to reference the
+- * buffer which we no longer have a hold on.
+- */
+- if (!list_empty(&lip->li_trans))
+- xfs_trans_del_item(lip);
+-
+- /*
+- * Since the transaction no longer refers to the buffer,
+- * the buffer should no longer refer to the transaction.
+- */
+- bp->b_transp = NULL;
+- }
+-
+ /*
+ * If we get called here because of an IO error, we may or may
+ * not have the item on the AIL. xfs_trans_ail_delete() will
+@@ -468,13 +456,13 @@ xfs_buf_item_unpin(
+ ASSERT(bp->b_log_item == NULL);
+ }
+ xfs_buf_relse(bp);
+- } else if (freed && remove) {
++ } else if (remove) {
+ /*
+ * The buffer must be locked and held by the caller to simulate
+- * an async I/O failure.
++ * an async I/O failure. We acquired the hold for this case
++ * before the buffer was unpinned.
+ */
+ xfs_buf_lock(bp);
+- xfs_buf_hold(bp);
+ bp->b_flags |= XBF_ASYNC;
+ xfs_buf_ioend_fail(bp);
+ }
+@@ -632,7 +620,7 @@ xfs_buf_item_release(
+ STATIC void
+ xfs_buf_item_committing(
+ struct xfs_log_item *lip,
+- xfs_lsn_t commit_lsn)
++ xfs_csn_t seq)
+ {
+ return xfs_buf_item_release(lip);
+ }
+diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
+index 1d649462d731a..b374c9cee1177 100644
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn(
+ switch (magicda) {
+ case XFS_DIR3_LEAF1_MAGIC:
+ case XFS_DIR3_LEAFN_MAGIC:
++ case XFS_ATTR3_LEAF_MAGIC:
+ case XFS_DA3_NODE_MAGIC:
+ lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+ uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
+diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
+index 8c1fdf37ee8f0..8ed47b739b6cc 100644
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release(
+ STATIC void
+ xfs_qm_dquot_logitem_committing(
+ struct xfs_log_item *lip,
+- xfs_lsn_t commit_lsn)
++ xfs_csn_t seq)
+ {
+ return xfs_qm_dquot_logitem_release(lip);
+ }
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index 5b0f93f738372..4d6bf8d4974fe 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -118,6 +118,54 @@ xfs_dir_fsync(
+ return xfs_log_force_inode(ip);
+ }
+
++static xfs_csn_t
++xfs_fsync_seq(
++ struct xfs_inode *ip,
++ bool datasync)
++{
++ if (!xfs_ipincount(ip))
++ return 0;
++ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
++ return 0;
++ return ip->i_itemp->ili_commit_seq;
++}
++
++/*
++ * All metadata updates are logged, which means that we just have to flush the
++ * log up to the latest LSN that touched the inode.
++ *
++ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
++ * the log force before we clear the ili_fsync_fields field. This ensures that
++ * we don't get a racing sync operation that does not wait for the metadata to
++ * hit the journal before returning. If we race with clearing ili_fsync_fields,
++ * then all that will happen is the log force will do nothing as the lsn will
++ * already be on disk. We can't race with setting ili_fsync_fields because that
++ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
++ * shared until after the ili_fsync_fields is cleared.
++ */
++static int
++xfs_fsync_flush_log(
++ struct xfs_inode *ip,
++ bool datasync,
++ int *log_flushed)
++{
++ int error = 0;
++ xfs_csn_t seq;
++
++ xfs_ilock(ip, XFS_ILOCK_SHARED);
++ seq = xfs_fsync_seq(ip, datasync);
++ if (seq) {
++ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
++ log_flushed);
++
++ spin_lock(&ip->i_itemp->ili_lock);
++ ip->i_itemp->ili_fsync_fields = 0;
++ spin_unlock(&ip->i_itemp->ili_lock);
++ }
++ xfs_iunlock(ip, XFS_ILOCK_SHARED);
++ return error;
++}
++
+ STATIC int
+ xfs_file_fsync(
+ struct file *file,
+@@ -125,13 +173,10 @@ xfs_file_fsync(
+ loff_t end,
+ int datasync)
+ {
+- struct inode *inode = file->f_mapping->host;
+- struct xfs_inode *ip = XFS_I(inode);
+- struct xfs_inode_log_item *iip = ip->i_itemp;
++ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
+ struct xfs_mount *mp = ip->i_mount;
+ int error = 0;
+ int log_flushed = 0;
+- xfs_lsn_t lsn = 0;
+
+ trace_xfs_file_fsync(ip);
+
+@@ -155,33 +200,7 @@ xfs_file_fsync(
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
+
+- /*
+- * All metadata updates are logged, which means that we just have to
+- * flush the log up to the latest LSN that touched the inode. If we have
+- * concurrent fsync/fdatasync() calls, we need them to all block on the
+- * log force before we clear the ili_fsync_fields field. This ensures
+- * that we don't get a racing sync operation that does not wait for the
+- * metadata to hit the journal before returning. If we race with
+- * clearing the ili_fsync_fields, then all that will happen is the log
+- * force will do nothing as the lsn will already be on disk. We can't
+- * race with setting ili_fsync_fields because that is done under
+- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
+- * until after the ili_fsync_fields is cleared.
+- */
+- xfs_ilock(ip, XFS_ILOCK_SHARED);
+- if (xfs_ipincount(ip)) {
+- if (!datasync ||
+- (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
+- lsn = iip->ili_last_lsn;
+- }
+-
+- if (lsn) {
+- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+- spin_lock(&iip->ili_lock);
+- iip->ili_fsync_fields = 0;
+- spin_unlock(&iip->ili_lock);
+- }
+- xfs_iunlock(ip, XFS_ILOCK_SHARED);
++ error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
+
+ /*
+ * If we only have a single device, and the log force about was
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index 03497741aef74..1f61e085676b3 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2754,7 +2754,7 @@ xfs_iunpin(
+ trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
+
+ /* Give the log a push to start the unpinning I/O */
+- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
++ xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
+
+ }
+
+@@ -3716,16 +3716,16 @@ int
+ xfs_log_force_inode(
+ struct xfs_inode *ip)
+ {
+- xfs_lsn_t lsn = 0;
++ xfs_csn_t seq = 0;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_ipincount(ip))
+- lsn = ip->i_itemp->ili_last_lsn;
++ seq = ip->i_itemp->ili_commit_seq;
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+- if (!lsn)
++ if (!seq)
+ return 0;
+- return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
++ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
+ }
+
+ /*
+diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
+index 6ff91e5bf3cd7..3aba4559469f1 100644
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -617,9 +617,9 @@ xfs_inode_item_committed(
+ STATIC void
+ xfs_inode_item_committing(
+ struct xfs_log_item *lip,
+- xfs_lsn_t commit_lsn)
++ xfs_csn_t seq)
+ {
+- INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
++ INODE_ITEM(lip)->ili_commit_seq = seq;
+ return xfs_inode_item_release(lip);
+ }
+
+diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
+index 4b926e32831c0..403b45ab9aa28 100644
+--- a/fs/xfs/xfs_inode_item.h
++++ b/fs/xfs/xfs_inode_item.h
+@@ -33,7 +33,7 @@ struct xfs_inode_log_item {
+ unsigned int ili_fields; /* fields to be logged */
+ unsigned int ili_fsync_fields; /* logged since last fsync */
+ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
+- xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
++ xfs_csn_t ili_commit_seq; /* last transaction commit */
+ };
+
+ static inline int xfs_inode_clean(struct xfs_inode *ip)
+diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
+index cb44f7653f03b..538724f9f85ca 100644
+--- a/fs/xfs/xfs_inode_item_recover.c
++++ b/fs/xfs/xfs_inode_item_recover.c
+@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
+ STATIC void
+ xfs_log_dinode_to_disk(
+ struct xfs_log_dinode *from,
+- struct xfs_dinode *to)
++ struct xfs_dinode *to,
++ xfs_lsn_t lsn)
+ {
+ to->di_magic = cpu_to_be16(from->di_magic);
+ to->di_mode = cpu_to_be16(from->di_mode);
+@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
+ to->di_flags2 = cpu_to_be64(from->di_flags2);
+ to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
+ to->di_ino = cpu_to_be64(from->di_ino);
+- to->di_lsn = cpu_to_be64(from->di_lsn);
++ to->di_lsn = cpu_to_be64(lsn);
+ memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+ uuid_copy(&to->di_uuid, &from->di_uuid);
+ to->di_flushiter = 0;
+@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
+ }
+
+ /*
+- * If the inode has an LSN in it, recover the inode only if it's less
+- * than the lsn of the transaction we are replaying. Note: we still
+- * need to replay an owner change even though the inode is more recent
+- * than the transaction as there is no guarantee that all the btree
+- * blocks are more recent than this transaction, too.
++ * If the inode has an LSN in it, recover the inode only if the on-disk
++ * inode's LSN is older than the lsn of the transaction we are
++ * replaying. We can have multiple checkpoints with the same start LSN,
++ * so the current LSN being equal to the on-disk LSN doesn't necessarily
++ * mean that the on-disk inode is more recent than the change being
++ * replayed.
++ *
++ * We must check the current_lsn against the on-disk inode
++ * here because the we can't trust the log dinode to contain a valid LSN
++ * (see comment below before replaying the log dinode for details).
++ *
++ * Note: we still need to replay an owner change even though the inode
++ * is more recent than the transaction as there is no guarantee that all
++ * the btree blocks are more recent than this transaction, too.
+ */
+ if (dip->di_version >= 3) {
+ xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
+
+- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
++ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
+ trace_xfs_log_recover_inode_skip(log, in_f);
+ error = 0;
+ goto out_owner_change;
+@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
+ goto out_release;
+ }
+
+- /* recover the log dinode inode into the on disk inode */
+- xfs_log_dinode_to_disk(ldip, dip);
++ /*
++ * Recover the log dinode inode into the on disk inode.
++ *
++ * The LSN in the log dinode is garbage - it can be zero or reflect
++ * stale in-memory runtime state that isn't coherent with the changes
++ * logged in this transaction or the changes written to the on-disk
++ * inode. Hence we write the current lSN into the inode because that
++ * matches what xfs_iflush() would write inode the inode when flushing
++ * the changes in this transaction.
++ */
++ xfs_log_dinode_to_disk(ldip, dip, current_lsn);
+
+ fields = in_f->ilf_fields;
+ if (fields & XFS_ILOG_DEV)
+diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
+index b445e63cbc3c7..22d7d74231d42 100644
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -765,6 +765,9 @@ xfs_log_mount_finish(
+ if (readonly)
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+
++ /* Make sure the log is dead if we're returning failure. */
++ ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
++
+ return error;
+ }
+
+@@ -3210,14 +3213,13 @@ out_error:
+ }
+
+ static int
+-__xfs_log_force_lsn(
+- struct xfs_mount *mp,
++xlog_force_lsn(
++ struct xlog *log,
+ xfs_lsn_t lsn,
+ uint flags,
+ int *log_flushed,
+ bool already_slept)
+ {
+- struct xlog *log = mp->m_log;
+ struct xlog_in_core *iclog;
+
+ spin_lock(&log->l_icloglock);
+@@ -3250,8 +3252,6 @@ __xfs_log_force_lsn(
+ if (!already_slept &&
+ (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
+- XFS_STATS_INC(mp, xs_log_force_sleep);
+-
+ xlog_wait(&iclog->ic_prev->ic_write_wait,
+ &log->l_icloglock);
+ return -EAGAIN;
+@@ -3289,25 +3289,29 @@ out_error:
+ * to disk, that thread will wake up all threads waiting on the queue.
+ */
+ int
+-xfs_log_force_lsn(
++xfs_log_force_seq(
+ struct xfs_mount *mp,
+- xfs_lsn_t lsn,
++ xfs_csn_t seq,
+ uint flags,
+ int *log_flushed)
+ {
++ struct xlog *log = mp->m_log;
++ xfs_lsn_t lsn;
+ int ret;
+- ASSERT(lsn != 0);
++ ASSERT(seq != 0);
+
+ XFS_STATS_INC(mp, xs_log_force);
+- trace_xfs_log_force(mp, lsn, _RET_IP_);
++ trace_xfs_log_force(mp, seq, _RET_IP_);
+
+- lsn = xlog_cil_force_lsn(mp->m_log, lsn);
++ lsn = xlog_cil_force_seq(log, seq);
+ if (lsn == NULLCOMMITLSN)
+ return 0;
+
+- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
+- if (ret == -EAGAIN)
+- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
++ if (ret == -EAGAIN) {
++ XFS_STATS_INC(mp, xs_log_force_sleep);
++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
++ }
+ return ret;
+ }
+
+diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
+index 98c913da7587e..a1089f8b7169b 100644
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -106,7 +106,7 @@ struct xfs_item_ops;
+ struct xfs_trans;
+
+ int xfs_log_force(struct xfs_mount *mp, uint flags);
+-int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
++int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
+ int *log_forced);
+ int xfs_log_mount(struct xfs_mount *mp,
+ struct xfs_buftarg *log_target,
+@@ -132,8 +132,6 @@ bool xfs_log_writable(struct xfs_mount *mp);
+ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
+ void xfs_log_ticket_put(struct xlog_ticket *ticket);
+
+-void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_lsn_t *commit_lsn, bool regrant);
+ void xlog_cil_process_committed(struct list_head *list);
+ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
+
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index cd5c04dabe2e1..fbe160d5e9b96 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -777,7 +777,7 @@ xlog_cil_push_work(
+ * that higher sequences will wait for us to write out a commit record
+ * before they do.
+ *
+- * xfs_log_force_lsn requires us to mirror the new sequence into the cil
++ * xfs_log_force_seq requires us to mirror the new sequence into the cil
+ * structure atomically with the addition of this sequence to the
+ * committing list. This also ensures that we can do unlocked checks
+ * against the current sequence in log forces without risking
+@@ -1020,16 +1020,14 @@ xlog_cil_empty(
+ * allowed again.
+ */
+ void
+-xfs_log_commit_cil(
+- struct xfs_mount *mp,
++xlog_cil_commit(
++ struct xlog *log,
+ struct xfs_trans *tp,
+- xfs_lsn_t *commit_lsn,
++ xfs_csn_t *commit_seq,
+ bool regrant)
+ {
+- struct xlog *log = mp->m_log;
+ struct xfs_cil *cil = log->l_cilp;
+ struct xfs_log_item *lip, *next;
+- xfs_lsn_t xc_commit_lsn;
+
+ /*
+ * Do all necessary memory allocation before we lock the CIL.
+@@ -1043,10 +1041,6 @@ xfs_log_commit_cil(
+
+ xlog_cil_insert_items(log, tp);
+
+- xc_commit_lsn = cil->xc_ctx->sequence;
+- if (commit_lsn)
+- *commit_lsn = xc_commit_lsn;
+-
+ if (regrant && !XLOG_FORCED_SHUTDOWN(log))
+ xfs_log_ticket_regrant(log, tp->t_ticket);
+ else
+@@ -1069,8 +1063,10 @@ xfs_log_commit_cil(
+ list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+ xfs_trans_del_item(lip);
+ if (lip->li_ops->iop_committing)
+- lip->li_ops->iop_committing(lip, xc_commit_lsn);
++ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
+ }
++ if (commit_seq)
++ *commit_seq = cil->xc_ctx->sequence;
+
+ /* xlog_cil_push_background() releases cil->xc_ctx_lock */
+ xlog_cil_push_background(log);
+@@ -1087,9 +1083,9 @@ xfs_log_commit_cil(
+ * iclog flush is necessary following this call.
+ */
+ xfs_lsn_t
+-xlog_cil_force_lsn(
++xlog_cil_force_seq(
+ struct xlog *log,
+- xfs_lsn_t sequence)
++ xfs_csn_t sequence)
+ {
+ struct xfs_cil *cil = log->l_cilp;
+ struct xfs_cil_ctx *ctx;
+@@ -1183,23 +1179,19 @@ out_shutdown:
+ */
+ bool
+ xfs_log_item_in_current_chkpt(
+- struct xfs_log_item *lip)
++ struct xfs_log_item *lip)
+ {
+- struct xfs_cil_ctx *ctx;
++ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
+
+ if (list_empty(&lip->li_cil))
+ return false;
+
+- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
+-
+ /*
+ * li_seq is written on the first commit of a log item to record the
+ * first checkpoint it is written to. Hence if it is different to the
+ * current sequence, we're in a new checkpoint.
+ */
+- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
+- return false;
+- return true;
++ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
+ }
+
+ /*
1365 |
+diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h |
1366 |
+index 1c6fdbf3d5066..42cd1602ac256 100644 |
1367 |
+--- a/fs/xfs/xfs_log_priv.h |
1368 |
++++ b/fs/xfs/xfs_log_priv.h |
1369 |
+@@ -230,7 +230,7 @@ struct xfs_cil; |
1370 |
+ |
1371 |
+ struct xfs_cil_ctx { |
1372 |
+ struct xfs_cil *cil; |
1373 |
+- xfs_lsn_t sequence; /* chkpt sequence # */ |
1374 |
++ xfs_csn_t sequence; /* chkpt sequence # */ |
1375 |
+ xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ |
1376 |
+ xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ |
1377 |
+ struct xlog_ticket *ticket; /* chkpt ticket */ |
1378 |
+@@ -268,10 +268,10 @@ struct xfs_cil { |
1379 |
+ struct xfs_cil_ctx *xc_ctx; |
1380 |
+ |
1381 |
+ spinlock_t xc_push_lock ____cacheline_aligned_in_smp; |
1382 |
+- xfs_lsn_t xc_push_seq; |
1383 |
++ xfs_csn_t xc_push_seq; |
1384 |
+ struct list_head xc_committing; |
1385 |
+ wait_queue_head_t xc_commit_wait; |
1386 |
+- xfs_lsn_t xc_current_sequence; |
1387 |
++ xfs_csn_t xc_current_sequence; |
1388 |
+ struct work_struct xc_push_work; |
1389 |
+ wait_queue_head_t xc_push_wait; /* background push throttle */ |
1390 |
+ } ____cacheline_aligned_in_smp; |
1391 |
+@@ -547,19 +547,18 @@ int xlog_cil_init(struct xlog *log); |
1392 |
+ void xlog_cil_init_post_recovery(struct xlog *log); |
1393 |
+ void xlog_cil_destroy(struct xlog *log); |
1394 |
+ bool xlog_cil_empty(struct xlog *log); |
1395 |
++void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, |
1396 |
++ xfs_csn_t *commit_seq, bool regrant); |
1397 |
+ |
1398 |
+ /* |
1399 |
+ * CIL force routines |
1400 |
+ */ |
1401 |
+-xfs_lsn_t |
1402 |
+-xlog_cil_force_lsn( |
1403 |
+- struct xlog *log, |
1404 |
+- xfs_lsn_t sequence); |
1405 |
++xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence); |
1406 |
+ |
1407 |
+ static inline void |
1408 |
+ xlog_cil_force(struct xlog *log) |
1409 |
+ { |
1410 |
+- xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); |
1411 |
++ xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence); |
1412 |
+ } |
1413 |
+ |
1414 |
+ /* |
1415 |
+diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c |
1416 |
+index 87886b7f77dad..69408782019eb 100644 |
1417 |
+--- a/fs/xfs/xfs_log_recover.c |
1418 |
++++ b/fs/xfs/xfs_log_recover.c |
1419 |
+@@ -2457,8 +2457,10 @@ xlog_finish_defer_ops( |
1420 |
+ |
1421 |
+ error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, |
1422 |
+ dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); |
1423 |
+- if (error) |
1424 |
++ if (error) { |
1425 |
++ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
1426 |
+ return error; |
1427 |
++ } |
1428 |
+ |
1429 |
+ /* |
1430 |
+ * Transfer to this new transaction all the dfops we captured |
1431 |
+@@ -3454,6 +3456,7 @@ xlog_recover_finish( |
1432 |
+ * this) before we get around to xfs_log_mount_cancel. |
1433 |
+ */ |
1434 |
+ xlog_recover_cancel_intents(log); |
1435 |
++ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
1436 |
+ xfs_alert(log->l_mp, "Failed to recover intents"); |
1437 |
+ return error; |
1438 |
+ } |
1439 |
+diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c |
1440 |
+index 44b05e1d5d327..a2a5a0fd92334 100644 |
1441 |
+--- a/fs/xfs/xfs_mount.c |
1442 |
++++ b/fs/xfs/xfs_mount.c |
1443 |
+@@ -968,9 +968,17 @@ xfs_mountfs( |
1444 |
+ /* |
1445 |
+ * Finish recovering the file system. This part needed to be delayed |
1446 |
+ * until after the root and real-time bitmap inodes were consistently |
1447 |
+- * read in. |
1448 |
++ * read in. Temporarily create per-AG space reservations for metadata |
1449 |
++ * btree shape changes because space freeing transactions (for inode |
1450 |
++ * inactivation) require the per-AG reservation in lieu of reserving |
1451 |
++ * blocks. |
1452 |
+ */ |
1453 |
++ error = xfs_fs_reserve_ag_blocks(mp); |
1454 |
++ if (error && error == -ENOSPC) |
1455 |
++ xfs_warn(mp, |
1456 |
++ "ENOSPC reserving per-AG metadata pool, log recovery may fail."); |
1457 |
+ error = xfs_log_mount_finish(mp); |
1458 |
++ xfs_fs_unreserve_ag_blocks(mp); |
1459 |
+ if (error) { |
1460 |
+ xfs_warn(mp, "log mount finish failed"); |
1461 |
+ goto out_rtunmount; |
1462 |
+diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
+index 36166bae24a6f..73a1de7ceefc9 100644
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -832,7 +832,7 @@ __xfs_trans_commit(
+ bool regrant)
+ {
+ struct xfs_mount *mp = tp->t_mountp;
+- xfs_lsn_t commit_lsn = -1;
++ xfs_csn_t commit_seq = 0;
+ int error = 0;
+ int sync = tp->t_flags & XFS_TRANS_SYNC;
+
+@@ -874,7 +874,7 @@ __xfs_trans_commit(
+ xfs_trans_apply_sb_deltas(tp);
+ xfs_trans_apply_dquot_deltas(tp);
+
+- xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
++ xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
+
+ xfs_trans_free(tp);
+
+@@ -883,7 +883,7 @@ __xfs_trans_commit(
+ * log out now and wait for it.
+ */
+ if (sync) {
+- error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
++ error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
+ XFS_STATS_INC(mp, xs_trans_sync);
+ } else {
+ XFS_STATS_INC(mp, xs_trans_async);
+diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
+index 075eeade4f7d5..97485559008bb 100644
+--- a/fs/xfs/xfs_trans.h
++++ b/fs/xfs/xfs_trans.h
+@@ -43,7 +43,7 @@ struct xfs_log_item {
+ struct list_head li_cil; /* CIL pointers */
+ struct xfs_log_vec *li_lv; /* active log vector */
+ struct xfs_log_vec *li_lv_shadow; /* standby vector */
+- xfs_lsn_t li_seq; /* CIL commit seq */
++ xfs_csn_t li_seq; /* CIL commit seq */
+ };
+
+ /*
+@@ -69,7 +69,7 @@ struct xfs_item_ops {
+ void (*iop_pin)(struct xfs_log_item *);
+ void (*iop_unpin)(struct xfs_log_item *, int remove);
+ uint (*iop_push)(struct xfs_log_item *, struct list_head *);
+- void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
++ void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq);
+ void (*iop_release)(struct xfs_log_item *);
+ xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+ int (*iop_recover)(struct xfs_log_item *lip,
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index f21bc441e3fa8..b010d45a1ecd5 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -1457,6 +1457,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
++ const union bpf_attr *kattr,
++ union bpf_attr __user *uattr);
+ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info);
+@@ -1671,6 +1674,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ return -ENOTSUPP;
+ }
+
++static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
++ const union bpf_attr *kattr,
++ union bpf_attr __user *uattr)
++{
++ return -ENOTSUPP;
++}
++
+ static inline void bpf_map_put(struct bpf_map *map)
+ {
+ }
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index e7ce719838b5e..edba74a536839 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
+ {
+ const struct inet6_dev *idev = __in6_dev_get(dev);
+
++ if (unlikely(!idev))
++ return true;
++
+ return !!idev->cnf.ignore_routes_with_linkdown;
+ }
+
+diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
+index 1d1232917de72..9b8000869b078 100644
+--- a/include/net/bluetooth/l2cap.h
++++ b/include/net/bluetooth/l2cap.h
+@@ -845,6 +845,7 @@ enum {
+ };
+
+ void l2cap_chan_hold(struct l2cap_chan *c);
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
+ void l2cap_chan_put(struct l2cap_chan *c);
+
+ static inline void l2cap_chan_lock(struct l2cap_chan *chan)
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index 0b1864a82d4ad..ff901aade442f 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -317,7 +317,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+
+ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+-#define TCP_PINGPONG_THRESH 3
++#define TCP_PINGPONG_THRESH 1
+
+ static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+ {
+@@ -334,14 +334,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+ return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ }
+
+-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+-{
+- struct inet_connection_sock *icsk = inet_csk(sk);
+-
+- if (icsk->icsk_ack.pingpong < U8_MAX)
+- icsk->icsk_ack.pingpong++;
+-}
+-
+ static inline bool inet_csk_has_ulp(struct sock *sk)
+ {
+ return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 44bfb22069c1f..8129ce9a07719 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1396,7 +1396,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
+
+ static inline int tcp_win_from_space(const struct sock *sk, int space)
+ {
+- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+
+ return tcp_adv_win_scale <= 0 ?
+ (space>>(-tcp_adv_win_scale)) :
+diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+index 0f39fdcb2273c..2a234023821e3 100644
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -5007,7 +5007,10 @@ struct bpf_pidns_info {
+
+ /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+ struct bpf_sk_lookup {
+- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++ union {
++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
++ };
+
+ __u32 family; /* Protocol family (AF_INET, AF_INET6) */
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index e5d22af43fa0b..d29731a30b8e1 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+ rcu_assign_pointer(watch->queue, wqueue);
+ }
+
++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
++{
++ const struct cred *cred;
++ struct watch *w;
++
++ hlist_for_each_entry(w, &wlist->watchers, list_node) {
++ struct watch_queue *wq = rcu_access_pointer(w->queue);
++ if (wqueue == wq && watch->id == w->id)
++ return -EBUSY;
++ }
++
++ cred = current_cred();
++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
++ atomic_dec(&cred->user->nr_watches);
++ return -EAGAIN;
++ }
++
++ watch->cred = get_cred(cred);
++ rcu_assign_pointer(watch->watch_list, wlist);
++
++ kref_get(&wqueue->usage);
++ kref_get(&watch->usage);
++ hlist_add_head(&watch->queue_node, &wqueue->watches);
++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
++ return 0;
++}
++
+ /**
+ * add_watch_to_object - Add a watch on an object to a watch list
+ * @watch: The watch to add
+@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+ */
+ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ {
+- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
+- struct watch *w;
+-
+- hlist_for_each_entry(w, &wlist->watchers, list_node) {
+- struct watch_queue *wq = rcu_access_pointer(w->queue);
+- if (wqueue == wq && watch->id == w->id)
+- return -EBUSY;
+- }
+-
+- watch->cred = get_current_cred();
+- rcu_assign_pointer(watch->watch_list, wlist);
++ struct watch_queue *wqueue;
++ int ret = -ENOENT;
+
+- if (atomic_inc_return(&watch->cred->user->nr_watches) >
+- task_rlimit(current, RLIMIT_NOFILE)) {
+- atomic_dec(&watch->cred->user->nr_watches);
+- put_cred(watch->cred);
+- return -EAGAIN;
+- }
++ rcu_read_lock();
+
++ wqueue = rcu_access_pointer(watch->queue);
+ if (lock_wqueue(wqueue)) {
+- kref_get(&wqueue->usage);
+- kref_get(&watch->usage);
+- hlist_add_head(&watch->queue_node, &wqueue->watches);
++ spin_lock(&wlist->lock);
++ ret = add_one_watch(watch, wlist, wqueue);
++ spin_unlock(&wlist->lock);
+ unlock_wqueue(wqueue);
+ }
+
+- hlist_add_head(&watch->list_node, &wlist->watchers);
+- return 0;
++ rcu_read_unlock();
++ return ret;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index f3418edb136be..43ff22ce76324 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3679,11 +3679,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
+ * need to be calculated.
+ */
+ if (!order) {
+- long fast_free;
++ long usable_free;
++ long reserved;
+
+- fast_free = free_pages;
+- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
+- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
++ usable_free = free_pages;
++ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
++
++ /* reserved may over estimate high-atomic reserves. */
++ usable_free -= min(usable_free, reserved);
++ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
+ return true;
+ }
+
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 2557cd917f5ed..6a5ff5dcc09a9 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+ }
+
+ /* Find channel with given SCID.
+- * Returns locked channel. */
++ * Returns a reference locked channel.
++ */
+ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+ u16 cid)
+ {
+@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+
+ mutex_lock(&conn->chan_lock);
+ c = __l2cap_get_chan_by_scid(conn, cid);
+- if (c)
+- l2cap_chan_lock(c);
++ if (c) {
++ /* Only lock if chan reference is not 0 */
++ c = l2cap_chan_hold_unless_zero(c);
++ if (c)
++ l2cap_chan_lock(c);
++ }
+ mutex_unlock(&conn->chan_lock);
+
+ return c;
+ }
+
+ /* Find channel with given DCID.
+- * Returns locked channel.
++ * Returns a reference locked channel.
+ */
+ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+ u16 cid)
+@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+
+ mutex_lock(&conn->chan_lock);
+ c = __l2cap_get_chan_by_dcid(conn, cid);
+- if (c)
+- l2cap_chan_lock(c);
++ if (c) {
++ /* Only lock if chan reference is not 0 */
++ c = l2cap_chan_hold_unless_zero(c);
++ if (c)
++ l2cap_chan_lock(c);
++ }
+ mutex_unlock(&conn->chan_lock);
+
+ return c;
+@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
+
+ mutex_lock(&conn->chan_lock);
+ c = __l2cap_get_chan_by_ident(conn, ident);
+- if (c)
+- l2cap_chan_lock(c);
++ if (c) {
++ /* Only lock if chan reference is not 0 */
++ c = l2cap_chan_hold_unless_zero(c);
++ if (c)
++ l2cap_chan_lock(c);
++ }
+ mutex_unlock(&conn->chan_lock);
+
+ return c;
+@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
+ kref_get(&c->kref);
+ }
+
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
++{
++ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
++
++ if (!kref_get_unless_zero(&c->kref))
++ return NULL;
++
++ return c;
++}
++
+ void l2cap_chan_put(struct l2cap_chan *c)
+ {
+ BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
+@@ -1965,7 +1988,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+ src_match = !bacmp(&c->src, src);
+ dst_match = !bacmp(&c->dst, dst);
+ if (src_match && dst_match) {
+- l2cap_chan_hold(c);
++ c = l2cap_chan_hold_unless_zero(c);
++ if (!c)
++ continue;
++
+ read_unlock(&chan_list_lock);
+ return c;
+ }
+@@ -1980,7 +2006,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+ }
+
+ if (c1)
+- l2cap_chan_hold(c1);
++ c1 = l2cap_chan_hold_unless_zero(c1);
+
+ read_unlock(&chan_list_lock);
+
+@@ -4460,6 +4486,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
+
+ unlock:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ return err;
+ }
+
+@@ -4573,6 +4600,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
+
+ done:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ return err;
+ }
+
+@@ -5300,6 +5328,7 @@ send_move_response:
+ l2cap_send_move_chan_rsp(chan, result);
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5392,6 +5421,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
+ }
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ }
+
+ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+@@ -5421,6 +5451,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+ l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ }
+
+ static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
+@@ -5484,6 +5515,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
+ l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5519,6 +5551,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
+ }
+
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -5891,12 +5924,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
+ if (credits > max_credits) {
+ BT_ERR("LE credits overflow");
+ l2cap_send_disconn_req(chan, ECONNRESET);
+- l2cap_chan_unlock(chan);
+
+ /* Return 0 so that we don't trigger an unnecessary
+ * command reject packet.
+ */
+- return 0;
++ goto unlock;
+ }
+
+ chan->tx_credits += credits;
+@@ -5907,7 +5939,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
+ if (chan->tx_credits)
+ chan->ops->resume(chan);
+
++unlock:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+
+ return 0;
+ }
+@@ -7587,6 +7621,7 @@ drop:
+
+ done:
+ l2cap_chan_unlock(chan);
++ l2cap_chan_put(chan);
+ }
+
+ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
+@@ -8074,7 +8109,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
+ if (src_type != c->src_type)
+ continue;
+
+- l2cap_chan_hold(c);
++ c = l2cap_chan_hold_unless_zero(c);
+ read_unlock(&chan_list_lock);
+ return c;
+ }
+diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
+index eb684f31fd698..f8b231bbbe381 100644
+--- a/net/bpf/test_run.c
++++ b/net/bpf/test_run.c
+@@ -10,20 +10,86 @@
+ #include <net/bpf_sk_storage.h>
+ #include <net/sock.h>
+ #include <net/tcp.h>
++#include <net/net_namespace.h>
+ #include <linux/error-injection.h>
+ #include <linux/smp.h>
++#include <linux/sock_diag.h>
+
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/bpf_test_run.h>
+
++struct bpf_test_timer {
++ enum { NO_PREEMPT, NO_MIGRATE } mode;
++ u32 i;
++ u64 time_start, time_spent;
++};
++
++static void bpf_test_timer_enter(struct bpf_test_timer *t)
++ __acquires(rcu)
++{
++ rcu_read_lock();
++ if (t->mode == NO_PREEMPT)
++ preempt_disable();
++ else
++ migrate_disable();
++
++ t->time_start = ktime_get_ns();
++}
++
++static void bpf_test_timer_leave(struct bpf_test_timer *t)
++ __releases(rcu)
++{
++ t->time_start = 0;
++
++ if (t->mode == NO_PREEMPT)
++ preempt_enable();
++ else
++ migrate_enable();
++ rcu_read_unlock();
++}
++
++static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
++ __must_hold(rcu)
++{
++ t->i++;
++ if (t->i >= repeat) {
++ /* We're done. */
++ t->time_spent += ktime_get_ns() - t->time_start;
++ do_div(t->time_spent, t->i);
++ *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
++ *err = 0;
++ goto reset;
++ }
++
++ if (signal_pending(current)) {
++ /* During iteration: we've been cancelled, abort. */
++ *err = -EINTR;
++ goto reset;
++ }
++
++ if (need_resched()) {
++ /* During iteration: we need to reschedule between runs. */
++ t->time_spent += ktime_get_ns() - t->time_start;
++ bpf_test_timer_leave(t);
++ cond_resched();
++ bpf_test_timer_enter(t);
++ }
++
++ /* Do another round. */
++ return true;
++
++reset:
++ t->i = 0;
++ return false;
++}
++
+ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+ u32 *retval, u32 *time, bool xdp)
+ {
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
++ struct bpf_test_timer t = { NO_MIGRATE };
+ enum bpf_cgroup_storage_type stype;
+- u64 time_start, time_spent = 0;
+- int ret = 0;
+- u32 i;
++ int ret;
+
+ for_each_cgroup_storage_type(stype) {
+ storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+@@ -38,10 +104,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+ if (!repeat)
+ repeat = 1;
+
+- rcu_read_lock();
+- migrate_disable();
+- time_start = ktime_get_ns();
+- for (i = 0; i < repeat; i++) {
++ bpf_test_timer_enter(&t);
++ do {
+ ret = bpf_cgroup_storage_set(storage);
+ if (ret)
+ break;
+@@ -53,29 +117,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+
+ bpf_cgroup_storage_unset();
+
+- if (signal_pending(current)) {
+- ret = -EINTR;
+- break;
+- }
+-
+- if (need_resched()) {
+- time_spent += ktime_get_ns() - time_start;
+- migrate_enable();
+- rcu_read_unlock();
+-
+- cond_resched();
+-
+- rcu_read_lock();
+- migrate_disable();
+- time_start = ktime_get_ns();
+- }
+- }
+- time_spent += ktime_get_ns() - time_start;
+- migrate_enable();
+- rcu_read_unlock();
+-
+- do_div(time_spent, repeat);
+- *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
++ } while (bpf_test_timer_continue(&t, repeat, &ret, time));
++ bpf_test_timer_leave(&t);
+
+ for_each_cgroup_storage_type(stype)
+ bpf_cgroup_storage_free(storage[stype]);
+@@ -688,18 +731,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+ {
++ struct bpf_test_timer t = { NO_PREEMPT };
+ u32 size = kattr->test.data_size_in;
+ struct bpf_flow_dissector ctx = {};
+ u32 repeat = kattr->test.repeat;
+ struct bpf_flow_keys *user_ctx;
+ struct bpf_flow_keys flow_keys;
+- u64 time_start, time_spent = 0;
+ const struct ethhdr *eth;
+ unsigned int flags = 0;
+ u32 retval, duration;
+ void *data;
+ int ret;
+- u32 i;
+
+ if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
+ return -EINVAL;
+@@ -735,48 +777,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ ctx.data = data;
+ ctx.data_end = (__u8 *)data + size;
+
+- rcu_read_lock();
+- preempt_disable();
+- time_start = ktime_get_ns();
+- for (i = 0; i < repeat; i++) {
++ bpf_test_timer_enter(&t);
++ do {
+ retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
+ size, flags);
++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
++ bpf_test_timer_leave(&t);
+
+- if (signal_pending(current)) {
+- preempt_enable();
+- rcu_read_unlock();
++ if (ret < 0)
++ goto out;
+
+- ret = -EINTR;
+- goto out;
+- }
++ ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
++ retval, duration);
++ if (!ret)
++ ret = bpf_ctx_finish(kattr, uattr, user_ctx,
++ sizeof(struct bpf_flow_keys));
+
+- if (need_resched()) {
+- time_spent += ktime_get_ns() - time_start;
+- preempt_enable();
+- rcu_read_unlock();
++out:
++ kfree(user_ctx);
++ kfree(data);
++ return ret;
++}
+
+- cond_resched();
++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
++ union bpf_attr __user *uattr)
++{
++ struct bpf_test_timer t = { NO_PREEMPT };
++ struct bpf_prog_array *progs = NULL;
++ struct bpf_sk_lookup_kern ctx = {};
++ u32 repeat = kattr->test.repeat;
++ struct bpf_sk_lookup *user_ctx;
++ u32 retval, duration;
++ int ret = -EINVAL;
+
+- rcu_read_lock();
+- preempt_disable();
+- time_start = ktime_get_ns();
+- }
++ if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
++ return -EINVAL;
++
++ if (kattr->test.flags || kattr->test.cpu)
++ return -EINVAL;
++
++ if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
++ kattr->test.data_size_out)
++ return -EINVAL;
++
++ if (!repeat)
++ repeat = 1;
++
++ user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
++ if (IS_ERR(user_ctx))
++ return PTR_ERR(user_ctx);
++
++ if (!user_ctx)
++ return -EINVAL;
++
++ if (user_ctx->sk)
++ goto out;
++
++ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
++ goto out;
++
++ if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
++ ret = -ERANGE;
++ goto out;
+ }
+- time_spent += ktime_get_ns() - time_start;
+- preempt_enable();
+- rcu_read_unlock();
+
+- do_div(time_spent, repeat);
+- duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
++ ctx.family = (u16)user_ctx->family;
++ ctx.protocol = (u16)user_ctx->protocol;
++ ctx.dport = (u16)user_ctx->local_port;
++ ctx.sport = (__force __be16)user_ctx->remote_port;
+
+- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+- retval, duration);
++ switch (ctx.family) {
++ case AF_INET:
++ ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
++ ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
++ break;
++
++#if IS_ENABLED(CONFIG_IPV6)
++ case AF_INET6:
++ ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
++ ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
++ break;
++#endif
++
++ default:
++ ret = -EAFNOSUPPORT;
++ goto out;
++ }
++
++ progs = bpf_prog_array_alloc(1, GFP_KERNEL);
++ if (!progs) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ progs->items[0].prog = prog;
++
++ bpf_test_timer_enter(&t);
++ do {
++ ctx.selected_sk = NULL;
++ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
++ bpf_test_timer_leave(&t);
++
++ if (ret < 0)
++ goto out;
++
++ user_ctx->cookie = 0;
++ if (ctx.selected_sk) {
++ if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
++ ret = -EOPNOTSUPP;
++ goto out;
++ }
++
++ user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
++ }
++
++ ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+ if (!ret)
+- ret = bpf_ctx_finish(kattr, uattr, user_ctx,
+- sizeof(struct bpf_flow_keys));
++ ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
+
+ out:
++ bpf_prog_array_free(progs);
+ kfree(user_ctx);
+- kfree(data);
+ return ret;
+ }
+diff --git a/net/core/filter.c b/net/core/filter.c
+index e2b491665775f..815edf7bc4390 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -10334,6 +10334,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ }
+
+ const struct bpf_prog_ops sk_lookup_prog_ops = {
++ .test_run = bpf_prog_test_run_sk_lookup,
+ };
+
+ const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 428cc3a4c36f1..c71b863093ace 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
+ struct net *net = dev_net(in_dev->dev);
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+ return;
+- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+ igmp_ifc_start_timer(in_dev, 1);
+ }
+
+@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+ * received value was zero, use the default or statically
+ * configured value.
+ */
+- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+ /* RFC3376, 8.3. Query Response Interval:
+@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+ pmc->interface = im->interface;
+ in_dev_hold(in_dev);
+ pmc->multiaddr = im->multiaddr;
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ pmc->sfmode = im->sfmode;
+ if (pmc->sfmode == MCAST_INCLUDE) {
+ struct ip_sf_list *psf;
+@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+ swap(im->tomb, pmc->tomb);
+ swap(im->sources, pmc->sources);
+ for (psf = im->sources; psf; psf = psf->sf_next)
+- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ psf->sf_crcount = in_dev->mr_qrv ?:
++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ } else {
+- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ im->crcount = in_dev->mr_qrv ?:
++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ in_dev_put(pmc->interface);
+ kfree_pmc(pmc);
+@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+ if (in_dev->dead)
+ return;
+
+- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+ spin_lock_bh(&im->lock);
+ igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
+@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+ * IN() to IN(A).
+ */
+ if (im->sfmode == MCAST_EXCLUDE)
+- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+
+ igmp_ifc_event(in_dev);
+ #endif
+@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
+
+ in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+ in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ #else
+ static void ip_mc_reset(struct in_device *in_dev)
+@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+ if (psf->sf_oldin &&
+ !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ psf->sf_next = pmc->tomb;
+ pmc->tomb = psf;
+ rv = 1;
+@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ /* filter mode change */
+ pmc->sfmode = MCAST_INCLUDE;
+ #ifdef CONFIG_IP_MULTICAST
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+ for (psf = pmc->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+ /* else no filters; keep old mode for reports */
+
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+ for (psf = pmc->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index f1fd26bb199ce..78460eb39b3af 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -698,7 +698,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+ int size_goal)
+ {
+ return skb->len < size_goal &&
+- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+ !tcp_rtx_queue_empty(sk) &&
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ }
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index d817f8c31c9ce..d35e88b5ffcbe 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -503,7 +503,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+ */
+ static void tcp_init_buffer_space(struct sock *sk)
+ {
+- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int maxwin;
+
+@@ -693,7 +693,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+ * <prev RTT . ><current RTT .. ><next RTT .... >
+ */
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+@@ -2135,7 +2135,7 @@ void tcp_enter_loss(struct sock *sk)
+ * loss recovery is underway except recurring timeout(s) on
+ * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+ */
+- tp->frto = net->ipv4.sysctl_tcp_frto &&
++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
+ (new_recovery || icsk->icsk_retransmits) &&
+ !inet_csk(sk)->icsk_mtup.probe_size;
+ }
+@@ -3004,7 +3004,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+@@ -3528,7 +3528,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+
+- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++ if (0 <= elapsed &&
++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+ NET_INC_STATS(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+@@ -3576,7 +3577,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+ /* Then check host-wide RFC 5961 rate limit. */
+ now = jiffies / HZ;
+ if (now != challenge_timestamp) {
+- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
++ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
+ u32 half = (ack_limit + 1) >> 1;
+
+ challenge_timestamp = now;
+@@ -4367,7 +4368,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+ int mib_idx;
+
+ if (before(seq, tp->rcv_nxt))
+@@ -4414,7 +4415,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+
+- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ tcp_rcv_spurious_retrans(sk, skb);
+@@ -5439,7 +5440,7 @@ send_now:
+ }
+
+ if (!tcp_is_sack(tp) ||
+- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+ goto send_now;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+@@ -5460,11 +5461,12 @@ send_now:
+ if (tp->srtt_us && tp->srtt_us < rtt)
+ rtt = tp->srtt_us;
+
+- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++ delay = min_t(unsigned long,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+ rtt * (NSEC_PER_USEC >> 3)/20);
+ sock_hold(sk);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+ HRTIMER_MODE_REL_PINNED_SOFT);
+ }
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index d5f13ff7d9004..0d165ce2d80a7 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (skb) {
+ __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+
+- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (inet_sk(sk)->tos & INET_ECN_MASK) :
+ inet_sk(sk)->tos;
+@@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ if (!dst) {
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 8d7e32f4abf67..f3ca6eea2ca39 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
+ int m;
+
+ sk_dst_confirm(sk);
+- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
+ return;
+
+ rcu_read_lock();
+@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
+
+ if (tcp_in_initial_slowstart(tp)) {
+ /* Slow start still did not finish. */
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val && (tp->snd_cwnd >> 1) > val)
+@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
+ } else if (!tcp_in_slow_start(tp) &&
+ icsk->icsk_ca_state == TCP_CA_Open) {
+ /* Cong. avoidance phase, cwnd is reliable. */
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
+@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ (val + tp->snd_ssthresh) >> 1);
+ }
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val && tp->snd_ssthresh > val)
+@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
+ if (tcp_metric_locked(tm, TCP_METRIC_CWND))
+ tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
+
+- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val) {
+ tp->snd_ssthresh = val;
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 9b67c61576e4c..657b0a4d93599 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
+ if (tcp_packets_in_flight(tp) == 0)
+ tcp_ca_event(sk, CA_EVENT_TX_START);
+
+- /* If this is the first data packet sent in response to the
+- * previous received data,
+- * and it is a reply for ato after last received packet,
+- * increase pingpong count.
+- */
+- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+- inet_csk_inc_pingpong_cnt(sk);
+-
+ tp->lsndtime = now;
++
++ /* If it is a reply for ato after last received
++ * packet, enter pingpong mode.
++ */
++ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
++ inet_csk_enter_pingpong_mode(sk);
+ }
+
+ /* Account for an ACK we sent. */
+@@ -1987,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+@@ -2502,7 +2499,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
+ limit <<= factor;
+
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 6ac88fe24a8e0..135e3a060caa8 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -22,6 +22,11 @@
+ #include <linux/proc_fs.h>
+ #include <net/ping.h>
+
++static void ping_v6_destroy(struct sock *sk)
++{
++ inet6_destroy_sock(sk);
++}
++
+ /* Compatibility glue so we can support IPv6 when it's compiled as a module */
+ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
+@@ -166,6 +171,7 @@ struct proto pingv6_prot = {
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
++ .destroy = ping_v6_destroy,
+ .connect = ip6_datagram_connect_v6_only,
+ .disconnect = __udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 303b54414a6cc..8d91f36cb11bc 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (np->repflow && ireq->pktopts)
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+
+- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (np->tclass & INET_ECN_MASK) :
+ np->tclass;
+@@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ /* Clone native IPv6 options from listening socket (if any)
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c |
2638 |
+index 8123c79e27913..d0e91aa7b30e5 100644 |
2639 |
+--- a/net/mptcp/protocol.c |
2640 |
++++ b/net/mptcp/protocol.c |
2641 |
+@@ -1421,7 +1421,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) |
2642 |
+ if (msk->rcvq_space.copied <= msk->rcvq_space.space) |
2643 |
+ goto new_measure; |
2644 |
+ |
2645 |
+- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && |
2646 |
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && |
2647 |
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { |
2648 |
+ int rcvmem, rcvbuf; |
2649 |
+ u64 rcvwin, grow; |
2650 |
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c |
2651 |
+index 1640da5c50776..72d30922ed290 100644 |
2652 |
+--- a/net/netfilter/nfnetlink_queue.c |
2653 |
++++ b/net/netfilter/nfnetlink_queue.c |
2654 |
+@@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) |
2655 |
+ } |
2656 |
+ |
2657 |
+ static int |
2658 |
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) |
2659 |
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) |
2660 |
+ { |
2661 |
+ struct sk_buff *nskb; |
2662 |
+ |
2663 |
+ if (diff < 0) { |
2664 |
++ unsigned int min_len = skb_transport_offset(e->skb); |
2665 |
++ |
2666 |
++ if (data_len < min_len) |
2667 |
++ return -EINVAL; |
2668 |
++ |
2669 |
+ if (pskb_trim(e->skb, data_len)) |
2670 |
+ return -ENOMEM; |
2671 |
+ } else if (diff > 0) { |
2672 |
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c |
2673 |
+index fdb69d46276d6..2d4ec61877553 100644 |
2674 |
+--- a/net/sctp/associola.c |
2675 |
++++ b/net/sctp/associola.c |
2676 |
+@@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init( |
2677 |
+ if (!sctp_ulpq_init(&asoc->ulpq, asoc)) |
2678 |
+ goto fail_init; |
2679 |
+ |
2680 |
+- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, |
2681 |
+- 0, gfp)) |
2682 |
+- goto fail_init; |
2683 |
++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) |
2684 |
++ goto stream_free; |
2685 |
+ |
2686 |
+ /* Initialize default path MTU. */ |
2687 |
+ asoc->pathmtu = sp->pathmtu; |
2688 |
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index 6dc95dcc0ff4f..ef9fceadef8d5 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ for (i = 0; i < stream->outcnt; i++)
+ SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+ sctp_stream_interleave_init(stream);
+ if (!incnt)
+- goto out;
+-
+- ret = sctp_stream_alloc_in(stream, incnt, gfp);
+- if (ret)
+- goto in_err;
+-
+- goto out;
++ return 0;
+
+-in_err:
+- sched->free(stream);
+- genradix_free(&stream->in);
+-out_err:
+- genradix_free(&stream->out);
+- stream->outcnt = 0;
+-out:
+- return ret;
++ return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb742..a2e1d34f52c5b 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+ if (!SCTP_SO(&asoc->stream, i)->ext)
+ continue;
+
+- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+ if (ret)
+ goto err;
+ }
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index 23eab7ac43ee5..5cb6846544cc7 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1349,8 +1349,13 @@ static int tls_device_down(struct net_device *netdev)
+ * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ * Now release the ref taken above.
+ */
+- if (refcount_dec_and_test(&ctx->refcount))
++ if (refcount_dec_and_test(&ctx->refcount)) {
++ /* sk_destruct ran after tls_device_down took a ref, and
++ * it returned early. Complete the destruction here.
++ */
++ list_del(&ctx->list);
+ tls_device_free_ctx(ctx);
++ }
+ }
+
+ up_write(&device_offload_lock);
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index e440cd7f32a6f..b9ee2ded381ab 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -5006,7 +5006,10 @@ struct bpf_pidns_info {
+
+ /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+ struct bpf_sk_lookup {
+- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++ union {
++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
++ };
+
+ __u32 family; /* Protocol family (AF_INET, AF_INET6) */
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index 94809aed8b447..1cab29d45bfb3 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ return NULL;
+ }
+
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++ size_t i, phdrnum;
++ u64 sz;
++
++ if (elf_getphdrnum(elf, &phdrnum))
++ return -1;
++
++ for (i = 0; i < phdrnum; i++) {
++ if (gelf_getphdr(elf, i, phdr) == NULL)
++ return -1;
++
++ if (phdr->p_type != PT_LOAD)
++ continue;
++
++ sz = max(phdr->p_memsz, phdr->p_filesz);
++ if (!sz)
++ continue;
++
++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++ return 0;
++ }
++
++ /* Not found any valid program header */
++ return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ sym.st_value);
+ used_opd = true;
+ }
++
+ /*
+ * When loading symbols in a data mapping, ABS symbols (which
+ * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
++ GElf_Phdr phdr;
++
++ if (elf_read_program_header(syms_ss->elf,
++ (u64)sym.st_value, &phdr)) {
++ pr_warning("%s: failed to find program header for "
++ "symbol: %s st_value: %#" PRIx64 "\n",
++ __func__, elf_name, (u64)sym.st_value);
++ continue;
++ }
+ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+- (u64)sym.st_value, (u64)shdr.sh_addr,
+- (u64)shdr.sh_offset);
+- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++ (u64)phdr.p_offset);
++ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+ }
+
+ demangled = demangle_sym(dso, kmodule, elf_name);
+diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
+index a4c55fcb0e7b1..0fb92d9a319b7 100644
+--- a/tools/testing/selftests/bpf/test_verifier.c
++++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -100,7 +100,7 @@ struct bpf_test {
+ enum bpf_prog_type prog_type;
+ uint8_t flags;
+ void (*fill_helper)(struct bpf_test *self);
+- uint8_t runs;
++ int runs;
+ #define bpf_testdata_struct_t \
+ struct { \
+ uint32_t retval, retval_unpriv; \
+@@ -1054,7 +1054,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
+
+ run_errs = 0;
+ run_successes = 0;
+- if (!alignment_prevented_execution && fd_prog >= 0) {
++ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
+ uint32_t expected_val;
+ int i;
+
+diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+index 2ad5f974451c3..fd3b62a084b9f 100644
+--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
++++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+@@ -239,6 +239,7 @@
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
++ .runs = -1,
+ },
+ /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
+ {