Gentoo Archives: gentoo-commits

From: Alice Ferrazzi <alicef@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.10 commit in: /
Date: Wed, 03 Aug 2022 14:25:01
Message-Id: 1659535964.206a5e2746ef7fe6e5960e2af948e1eedef7e208.alicef@gentoo
1 commit: 206a5e2746ef7fe6e5960e2af948e1eedef7e208
2 Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
3 AuthorDate: Wed Aug 3 14:12:37 2022 +0000
4 Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
5 CommitDate: Wed Aug 3 14:12:44 2022 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=206a5e27
7
8 Linux patch 5.10.135
9
10 Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org>
11
12 0000_README | 4 +
13 1134_linux-5.10.135.patch | 2841 +++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 2845 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index 7292c57d..19bd6321 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -579,6 +579,10 @@ Patch: 1133_linux-5.10.134.patch
21 From: http://www.kernel.org
22 Desc: Linux 5.10.134
23
24 +Patch: 1134_linux-5.10.135.patch
25 +From: http://www.kernel.org
26 +Desc: Linux 5.10.135
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1134_linux-5.10.135.patch b/1134_linux-5.10.135.patch
33 new file mode 100644
34 index 00000000..435afe17
35 --- /dev/null
36 +++ b/1134_linux-5.10.135.patch
37 @@ -0,0 +1,2841 @@
38 +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
39 +index 1a58c580b2366..8b7c26d090459 100644
40 +--- a/Documentation/admin-guide/kernel-parameters.txt
41 ++++ b/Documentation/admin-guide/kernel-parameters.txt
42 +@@ -2873,6 +2873,7 @@
43 + no_entry_flush [PPC]
44 + no_uaccess_flush [PPC]
45 + mmio_stale_data=off [X86]
46 ++ retbleed=off [X86]
47 +
48 + Exceptions:
49 + This does not have any effect on
50 +@@ -2895,6 +2896,7 @@
51 + mds=full,nosmt [X86]
52 + tsx_async_abort=full,nosmt [X86]
53 + mmio_stale_data=full,nosmt [X86]
54 ++ retbleed=auto,nosmt [X86]
55 +
56 + mminit_loglevel=
57 + [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
58 +diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
59 +index 0b1f3235aa773..0158dff638873 100644
60 +--- a/Documentation/networking/ip-sysctl.rst
61 ++++ b/Documentation/networking/ip-sysctl.rst
62 +@@ -2629,7 +2629,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
63 + Default: 4K
64 +
65 + sctp_wmem - vector of 3 INTEGERs: min, default, max
66 +- Currently this tunable has no effect.
67 ++ Only the first value ("min") is used, "default" and "max" are
68 ++ ignored.
69 ++
70 ++ min: Minimum size of send buffer that can be used by SCTP sockets.
71 ++ It is guaranteed to each SCTP socket (but not association) even
72 ++ under moderate memory pressure.
73 ++
74 ++ Default: 4K
75 +
76 + addr_scope_policy - INTEGER
77 + Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
78 +diff --git a/Makefile b/Makefile
79 +index 00dddc2ac804a..5f4dbcb433075 100644
80 +--- a/Makefile
81 ++++ b/Makefile
82 +@@ -1,7 +1,7 @@
83 + # SPDX-License-Identifier: GPL-2.0
84 + VERSION = 5
85 + PATCHLEVEL = 10
86 +-SUBLEVEL = 134
87 ++SUBLEVEL = 135
88 + EXTRAVERSION =
89 + NAME = Dare mighty things
90 +
91 +diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
92 +index a81dda65c5762..45180a2cc47cb 100644
93 +--- a/arch/arm/include/asm/dma.h
94 ++++ b/arch/arm/include/asm/dma.h
95 +@@ -10,7 +10,7 @@
96 + #else
97 + #define MAX_DMA_ADDRESS ({ \
98 + extern phys_addr_t arm_dma_zone_size; \
99 +- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
100 ++ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
101 + (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
102 + #endif
103 +
104 +diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
105 +index b99dd8e1c93f1..7ba6cf8261626 100644
106 +--- a/arch/arm/lib/xor-neon.c
107 ++++ b/arch/arm/lib/xor-neon.c
108 +@@ -26,8 +26,9 @@ MODULE_LICENSE("GPL");
109 + * While older versions of GCC do not generate incorrect code, they fail to
110 + * recognize the parallel nature of these functions, and emit plain ARM code,
111 + * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
112 ++ *
113 ++ * #warning This code requires at least version 4.6 of GCC
114 + */
115 +-#warning This code requires at least version 4.6 of GCC
116 + #endif
117 +
118 + #pragma GCC diagnostic ignored "-Wunused-variable"
119 +diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
120 +index 2c6e1c6ecbe78..4120c428dc378 100644
121 +--- a/arch/s390/include/asm/archrandom.h
122 ++++ b/arch/s390/include/asm/archrandom.h
123 +@@ -2,7 +2,7 @@
124 + /*
125 + * Kernel interface for the s390 arch_random_* functions
126 + *
127 +- * Copyright IBM Corp. 2017, 2020
128 ++ * Copyright IBM Corp. 2017, 2022
129 + *
130 + * Author: Harald Freudenberger <freude@××××××.com>
131 + *
132 +@@ -14,6 +14,7 @@
133 + #ifdef CONFIG_ARCH_RANDOM
134 +
135 + #include <linux/static_key.h>
136 ++#include <linux/preempt.h>
137 + #include <linux/atomic.h>
138 + #include <asm/cpacf.h>
139 +
140 +@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
141 +
142 + static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
143 + {
144 +- if (static_branch_likely(&s390_arch_random_available)) {
145 ++ if (static_branch_likely(&s390_arch_random_available) &&
146 ++ in_task()) {
147 + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
148 + atomic64_add(sizeof(*v), &s390_arch_random_counter);
149 + return true;
150 +@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
151 +
152 + static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
153 + {
154 +- if (static_branch_likely(&s390_arch_random_available)) {
155 ++ if (static_branch_likely(&s390_arch_random_available) &&
156 ++ in_task()) {
157 + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
158 + atomic64_add(sizeof(*v), &s390_arch_random_counter);
159 + return true;
160 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
161 +index 7896b67dda420..2e5762faf7740 100644
162 +--- a/arch/x86/kernel/cpu/bugs.c
163 ++++ b/arch/x86/kernel/cpu/bugs.c
164 +@@ -1476,6 +1476,7 @@ static void __init spectre_v2_select_mitigation(void)
165 + * enable IBRS around firmware calls.
166 + */
167 + if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
168 ++ boot_cpu_has(X86_FEATURE_IBPB) &&
169 + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
170 + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
171 +
172 +diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
173 +index a918ca93e4f7d..df5897c90becc 100644
174 +--- a/drivers/edac/ghes_edac.c
175 ++++ b/drivers/edac/ghes_edac.c
176 +@@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
177 +
178 + dmi_memdev_name(handle, &bank, &device);
179 +
180 +- /* both strings must be non-zero */
181 +- if (bank && *bank && device && *device)
182 +- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
183 ++ /*
184 ++ * Set to a NULL string when both bank and device are zero. In this case,
185 ++ * the label assigned by default will be preserved.
186 ++ */
187 ++ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
188 ++ (bank && *bank) ? bank : "",
189 ++ (bank && *bank && device && *device) ? " " : "",
190 ++ (device && *device) ? device : "");
191 + }
192 +
193 + static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
194 +diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
195 +index 92987daa5e17d..5e72e6cb2f840 100644
196 +--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
197 ++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
198 +@@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
199 + goto out_free_dma;
200 +
201 + for (i = 0; i < npages; i += max) {
202 +- args.end = start + (max << PAGE_SHIFT);
203 ++ if (args.start + (max << PAGE_SHIFT) > end)
204 ++ args.end = end;
205 ++ else
206 ++ args.end = args.start + (max << PAGE_SHIFT);
207 ++
208 + ret = migrate_vma_setup(&args);
209 + if (ret)
210 + goto out_free_pfns;
211 +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
212 +index 11d4e3ba9af4c..1dad62ecb8a3a 100644
213 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
214 ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
215 +@@ -1907,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
216 + * non-zero req_queue_pairs says that user requested a new
217 + * queue count via ethtool's set_channels, so use this
218 + * value for queues distribution across traffic classes
219 ++ * We need at least one queue pair for the interface
220 ++ * to be usable, as we see in the else statement.
221 + */
222 + if (vsi->req_queue_pairs > 0)
223 + vsi->num_queue_pairs = vsi->req_queue_pairs;
224 + else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
225 + vsi->num_queue_pairs = pf->num_lan_msix;
226 ++ else
227 ++ vsi->num_queue_pairs = 1;
228 + }
229 +
230 + /* Number of queues per enabled TC */
231 +diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
232 +index 060897eb9cabe..7f1bf71844bce 100644
233 +--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
234 ++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
235 +@@ -652,7 +652,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
236 + rx_desc = ICE_RX_DESC(rx_ring, i);
237 +
238 + if (!(rx_desc->wb.status_error0 &
239 +- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
240 ++ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
241 ++ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
242 + continue;
243 +
244 + rx_buf = &rx_ring->rx_buf[i];
245 +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
246 +index aae79fdd51727..810f2bdb91645 100644
247 +--- a/drivers/net/ethernet/intel/ice/ice_main.c
248 ++++ b/drivers/net/ethernet/intel/ice/ice_main.c
249 +@@ -5203,10 +5203,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
250 + if (vsi->netdev) {
251 + ice_set_rx_mode(vsi->netdev);
252 +
253 +- err = ice_vsi_vlan_setup(vsi);
254 ++ if (vsi->type != ICE_VSI_LB) {
255 ++ err = ice_vsi_vlan_setup(vsi);
256 +
257 +- if (err)
258 +- return err;
259 ++ if (err)
260 ++ return err;
261 ++ }
262 + }
263 + ice_vsi_cfg_dcb_rings(vsi);
264 +
265 +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
266 +index 725b0f38813a9..a2b4e3befa591 100644
267 +--- a/drivers/net/ethernet/sfc/ptp.c
268 ++++ b/drivers/net/ethernet/sfc/ptp.c
269 +@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
270 +
271 + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
272 + if (tx_queue && tx_queue->timestamping) {
273 ++ /* This code invokes normal driver TX code which is always
274 ++ * protected from softirqs when called from generic TX code,
275 ++ * which in turn disables preemption. Look at __dev_queue_xmit
276 ++ * which uses rcu_read_lock_bh disabling preemption for RCU
277 ++ * plus disabling softirqs. We do not need RCU reader
278 ++ * protection here.
279 ++ *
280 ++ * Although it is theoretically safe for current PTP TX/RX code
281 ++ * running without disabling softirqs, there are three good
282 ++ * reasons for doing so:
283 ++ *
284 ++ * 1) The code invoked is mainly implemented for non-PTP
285 ++ * packets and it is always executed with softirqs
286 ++ * disabled.
287 ++ * 2) This being a single PTP packet, better to not
288 ++ * interrupt its processing by softirqs which can lead
289 ++ * to high latencies.
290 ++ * 3) netdev_xmit_more checks preemption is disabled and
291 ++ * triggers a BUG_ON if not.
292 ++ */
293 ++ local_bh_disable();
294 + efx_enqueue_skb(tx_queue, skb);
295 ++ local_bh_enable();
296 + } else {
297 + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
298 + dev_kfree_skb_any(skb);
299 +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
300 +index 789a124809e3c..70c5905a916b9 100644
301 +--- a/drivers/net/macsec.c
302 ++++ b/drivers/net/macsec.c
303 +@@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
304 + #define DEFAULT_SEND_SCI true
305 + #define DEFAULT_ENCRYPT false
306 + #define DEFAULT_ENCODING_SA 0
307 ++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
308 +
309 + static bool send_sci(const struct macsec_secy *secy)
310 + {
311 +@@ -1694,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
312 + return false;
313 +
314 + if (attrs[MACSEC_SA_ATTR_PN] &&
315 +- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
316 ++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
317 + return false;
318 +
319 + if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
320 +@@ -1750,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
321 + }
322 +
323 + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
324 +- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
325 ++ if (tb_sa[MACSEC_SA_ATTR_PN] &&
326 ++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
327 + pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
328 + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
329 + rtnl_unlock();
330 +@@ -1766,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
331 + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
332 + pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
333 + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
334 +- MACSEC_SA_ATTR_SALT);
335 ++ MACSEC_SALT_LEN);
336 + rtnl_unlock();
337 + return -EINVAL;
338 + }
339 +@@ -1839,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
340 + return 0;
341 +
342 + cleanup:
343 +- kfree(rx_sa);
344 ++ macsec_rxsa_put(rx_sa);
345 + rtnl_unlock();
346 + return err;
347 + }
348 +@@ -1936,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
349 + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
350 + return false;
351 +
352 +- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
353 ++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
354 + return false;
355 +
356 + if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
357 +@@ -2008,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
358 + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
359 + pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
360 + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
361 +- MACSEC_SA_ATTR_SALT);
362 ++ MACSEC_SALT_LEN);
363 + rtnl_unlock();
364 + return -EINVAL;
365 + }
366 +@@ -2082,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
367 +
368 + cleanup:
369 + secy->operational = was_operational;
370 +- kfree(tx_sa);
371 ++ macsec_txsa_put(tx_sa);
372 + rtnl_unlock();
373 + return err;
374 + }
375 +@@ -2290,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
376 + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
377 + return false;
378 +
379 +- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
380 ++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
381 + return false;
382 +
383 + if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
384 +@@ -3737,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev,
385 + secy->operational = tx_sa && tx_sa->active;
386 + }
387 +
388 +- if (data[IFLA_MACSEC_WINDOW])
389 +- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
390 +-
391 + if (data[IFLA_MACSEC_ENCRYPT])
392 + tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
393 +
394 +@@ -3785,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev,
395 + }
396 + }
397 +
398 ++ if (data[IFLA_MACSEC_WINDOW]) {
399 ++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
400 ++
401 ++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
402 ++ * for XPN cipher suites */
403 ++ if (secy->xpn &&
404 ++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
405 ++ return -EINVAL;
406 ++ }
407 ++
408 + return 0;
409 + }
410 +
411 +@@ -3814,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
412 +
413 + ret = macsec_changelink_common(dev, data);
414 + if (ret)
415 +- return ret;
416 ++ goto cleanup;
417 +
418 + /* If h/w offloading is available, propagate to the device */
419 + if (macsec_is_offloaded(macsec)) {
420 +diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
421 +index 291fa449993fb..45f295403cb55 100644
422 +--- a/drivers/net/sungem_phy.c
423 ++++ b/drivers/net/sungem_phy.c
424 +@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
425 + int can_low_power = 1;
426 + if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
427 + can_low_power = 0;
428 ++ of_node_put(np);
429 + if (can_low_power) {
430 + /* Enable automatic low-power */
431 + sungem_phy_write(phy, 0x1c, 0x9002);
432 +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
433 +index 37178b078ee37..0a07c05a610d1 100644
434 +--- a/drivers/net/virtio_net.c
435 ++++ b/drivers/net/virtio_net.c
436 +@@ -213,9 +213,15 @@ struct virtnet_info {
437 + /* Packet virtio header size */
438 + u8 hdr_len;
439 +
440 +- /* Work struct for refilling if we run low on memory. */
441 ++ /* Work struct for delayed refilling if we run low on memory. */
442 + struct delayed_work refill;
443 +
444 ++ /* Is delayed refill enabled? */
445 ++ bool refill_enabled;
446 ++
447 ++ /* The lock to synchronize the access to refill_enabled */
448 ++ spinlock_t refill_lock;
449 ++
450 + /* Work struct for config space updates */
451 + struct work_struct config_work;
452 +
453 +@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
454 + return p;
455 + }
456 +
457 ++static void enable_delayed_refill(struct virtnet_info *vi)
458 ++{
459 ++ spin_lock_bh(&vi->refill_lock);
460 ++ vi->refill_enabled = true;
461 ++ spin_unlock_bh(&vi->refill_lock);
462 ++}
463 ++
464 ++static void disable_delayed_refill(struct virtnet_info *vi)
465 ++{
466 ++ spin_lock_bh(&vi->refill_lock);
467 ++ vi->refill_enabled = false;
468 ++ spin_unlock_bh(&vi->refill_lock);
469 ++}
470 ++
471 + static void virtqueue_napi_schedule(struct napi_struct *napi,
472 + struct virtqueue *vq)
473 + {
474 +@@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
475 + }
476 +
477 + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
478 +- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
479 +- schedule_delayed_work(&vi->refill, 0);
480 ++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
481 ++ spin_lock(&vi->refill_lock);
482 ++ if (vi->refill_enabled)
483 ++ schedule_delayed_work(&vi->refill, 0);
484 ++ spin_unlock(&vi->refill_lock);
485 ++ }
486 + }
487 +
488 + u64_stats_update_begin(&rq->stats.syncp);
489 +@@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev)
490 + struct virtnet_info *vi = netdev_priv(dev);
491 + int i, err;
492 +
493 ++ enable_delayed_refill(vi);
494 ++
495 + for (i = 0; i < vi->max_queue_pairs; i++) {
496 + if (i < vi->curr_queue_pairs)
497 + /* Make sure we have some buffers: if oom use wq. */
498 +@@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev)
499 + struct virtnet_info *vi = netdev_priv(dev);
500 + int i;
501 +
502 ++ /* Make sure NAPI doesn't schedule refill work */
503 ++ disable_delayed_refill(vi);
504 + /* Make sure refill_work doesn't re-enable napi! */
505 + cancel_delayed_work_sync(&vi->refill);
506 +
507 +@@ -2390,6 +2418,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
508 +
509 + virtio_device_ready(vdev);
510 +
511 ++ enable_delayed_refill(vi);
512 ++
513 + if (netif_running(vi->dev)) {
514 + err = virtnet_open(vi->dev);
515 + if (err)
516 +@@ -3092,6 +3122,7 @@ static int virtnet_probe(struct virtio_device *vdev)
517 + vdev->priv = vi;
518 +
519 + INIT_WORK(&vi->config_work, virtnet_config_changed_work);
520 ++ spin_lock_init(&vi->refill_lock);
521 +
522 + /* If we can receive ANY GSO packets, we must allocate large ones. */
523 + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
524 +diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
525 +index 6bcc4a13ae6c7..cc772045d526f 100644
526 +--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
527 ++++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
528 +@@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = {
529 + { USB_DEVICE(0x2717, 0x4106) },
530 + { USB_DEVICE(0x2955, 0x0001) },
531 + { USB_DEVICE(0x2955, 0x1001) },
532 ++ { USB_DEVICE(0x2955, 0x1003) },
533 + { USB_DEVICE(0x2a5f, 0x1000) },
534 + { USB_DEVICE(0x7392, 0x7710) },
535 + { 0, }
536 +diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
537 +index 0f2430fb398db..576cc39077f32 100644
538 +--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
539 ++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
540 +@@ -107,9 +107,20 @@ out:
541 + return ret;
542 + }
543 +
544 ++static bool phandle_exists(const struct device_node *np,
545 ++ const char *phandle_name, int index)
546 ++{
547 ++ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
548 ++
549 ++ if (parse_np)
550 ++ of_node_put(parse_np);
551 ++
552 ++ return parse_np != NULL;
553 ++}
554 ++
555 + #define MAX_PROP_SIZE 32
556 + static int ufshcd_populate_vreg(struct device *dev, const char *name,
557 +- struct ufs_vreg **out_vreg)
558 ++ struct ufs_vreg **out_vreg)
559 + {
560 + int ret = 0;
561 + char prop_name[MAX_PROP_SIZE];
562 +@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
563 + }
564 +
565 + snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
566 +- if (!of_parse_phandle(np, prop_name, 0)) {
567 ++ if (!phandle_exists(np, prop_name, 0)) {
568 + dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
569 + __func__, prop_name);
570 + goto out;
571 +diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
572 +index d563abc3e1364..914e991731300 100644
573 +--- a/fs/ntfs/attrib.c
574 ++++ b/fs/ntfs/attrib.c
575 +@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
576 + a = (ATTR_RECORD*)((u8*)ctx->attr +
577 + le32_to_cpu(ctx->attr->length));
578 + for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
579 +- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
580 +- le32_to_cpu(ctx->mrec->bytes_allocated))
581 ++ u8 *mrec_end = (u8 *)ctx->mrec +
582 ++ le32_to_cpu(ctx->mrec->bytes_allocated);
583 ++ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
584 ++ a->name_length * sizeof(ntfschar);
585 ++ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
586 ++ name_end > mrec_end)
587 + break;
588 + ctx->attr = a;
589 + if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
590 +diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
591 +index 7993d527edae9..0a8cd8e59a92c 100644
592 +--- a/fs/ocfs2/ocfs2.h
593 ++++ b/fs/ocfs2/ocfs2.h
594 +@@ -279,7 +279,6 @@ enum ocfs2_mount_options
595 + OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
596 + OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
597 + OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
598 +- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
599 + };
600 +
601 + #define OCFS2_OSB_SOFT_RO 0x0001
602 +@@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
603 +
604 + static inline int ocfs2_mount_local(struct ocfs2_super *osb)
605 + {
606 +- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
607 +- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
608 ++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
609 + }
610 +
611 + static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
612 +diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
613 +index 4da0e4b1e79bf..8caeceeaeda7c 100644
614 +--- a/fs/ocfs2/slot_map.c
615 ++++ b/fs/ocfs2/slot_map.c
616 +@@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
617 + int i, ret = -ENOSPC;
618 +
619 + if ((preferred >= 0) && (preferred < si->si_num_slots)) {
620 +- if (!si->si_slots[preferred].sl_valid ||
621 +- !si->si_slots[preferred].sl_node_num) {
622 ++ if (!si->si_slots[preferred].sl_valid) {
623 + ret = preferred;
624 + goto out;
625 + }
626 + }
627 +
628 + for(i = 0; i < si->si_num_slots; i++) {
629 +- if (!si->si_slots[i].sl_valid ||
630 +- !si->si_slots[i].sl_node_num) {
631 ++ if (!si->si_slots[i].sl_valid) {
632 + ret = i;
633 + break;
634 + }
635 +@@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
636 + spin_lock(&osb->osb_lock);
637 + ocfs2_update_slot_info(si);
638 +
639 +- if (ocfs2_mount_local(osb))
640 +- /* use slot 0 directly in local mode */
641 +- slot = 0;
642 +- else {
643 +- /* search for ourselves first and take the slot if it already
644 +- * exists. Perhaps we need to mark this in a variable for our
645 +- * own journal recovery? Possibly not, though we certainly
646 +- * need to warn to the user */
647 +- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
648 ++ /* search for ourselves first and take the slot if it already
649 ++ * exists. Perhaps we need to mark this in a variable for our
650 ++ * own journal recovery? Possibly not, though we certainly
651 ++ * need to warn to the user */
652 ++ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
653 ++ if (slot < 0) {
654 ++ /* if no slot yet, then just take 1st available
655 ++ * one. */
656 ++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
657 + if (slot < 0) {
658 +- /* if no slot yet, then just take 1st available
659 +- * one. */
660 +- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
661 +- if (slot < 0) {
662 +- spin_unlock(&osb->osb_lock);
663 +- mlog(ML_ERROR, "no free slots available!\n");
664 +- status = -EINVAL;
665 +- goto bail;
666 +- }
667 +- } else
668 +- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
669 +- "already allocated to this node!\n",
670 +- slot, osb->dev_str);
671 +- }
672 ++ spin_unlock(&osb->osb_lock);
673 ++ mlog(ML_ERROR, "no free slots available!\n");
674 ++ status = -EINVAL;
675 ++ goto bail;
676 ++ }
677 ++ } else
678 ++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
679 ++ "allocated to this node!\n", slot, osb->dev_str);
680 +
681 + ocfs2_set_slot(si, slot, osb->node_num);
682 + osb->slot_num = slot;
683 +diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
684 +index 477ad05a34ea2..c0e5f1bad499f 100644
685 +--- a/fs/ocfs2/super.c
686 ++++ b/fs/ocfs2/super.c
687 +@@ -175,7 +175,6 @@ enum {
688 + Opt_dir_resv_level,
689 + Opt_journal_async_commit,
690 + Opt_err_cont,
691 +- Opt_nocluster,
692 + Opt_err,
693 + };
694 +
695 +@@ -209,7 +208,6 @@ static const match_table_t tokens = {
696 + {Opt_dir_resv_level, "dir_resv_level=%u"},
697 + {Opt_journal_async_commit, "journal_async_commit"},
698 + {Opt_err_cont, "errors=continue"},
699 +- {Opt_nocluster, "nocluster"},
700 + {Opt_err, NULL}
701 + };
702 +
703 +@@ -621,13 +619,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
704 + goto out;
705 + }
706 +
707 +- tmp = OCFS2_MOUNT_NOCLUSTER;
708 +- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
709 +- ret = -EINVAL;
710 +- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
711 +- goto out;
712 +- }
713 +-
714 + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
715 + OCFS2_MOUNT_HB_NONE;
716 + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
717 +@@ -868,7 +859,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
718 + }
719 +
720 + if (ocfs2_userspace_stack(osb) &&
721 +- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
722 + strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
723 + OCFS2_STACK_LABEL_LEN)) {
724 + mlog(ML_ERROR,
725 +@@ -1149,11 +1139,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
726 + osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
727 + "ordered");
728 +
729 +- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
730 +- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
731 +- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
732 +- "without cluster aware mode.\n", osb->dev_str);
733 +-
734 + atomic_set(&osb->vol_state, VOLUME_MOUNTED);
735 + wake_up(&osb->osb_mount_event);
736 +
737 +@@ -1460,9 +1445,6 @@ static int ocfs2_parse_options(struct super_block *sb,
738 + case Opt_journal_async_commit:
739 + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
740 + break;
741 +- case Opt_nocluster:
742 +- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
743 +- break;
744 + default:
745 + mlog(ML_ERROR,
746 + "Unrecognized mount option \"%s\" "
747 +@@ -1574,9 +1556,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
748 + if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
749 + seq_printf(s, ",journal_async_commit");
750 +
751 +- if (opts & OCFS2_MOUNT_NOCLUSTER)
752 +- seq_printf(s, ",nocluster");
753 +-
754 + return 0;
755 + }
756 +
757 +diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
758 +index 8bd00da6d2a40..2f46ef3800aa2 100644
759 +--- a/fs/xfs/libxfs/xfs_log_format.h
760 ++++ b/fs/xfs/libxfs/xfs_log_format.h
761 +@@ -414,7 +414,16 @@ struct xfs_log_dinode {
762 + /* start of the extended dinode, writable fields */
763 + uint32_t di_crc; /* CRC of the inode */
764 + uint64_t di_changecount; /* number of attribute changes */
765 +- xfs_lsn_t di_lsn; /* flush sequence */
766 ++
767 ++ /*
768 ++ * The LSN we write to this field during formatting is not a reflection
769 ++ * of the current on-disk LSN. It should never be used for recovery
770 ++ * sequencing, nor should it be recovered into the on-disk inode at all.
771 ++ * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
772 ++ * for details.
773 ++ */
774 ++ xfs_lsn_t di_lsn;
775 ++
776 + uint64_t di_flags2; /* more random flags */
777 + uint32_t di_cowextsize; /* basic cow extent size for file */
778 + uint8_t di_pad2[12]; /* more padding for future expansion */
779 +diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
780 +index 397d94775440d..1ce06173c2f55 100644
781 +--- a/fs/xfs/libxfs/xfs_types.h
782 ++++ b/fs/xfs/libxfs/xfs_types.h
783 +@@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
784 + typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */
785 +
786 + typedef int64_t xfs_lsn_t; /* log sequence number */
787 ++typedef int64_t xfs_csn_t; /* CIL sequence number */
788 +
789 + typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
790 + typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
791 +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
792 +index 8c6e26d62ef28..a3d5ecccfc2cc 100644
793 +--- a/fs/xfs/xfs_buf_item.c
794 ++++ b/fs/xfs/xfs_buf_item.c
795 +@@ -393,17 +393,8 @@ xfs_buf_item_pin(
796 + }
797 +
798 + /*
799 +- * This is called to unpin the buffer associated with the buf log
800 +- * item which was previously pinned with a call to xfs_buf_item_pin().
801 +- *
802 +- * Also drop the reference to the buf item for the current transaction.
803 +- * If the XFS_BLI_STALE flag is set and we are the last reference,
804 +- * then free up the buf log item and unlock the buffer.
805 +- *
806 +- * If the remove flag is set we are called from uncommit in the
807 +- * forced-shutdown path. If that is true and the reference count on
808 +- * the log item is going to drop to zero we need to free the item's
809 +- * descriptor in the transaction.
810 ++ * This is called to unpin the buffer associated with the buf log item which
811 ++ * was previously pinned with a call to xfs_buf_item_pin().
812 + */
813 + STATIC void
814 + xfs_buf_item_unpin(
815 +@@ -420,38 +411,35 @@ xfs_buf_item_unpin(
816 +
817 + trace_xfs_buf_item_unpin(bip);
818 +
819 ++ /*
820 ++ * Drop the bli ref associated with the pin and grab the hold required
821 ++ * for the I/O simulation failure in the abort case. We have to do this
822 ++ * before the pin count drops because the AIL doesn't acquire a bli
823 ++ * reference. Therefore if the refcount drops to zero, the bli could
824 ++ * still be AIL resident and the buffer submitted for I/O (and freed on
825 ++ * completion) at any point before we return. This can be removed once
826 ++ * the AIL properly holds a reference on the bli.
827 ++ */
828 + freed = atomic_dec_and_test(&bip->bli_refcount);
829 +-
830 ++ if (freed && !stale && remove)
831 ++ xfs_buf_hold(bp);
832 + if (atomic_dec_and_test(&bp->b_pin_count))
833 + wake_up_all(&bp->b_waiters);
834 +
835 +- if (freed && stale) {
836 ++ /* nothing to do but drop the pin count if the bli is active */
837 ++ if (!freed)
838 ++ return;
839 ++
840 ++ if (stale) {
841 + ASSERT(bip->bli_flags & XFS_BLI_STALE);
842 + ASSERT(xfs_buf_islocked(bp));
843 + ASSERT(bp->b_flags & XBF_STALE);
844 + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
845 ++ ASSERT(list_empty(&lip->li_trans));
846 ++ ASSERT(!bp->b_transp);
847 +
848 + trace_xfs_buf_item_unpin_stale(bip);
849 +
850 +- if (remove) {
851 +- /*
852 +- * If we are in a transaction context, we have to
853 +- * remove the log item from the transaction as we are
854 +- * about to release our reference to the buffer. If we
855 +- * don't, the unlock that occurs later in
856 +- * xfs_trans_uncommit() will try to reference the
857 +- * buffer which we no longer have a hold on.
858 +- */
859 +- if (!list_empty(&lip->li_trans))
860 +- xfs_trans_del_item(lip);
861 +-
862 +- /*
863 +- * Since the transaction no longer refers to the buffer,
864 +- * the buffer should no longer refer to the transaction.
865 +- */
866 +- bp->b_transp = NULL;
867 +- }
868 +-
869 + /*
870 + * If we get called here because of an IO error, we may or may
871 + * not have the item on the AIL. xfs_trans_ail_delete() will
872 +@@ -468,13 +456,13 @@ xfs_buf_item_unpin(
873 + ASSERT(bp->b_log_item == NULL);
874 + }
875 + xfs_buf_relse(bp);
876 +- } else if (freed && remove) {
877 ++ } else if (remove) {
878 + /*
879 + * The buffer must be locked and held by the caller to simulate
880 +- * an async I/O failure.
881 ++ * an async I/O failure. We acquired the hold for this case
882 ++ * before the buffer was unpinned.
883 + */
884 + xfs_buf_lock(bp);
885 +- xfs_buf_hold(bp);
886 + bp->b_flags |= XBF_ASYNC;
887 + xfs_buf_ioend_fail(bp);
888 + }
889 +@@ -632,7 +620,7 @@ xfs_buf_item_release(
890 + STATIC void
891 + xfs_buf_item_committing(
892 + struct xfs_log_item *lip,
893 +- xfs_lsn_t commit_lsn)
894 ++ xfs_csn_t seq)
895 + {
896 + return xfs_buf_item_release(lip);
897 + }
898 +diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
899 +index 1d649462d731a..b374c9cee1177 100644
900 +--- a/fs/xfs/xfs_buf_item_recover.c
901 ++++ b/fs/xfs/xfs_buf_item_recover.c
902 +@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn(
903 + switch (magicda) {
904 + case XFS_DIR3_LEAF1_MAGIC:
905 + case XFS_DIR3_LEAFN_MAGIC:
906 ++ case XFS_ATTR3_LEAF_MAGIC:
907 + case XFS_DA3_NODE_MAGIC:
908 + lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
909 + uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
910 +diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
911 +index 8c1fdf37ee8f0..8ed47b739b6cc 100644
912 +--- a/fs/xfs/xfs_dquot_item.c
913 ++++ b/fs/xfs/xfs_dquot_item.c
914 +@@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release(
915 + STATIC void
916 + xfs_qm_dquot_logitem_committing(
917 + struct xfs_log_item *lip,
918 +- xfs_lsn_t commit_lsn)
919 ++ xfs_csn_t seq)
920 + {
921 + return xfs_qm_dquot_logitem_release(lip);
922 + }
923 +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
924 +index 5b0f93f738372..4d6bf8d4974fe 100644
925 +--- a/fs/xfs/xfs_file.c
926 ++++ b/fs/xfs/xfs_file.c
927 +@@ -118,6 +118,54 @@ xfs_dir_fsync(
928 + return xfs_log_force_inode(ip);
929 + }
930 +
931 ++static xfs_csn_t
932 ++xfs_fsync_seq(
933 ++ struct xfs_inode *ip,
934 ++ bool datasync)
935 ++{
936 ++ if (!xfs_ipincount(ip))
937 ++ return 0;
938 ++ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
939 ++ return 0;
940 ++ return ip->i_itemp->ili_commit_seq;
941 ++}
942 ++
943 ++/*
944 ++ * All metadata updates are logged, which means that we just have to flush the
945 ++ * log up to the latest LSN that touched the inode.
946 ++ *
947 ++ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
948 ++ * the log force before we clear the ili_fsync_fields field. This ensures that
949 ++ * we don't get a racing sync operation that does not wait for the metadata to
950 ++ * hit the journal before returning. If we race with clearing ili_fsync_fields,
951 ++ * then all that will happen is the log force will do nothing as the lsn will
952 ++ * already be on disk. We can't race with setting ili_fsync_fields because that
953 ++ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
954 ++ * shared until after the ili_fsync_fields is cleared.
955 ++ */
956 ++static int
957 ++xfs_fsync_flush_log(
958 ++ struct xfs_inode *ip,
959 ++ bool datasync,
960 ++ int *log_flushed)
961 ++{
962 ++ int error = 0;
963 ++ xfs_csn_t seq;
964 ++
965 ++ xfs_ilock(ip, XFS_ILOCK_SHARED);
966 ++ seq = xfs_fsync_seq(ip, datasync);
967 ++ if (seq) {
968 ++ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
969 ++ log_flushed);
970 ++
971 ++ spin_lock(&ip->i_itemp->ili_lock);
972 ++ ip->i_itemp->ili_fsync_fields = 0;
973 ++ spin_unlock(&ip->i_itemp->ili_lock);
974 ++ }
975 ++ xfs_iunlock(ip, XFS_ILOCK_SHARED);
976 ++ return error;
977 ++}
978 ++
979 + STATIC int
980 + xfs_file_fsync(
981 + struct file *file,
982 +@@ -125,13 +173,10 @@ xfs_file_fsync(
983 + loff_t end,
984 + int datasync)
985 + {
986 +- struct inode *inode = file->f_mapping->host;
987 +- struct xfs_inode *ip = XFS_I(inode);
988 +- struct xfs_inode_log_item *iip = ip->i_itemp;
989 ++ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
990 + struct xfs_mount *mp = ip->i_mount;
991 + int error = 0;
992 + int log_flushed = 0;
993 +- xfs_lsn_t lsn = 0;
994 +
995 + trace_xfs_file_fsync(ip);
996 +
997 +@@ -155,33 +200,7 @@ xfs_file_fsync(
998 + else if (mp->m_logdev_targp != mp->m_ddev_targp)
999 + xfs_blkdev_issue_flush(mp->m_ddev_targp);
1000 +
1001 +- /*
1002 +- * All metadata updates are logged, which means that we just have to
1003 +- * flush the log up to the latest LSN that touched the inode. If we have
1004 +- * concurrent fsync/fdatasync() calls, we need them to all block on the
1005 +- * log force before we clear the ili_fsync_fields field. This ensures
1006 +- * that we don't get a racing sync operation that does not wait for the
1007 +- * metadata to hit the journal before returning. If we race with
1008 +- * clearing the ili_fsync_fields, then all that will happen is the log
1009 +- * force will do nothing as the lsn will already be on disk. We can't
1010 +- * race with setting ili_fsync_fields because that is done under
1011 +- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
1012 +- * until after the ili_fsync_fields is cleared.
1013 +- */
1014 +- xfs_ilock(ip, XFS_ILOCK_SHARED);
1015 +- if (xfs_ipincount(ip)) {
1016 +- if (!datasync ||
1017 +- (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
1018 +- lsn = iip->ili_last_lsn;
1019 +- }
1020 +-
1021 +- if (lsn) {
1022 +- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
1023 +- spin_lock(&iip->ili_lock);
1024 +- iip->ili_fsync_fields = 0;
1025 +- spin_unlock(&iip->ili_lock);
1026 +- }
1027 +- xfs_iunlock(ip, XFS_ILOCK_SHARED);
1028 ++ error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
1029 +
1030 + /*
1031 + * If we only have a single device, and the log force above was
1032 +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
1033 +index 03497741aef74..1f61e085676b3 100644
1034 +--- a/fs/xfs/xfs_inode.c
1035 ++++ b/fs/xfs/xfs_inode.c
1036 +@@ -2754,7 +2754,7 @@ xfs_iunpin(
1037 + trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
1038 +
1039 + /* Give the log a push to start the unpinning I/O */
1040 +- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
1041 ++ xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
1042 +
1043 + }
1044 +
1045 +@@ -3716,16 +3716,16 @@ int
1046 + xfs_log_force_inode(
1047 + struct xfs_inode *ip)
1048 + {
1049 +- xfs_lsn_t lsn = 0;
1050 ++ xfs_csn_t seq = 0;
1051 +
1052 + xfs_ilock(ip, XFS_ILOCK_SHARED);
1053 + if (xfs_ipincount(ip))
1054 +- lsn = ip->i_itemp->ili_last_lsn;
1055 ++ seq = ip->i_itemp->ili_commit_seq;
1056 + xfs_iunlock(ip, XFS_ILOCK_SHARED);
1057 +
1058 +- if (!lsn)
1059 ++ if (!seq)
1060 + return 0;
1061 +- return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
1062 ++ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
1063 + }
1064 +
1065 + /*
1066 +diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
1067 +index 6ff91e5bf3cd7..3aba4559469f1 100644
1068 +--- a/fs/xfs/xfs_inode_item.c
1069 ++++ b/fs/xfs/xfs_inode_item.c
1070 +@@ -617,9 +617,9 @@ xfs_inode_item_committed(
1071 + STATIC void
1072 + xfs_inode_item_committing(
1073 + struct xfs_log_item *lip,
1074 +- xfs_lsn_t commit_lsn)
1075 ++ xfs_csn_t seq)
1076 + {
1077 +- INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
1078 ++ INODE_ITEM(lip)->ili_commit_seq = seq;
1079 + return xfs_inode_item_release(lip);
1080 + }
1081 +
1082 +diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
1083 +index 4b926e32831c0..403b45ab9aa28 100644
1084 +--- a/fs/xfs/xfs_inode_item.h
1085 ++++ b/fs/xfs/xfs_inode_item.h
1086 +@@ -33,7 +33,7 @@ struct xfs_inode_log_item {
1087 + unsigned int ili_fields; /* fields to be logged */
1088 + unsigned int ili_fsync_fields; /* logged since last fsync */
1089 + xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
1090 +- xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
1091 ++ xfs_csn_t ili_commit_seq; /* last transaction commit */
1092 + };
1093 +
1094 + static inline int xfs_inode_clean(struct xfs_inode *ip)
1095 +diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
1096 +index cb44f7653f03b..538724f9f85ca 100644
1097 +--- a/fs/xfs/xfs_inode_item_recover.c
1098 ++++ b/fs/xfs/xfs_inode_item_recover.c
1099 +@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
1100 + STATIC void
1101 + xfs_log_dinode_to_disk(
1102 + struct xfs_log_dinode *from,
1103 +- struct xfs_dinode *to)
1104 ++ struct xfs_dinode *to,
1105 ++ xfs_lsn_t lsn)
1106 + {
1107 + to->di_magic = cpu_to_be16(from->di_magic);
1108 + to->di_mode = cpu_to_be16(from->di_mode);
1109 +@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
1110 + to->di_flags2 = cpu_to_be64(from->di_flags2);
1111 + to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
1112 + to->di_ino = cpu_to_be64(from->di_ino);
1113 +- to->di_lsn = cpu_to_be64(from->di_lsn);
1114 ++ to->di_lsn = cpu_to_be64(lsn);
1115 + memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
1116 + uuid_copy(&to->di_uuid, &from->di_uuid);
1117 + to->di_flushiter = 0;
1118 +@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
1119 + }
1120 +
1121 + /*
1122 +- * If the inode has an LSN in it, recover the inode only if it's less
1123 +- * than the lsn of the transaction we are replaying. Note: we still
1124 +- * need to replay an owner change even though the inode is more recent
1125 +- * than the transaction as there is no guarantee that all the btree
1126 +- * blocks are more recent than this transaction, too.
1127 ++ * If the inode has an LSN in it, recover the inode only if the on-disk
1128 ++ * inode's LSN is older than the lsn of the transaction we are
1129 ++ * replaying. We can have multiple checkpoints with the same start LSN,
1130 ++ * so the current LSN being equal to the on-disk LSN doesn't necessarily
1131 ++ * mean that the on-disk inode is more recent than the change being
1132 ++ * replayed.
1133 ++ *
1134 ++ * We must check the current_lsn against the on-disk inode
1135 ++ * here because we can't trust the log dinode to contain a valid LSN
1136 ++ * (see comment below before replaying the log dinode for details).
1137 ++ *
1138 ++ * Note: we still need to replay an owner change even though the inode
1139 ++ * is more recent than the transaction as there is no guarantee that all
1140 ++ * the btree blocks are more recent than this transaction, too.
1141 + */
1142 + if (dip->di_version >= 3) {
1143 + xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
1144 +
1145 +- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
1146 ++ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
1147 + trace_xfs_log_recover_inode_skip(log, in_f);
1148 + error = 0;
1149 + goto out_owner_change;
1150 +@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
1151 + goto out_release;
1152 + }
1153 +
1154 +- /* recover the log dinode inode into the on disk inode */
1155 +- xfs_log_dinode_to_disk(ldip, dip);
1156 ++ /*
1157 ++ * Recover the log dinode inode into the on disk inode.
1158 ++ *
1159 ++ * The LSN in the log dinode is garbage - it can be zero or reflect
1160 ++ * stale in-memory runtime state that isn't coherent with the changes
1161 ++ * logged in this transaction or the changes written to the on-disk
1162 ++ * inode. Hence we write the current LSN into the inode because that
1163 ++ * matches what xfs_iflush() would write into the inode when flushing
1164 ++ * the changes in this transaction.
1165 ++ */
1166 ++ xfs_log_dinode_to_disk(ldip, dip, current_lsn);
1167 +
1168 + fields = in_f->ilf_fields;
1169 + if (fields & XFS_ILOG_DEV)
1170 +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
1171 +index b445e63cbc3c7..22d7d74231d42 100644
1172 +--- a/fs/xfs/xfs_log.c
1173 ++++ b/fs/xfs/xfs_log.c
1174 +@@ -765,6 +765,9 @@ xfs_log_mount_finish(
1175 + if (readonly)
1176 + mp->m_flags |= XFS_MOUNT_RDONLY;
1177 +
1178 ++ /* Make sure the log is dead if we're returning failure. */
1179 ++ ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
1180 ++
1181 + return error;
1182 + }
1183 +
1184 +@@ -3210,14 +3213,13 @@ out_error:
1185 + }
1186 +
1187 + static int
1188 +-__xfs_log_force_lsn(
1189 +- struct xfs_mount *mp,
1190 ++xlog_force_lsn(
1191 ++ struct xlog *log,
1192 + xfs_lsn_t lsn,
1193 + uint flags,
1194 + int *log_flushed,
1195 + bool already_slept)
1196 + {
1197 +- struct xlog *log = mp->m_log;
1198 + struct xlog_in_core *iclog;
1199 +
1200 + spin_lock(&log->l_icloglock);
1201 +@@ -3250,8 +3252,6 @@ __xfs_log_force_lsn(
1202 + if (!already_slept &&
1203 + (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
1204 + iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
1205 +- XFS_STATS_INC(mp, xs_log_force_sleep);
1206 +-
1207 + xlog_wait(&iclog->ic_prev->ic_write_wait,
1208 + &log->l_icloglock);
1209 + return -EAGAIN;
1210 +@@ -3289,25 +3289,29 @@ out_error:
1211 + * to disk, that thread will wake up all threads waiting on the queue.
1212 + */
1213 + int
1214 +-xfs_log_force_lsn(
1215 ++xfs_log_force_seq(
1216 + struct xfs_mount *mp,
1217 +- xfs_lsn_t lsn,
1218 ++ xfs_csn_t seq,
1219 + uint flags,
1220 + int *log_flushed)
1221 + {
1222 ++ struct xlog *log = mp->m_log;
1223 ++ xfs_lsn_t lsn;
1224 + int ret;
1225 +- ASSERT(lsn != 0);
1226 ++ ASSERT(seq != 0);
1227 +
1228 + XFS_STATS_INC(mp, xs_log_force);
1229 +- trace_xfs_log_force(mp, lsn, _RET_IP_);
1230 ++ trace_xfs_log_force(mp, seq, _RET_IP_);
1231 +
1232 +- lsn = xlog_cil_force_lsn(mp->m_log, lsn);
1233 ++ lsn = xlog_cil_force_seq(log, seq);
1234 + if (lsn == NULLCOMMITLSN)
1235 + return 0;
1236 +
1237 +- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
1238 +- if (ret == -EAGAIN)
1239 +- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
1240 ++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
1241 ++ if (ret == -EAGAIN) {
1242 ++ XFS_STATS_INC(mp, xs_log_force_sleep);
1243 ++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
1244 ++ }
1245 + return ret;
1246 + }
1247 +
1248 +diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
1249 +index 98c913da7587e..a1089f8b7169b 100644
1250 +--- a/fs/xfs/xfs_log.h
1251 ++++ b/fs/xfs/xfs_log.h
1252 +@@ -106,7 +106,7 @@ struct xfs_item_ops;
1253 + struct xfs_trans;
1254 +
1255 + int xfs_log_force(struct xfs_mount *mp, uint flags);
1256 +-int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
1257 ++int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
1258 + int *log_forced);
1259 + int xfs_log_mount(struct xfs_mount *mp,
1260 + struct xfs_buftarg *log_target,
1261 +@@ -132,8 +132,6 @@ bool xfs_log_writable(struct xfs_mount *mp);
1262 + struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
1263 + void xfs_log_ticket_put(struct xlog_ticket *ticket);
1264 +
1265 +-void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
1266 +- xfs_lsn_t *commit_lsn, bool regrant);
1267 + void xlog_cil_process_committed(struct list_head *list);
1268 + bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
1269 +
1270 +diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
1271 +index cd5c04dabe2e1..fbe160d5e9b96 100644
1272 +--- a/fs/xfs/xfs_log_cil.c
1273 ++++ b/fs/xfs/xfs_log_cil.c
1274 +@@ -777,7 +777,7 @@ xlog_cil_push_work(
1275 + * that higher sequences will wait for us to write out a commit record
1276 + * before they do.
1277 + *
1278 +- * xfs_log_force_lsn requires us to mirror the new sequence into the cil
1279 ++ * xfs_log_force_seq requires us to mirror the new sequence into the cil
1280 + * structure atomically with the addition of this sequence to the
1281 + * committing list. This also ensures that we can do unlocked checks
1282 + * against the current sequence in log forces without risking
1283 +@@ -1020,16 +1020,14 @@ xlog_cil_empty(
1284 + * allowed again.
1285 + */
1286 + void
1287 +-xfs_log_commit_cil(
1288 +- struct xfs_mount *mp,
1289 ++xlog_cil_commit(
1290 ++ struct xlog *log,
1291 + struct xfs_trans *tp,
1292 +- xfs_lsn_t *commit_lsn,
1293 ++ xfs_csn_t *commit_seq,
1294 + bool regrant)
1295 + {
1296 +- struct xlog *log = mp->m_log;
1297 + struct xfs_cil *cil = log->l_cilp;
1298 + struct xfs_log_item *lip, *next;
1299 +- xfs_lsn_t xc_commit_lsn;
1300 +
1301 + /*
1302 + * Do all necessary memory allocation before we lock the CIL.
1303 +@@ -1043,10 +1041,6 @@ xfs_log_commit_cil(
1304 +
1305 + xlog_cil_insert_items(log, tp);
1306 +
1307 +- xc_commit_lsn = cil->xc_ctx->sequence;
1308 +- if (commit_lsn)
1309 +- *commit_lsn = xc_commit_lsn;
1310 +-
1311 + if (regrant && !XLOG_FORCED_SHUTDOWN(log))
1312 + xfs_log_ticket_regrant(log, tp->t_ticket);
1313 + else
1314 +@@ -1069,8 +1063,10 @@ xfs_log_commit_cil(
1315 + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
1316 + xfs_trans_del_item(lip);
1317 + if (lip->li_ops->iop_committing)
1318 +- lip->li_ops->iop_committing(lip, xc_commit_lsn);
1319 ++ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
1320 + }
1321 ++ if (commit_seq)
1322 ++ *commit_seq = cil->xc_ctx->sequence;
1323 +
1324 + /* xlog_cil_push_background() releases cil->xc_ctx_lock */
1325 + xlog_cil_push_background(log);
1326 +@@ -1087,9 +1083,9 @@ xfs_log_commit_cil(
1327 + * iclog flush is necessary following this call.
1328 + */
1329 + xfs_lsn_t
1330 +-xlog_cil_force_lsn(
1331 ++xlog_cil_force_seq(
1332 + struct xlog *log,
1333 +- xfs_lsn_t sequence)
1334 ++ xfs_csn_t sequence)
1335 + {
1336 + struct xfs_cil *cil = log->l_cilp;
1337 + struct xfs_cil_ctx *ctx;
1338 +@@ -1183,23 +1179,19 @@ out_shutdown:
1339 + */
1340 + bool
1341 + xfs_log_item_in_current_chkpt(
1342 +- struct xfs_log_item *lip)
1343 ++ struct xfs_log_item *lip)
1344 + {
1345 +- struct xfs_cil_ctx *ctx;
1346 ++ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
1347 +
1348 + if (list_empty(&lip->li_cil))
1349 + return false;
1350 +
1351 +- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
1352 +-
1353 + /*
1354 + * li_seq is written on the first commit of a log item to record the
1355 + * first checkpoint it is written to. Hence if it is different to the
1356 + * current sequence, we're in a new checkpoint.
1357 + */
1358 +- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
1359 +- return false;
1360 +- return true;
1361 ++ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
1362 + }
1363 +
1364 + /*
1365 +diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
1366 +index 1c6fdbf3d5066..42cd1602ac256 100644
1367 +--- a/fs/xfs/xfs_log_priv.h
1368 ++++ b/fs/xfs/xfs_log_priv.h
1369 +@@ -230,7 +230,7 @@ struct xfs_cil;
1370 +
1371 + struct xfs_cil_ctx {
1372 + struct xfs_cil *cil;
1373 +- xfs_lsn_t sequence; /* chkpt sequence # */
1374 ++ xfs_csn_t sequence; /* chkpt sequence # */
1375 + xfs_lsn_t start_lsn; /* first LSN of chkpt commit */
1376 + xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
1377 + struct xlog_ticket *ticket; /* chkpt ticket */
1378 +@@ -268,10 +268,10 @@ struct xfs_cil {
1379 + struct xfs_cil_ctx *xc_ctx;
1380 +
1381 + spinlock_t xc_push_lock ____cacheline_aligned_in_smp;
1382 +- xfs_lsn_t xc_push_seq;
1383 ++ xfs_csn_t xc_push_seq;
1384 + struct list_head xc_committing;
1385 + wait_queue_head_t xc_commit_wait;
1386 +- xfs_lsn_t xc_current_sequence;
1387 ++ xfs_csn_t xc_current_sequence;
1388 + struct work_struct xc_push_work;
1389 + wait_queue_head_t xc_push_wait; /* background push throttle */
1390 + } ____cacheline_aligned_in_smp;
1391 +@@ -547,19 +547,18 @@ int xlog_cil_init(struct xlog *log);
1392 + void xlog_cil_init_post_recovery(struct xlog *log);
1393 + void xlog_cil_destroy(struct xlog *log);
1394 + bool xlog_cil_empty(struct xlog *log);
1395 ++void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp,
1396 ++ xfs_csn_t *commit_seq, bool regrant);
1397 +
1398 + /*
1399 + * CIL force routines
1400 + */
1401 +-xfs_lsn_t
1402 +-xlog_cil_force_lsn(
1403 +- struct xlog *log,
1404 +- xfs_lsn_t sequence);
1405 ++xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence);
1406 +
1407 + static inline void
1408 + xlog_cil_force(struct xlog *log)
1409 + {
1410 +- xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
1411 ++ xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence);
1412 + }
1413 +
1414 + /*
1415 +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
1416 +index 87886b7f77dad..69408782019eb 100644
1417 +--- a/fs/xfs/xfs_log_recover.c
1418 ++++ b/fs/xfs/xfs_log_recover.c
1419 +@@ -2457,8 +2457,10 @@ xlog_finish_defer_ops(
1420 +
1421 + error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
1422 + dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
1423 +- if (error)
1424 ++ if (error) {
1425 ++ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1426 + return error;
1427 ++ }
1428 +
1429 + /*
1430 + * Transfer to this new transaction all the dfops we captured
1431 +@@ -3454,6 +3456,7 @@ xlog_recover_finish(
1432 + * this) before we get around to xfs_log_mount_cancel.
1433 + */
1434 + xlog_recover_cancel_intents(log);
1435 ++ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
1436 + xfs_alert(log->l_mp, "Failed to recover intents");
1437 + return error;
1438 + }
1439 +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
1440 +index 44b05e1d5d327..a2a5a0fd92334 100644
1441 +--- a/fs/xfs/xfs_mount.c
1442 ++++ b/fs/xfs/xfs_mount.c
1443 +@@ -968,9 +968,17 @@ xfs_mountfs(
1444 + /*
1445 + * Finish recovering the file system. This part needed to be delayed
1446 + * until after the root and real-time bitmap inodes were consistently
1447 +- * read in.
1448 ++ * read in. Temporarily create per-AG space reservations for metadata
1449 ++ * btree shape changes because space freeing transactions (for inode
1450 ++ * inactivation) require the per-AG reservation in lieu of reserving
1451 ++ * blocks.
1452 + */
1453 ++ error = xfs_fs_reserve_ag_blocks(mp);
1454 ++ if (error && error == -ENOSPC)
1455 ++ xfs_warn(mp,
1456 ++ "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
1457 + error = xfs_log_mount_finish(mp);
1458 ++ xfs_fs_unreserve_ag_blocks(mp);
1459 + if (error) {
1460 + xfs_warn(mp, "log mount finish failed");
1461 + goto out_rtunmount;
1462 +diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
1463 +index 36166bae24a6f..73a1de7ceefc9 100644
1464 +--- a/fs/xfs/xfs_trans.c
1465 ++++ b/fs/xfs/xfs_trans.c
1466 +@@ -832,7 +832,7 @@ __xfs_trans_commit(
1467 + bool regrant)
1468 + {
1469 + struct xfs_mount *mp = tp->t_mountp;
1470 +- xfs_lsn_t commit_lsn = -1;
1471 ++ xfs_csn_t commit_seq = 0;
1472 + int error = 0;
1473 + int sync = tp->t_flags & XFS_TRANS_SYNC;
1474 +
1475 +@@ -874,7 +874,7 @@ __xfs_trans_commit(
1476 + xfs_trans_apply_sb_deltas(tp);
1477 + xfs_trans_apply_dquot_deltas(tp);
1478 +
1479 +- xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
1480 ++ xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
1481 +
1482 + xfs_trans_free(tp);
1483 +
1484 +@@ -883,7 +883,7 @@ __xfs_trans_commit(
1485 + * log out now and wait for it.
1486 + */
1487 + if (sync) {
1488 +- error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
1489 ++ error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
1490 + XFS_STATS_INC(mp, xs_trans_sync);
1491 + } else {
1492 + XFS_STATS_INC(mp, xs_trans_async);
1493 +diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
1494 +index 075eeade4f7d5..97485559008bb 100644
1495 +--- a/fs/xfs/xfs_trans.h
1496 ++++ b/fs/xfs/xfs_trans.h
1497 +@@ -43,7 +43,7 @@ struct xfs_log_item {
1498 + struct list_head li_cil; /* CIL pointers */
1499 + struct xfs_log_vec *li_lv; /* active log vector */
1500 + struct xfs_log_vec *li_lv_shadow; /* standby vector */
1501 +- xfs_lsn_t li_seq; /* CIL commit seq */
1502 ++ xfs_csn_t li_seq; /* CIL commit seq */
1503 + };
1504 +
1505 + /*
1506 +@@ -69,7 +69,7 @@ struct xfs_item_ops {
1507 + void (*iop_pin)(struct xfs_log_item *);
1508 + void (*iop_unpin)(struct xfs_log_item *, int remove);
1509 + uint (*iop_push)(struct xfs_log_item *, struct list_head *);
1510 +- void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
1511 ++ void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq);
1512 + void (*iop_release)(struct xfs_log_item *);
1513 + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
1514 + int (*iop_recover)(struct xfs_log_item *lip,
1515 +diff --git a/include/linux/bpf.h b/include/linux/bpf.h
1516 +index f21bc441e3fa8..b010d45a1ecd5 100644
1517 +--- a/include/linux/bpf.h
1518 ++++ b/include/linux/bpf.h
1519 +@@ -1457,6 +1457,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
1520 + int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
1521 + const union bpf_attr *kattr,
1522 + union bpf_attr __user *uattr);
1523 ++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
1524 ++ const union bpf_attr *kattr,
1525 ++ union bpf_attr __user *uattr);
1526 + bool btf_ctx_access(int off, int size, enum bpf_access_type type,
1527 + const struct bpf_prog *prog,
1528 + struct bpf_insn_access_aux *info);
1529 +@@ -1671,6 +1674,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
1530 + return -ENOTSUPP;
1531 + }
1532 +
1533 ++static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
1534 ++ const union bpf_attr *kattr,
1535 ++ union bpf_attr __user *uattr)
1536 ++{
1537 ++ return -ENOTSUPP;
1538 ++}
1539 ++
1540 + static inline void bpf_map_put(struct bpf_map *map)
1541 + {
1542 + }
1543 +diff --git a/include/net/addrconf.h b/include/net/addrconf.h
1544 +index e7ce719838b5e..edba74a536839 100644
1545 +--- a/include/net/addrconf.h
1546 ++++ b/include/net/addrconf.h
1547 +@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
1548 + {
1549 + const struct inet6_dev *idev = __in6_dev_get(dev);
1550 +
1551 ++ if (unlikely(!idev))
1552 ++ return true;
1553 ++
1554 + return !!idev->cnf.ignore_routes_with_linkdown;
1555 + }
1556 +
1557 +diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
1558 +index 1d1232917de72..9b8000869b078 100644
1559 +--- a/include/net/bluetooth/l2cap.h
1560 ++++ b/include/net/bluetooth/l2cap.h
1561 +@@ -845,6 +845,7 @@ enum {
1562 + };
1563 +
1564 + void l2cap_chan_hold(struct l2cap_chan *c);
1565 ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
1566 + void l2cap_chan_put(struct l2cap_chan *c);
1567 +
1568 + static inline void l2cap_chan_lock(struct l2cap_chan *chan)
1569 +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
1570 +index 0b1864a82d4ad..ff901aade442f 100644
1571 +--- a/include/net/inet_connection_sock.h
1572 ++++ b/include/net/inet_connection_sock.h
1573 +@@ -317,7 +317,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
1574 +
1575 + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
1576 +
1577 +-#define TCP_PINGPONG_THRESH 3
1578 ++#define TCP_PINGPONG_THRESH 1
1579 +
1580 + static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
1581 + {
1582 +@@ -334,14 +334,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
1583 + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
1584 + }
1585 +
1586 +-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
1587 +-{
1588 +- struct inet_connection_sock *icsk = inet_csk(sk);
1589 +-
1590 +- if (icsk->icsk_ack.pingpong < U8_MAX)
1591 +- icsk->icsk_ack.pingpong++;
1592 +-}
1593 +-
1594 + static inline bool inet_csk_has_ulp(struct sock *sk)
1595 + {
1596 + return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
1597 +diff --git a/include/net/tcp.h b/include/net/tcp.h
1598 +index 44bfb22069c1f..8129ce9a07719 100644
1599 +--- a/include/net/tcp.h
1600 ++++ b/include/net/tcp.h
1601 +@@ -1396,7 +1396,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
1602 +
1603 + static inline int tcp_win_from_space(const struct sock *sk, int space)
1604 + {
1605 +- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
1606 ++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
1607 +
1608 + return tcp_adv_win_scale <= 0 ?
1609 + (space>>(-tcp_adv_win_scale)) :
1610 +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
1611 +index 0f39fdcb2273c..2a234023821e3 100644
1612 +--- a/include/uapi/linux/bpf.h
1613 ++++ b/include/uapi/linux/bpf.h
1614 +@@ -5007,7 +5007,10 @@ struct bpf_pidns_info {
1615 +
1616 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
1617 + struct bpf_sk_lookup {
1618 +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
1619 ++ union {
1620 ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
1621 ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
1622 ++ };
1623 +
1624 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */
1625 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
1626 +diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
1627 +index e5d22af43fa0b..d29731a30b8e1 100644
1628 +--- a/kernel/watch_queue.c
1629 ++++ b/kernel/watch_queue.c
1630 +@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
1631 + rcu_assign_pointer(watch->queue, wqueue);
1632 + }
1633 +
1634 ++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
1635 ++{
1636 ++ const struct cred *cred;
1637 ++ struct watch *w;
1638 ++
1639 ++ hlist_for_each_entry(w, &wlist->watchers, list_node) {
1640 ++ struct watch_queue *wq = rcu_access_pointer(w->queue);
1641 ++ if (wqueue == wq && watch->id == w->id)
1642 ++ return -EBUSY;
1643 ++ }
1644 ++
1645 ++ cred = current_cred();
1646 ++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
1647 ++ atomic_dec(&cred->user->nr_watches);
1648 ++ return -EAGAIN;
1649 ++ }
1650 ++
1651 ++ watch->cred = get_cred(cred);
1652 ++ rcu_assign_pointer(watch->watch_list, wlist);
1653 ++
1654 ++ kref_get(&wqueue->usage);
1655 ++ kref_get(&watch->usage);
1656 ++ hlist_add_head(&watch->queue_node, &wqueue->watches);
1657 ++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
1658 ++ return 0;
1659 ++}
1660 ++
1661 + /**
1662 + * add_watch_to_object - Add a watch on an object to a watch list
1663 + * @watch: The watch to add
1664 +@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
1665 + */
1666 + int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
1667 + {
1668 +- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
1669 +- struct watch *w;
1670 +-
1671 +- hlist_for_each_entry(w, &wlist->watchers, list_node) {
1672 +- struct watch_queue *wq = rcu_access_pointer(w->queue);
1673 +- if (wqueue == wq && watch->id == w->id)
1674 +- return -EBUSY;
1675 +- }
1676 +-
1677 +- watch->cred = get_current_cred();
1678 +- rcu_assign_pointer(watch->watch_list, wlist);
1679 ++ struct watch_queue *wqueue;
1680 ++ int ret = -ENOENT;
1681 +
1682 +- if (atomic_inc_return(&watch->cred->user->nr_watches) >
1683 +- task_rlimit(current, RLIMIT_NOFILE)) {
1684 +- atomic_dec(&watch->cred->user->nr_watches);
1685 +- put_cred(watch->cred);
1686 +- return -EAGAIN;
1687 +- }
1688 ++ rcu_read_lock();
1689 +
1690 ++ wqueue = rcu_access_pointer(watch->queue);
1691 + if (lock_wqueue(wqueue)) {
1692 +- kref_get(&wqueue->usage);
1693 +- kref_get(&watch->usage);
1694 +- hlist_add_head(&watch->queue_node, &wqueue->watches);
1695 ++ spin_lock(&wlist->lock);
1696 ++ ret = add_one_watch(watch, wlist, wqueue);
1697 ++ spin_unlock(&wlist->lock);
1698 + unlock_wqueue(wqueue);
1699 + }
1700 +
1701 +- hlist_add_head(&watch->list_node, &wlist->watchers);
1702 +- return 0;
1703 ++ rcu_read_unlock();
1704 ++ return ret;
1705 + }
1706 + EXPORT_SYMBOL(add_watch_to_object);
1707 +
1708 +diff --git a/mm/page_alloc.c b/mm/page_alloc.c
1709 +index f3418edb136be..43ff22ce76324 100644
1710 +--- a/mm/page_alloc.c
1711 ++++ b/mm/page_alloc.c
1712 +@@ -3679,11 +3679,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
1713 + * need to be calculated.
1714 + */
1715 + if (!order) {
1716 +- long fast_free;
1717 ++ long usable_free;
1718 ++ long reserved;
1719 +
1720 +- fast_free = free_pages;
1721 +- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
1722 +- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
1723 ++ usable_free = free_pages;
1724 ++ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
1725 ++
1726 ++ /* reserved may over estimate high-atomic reserves. */
1727 ++ usable_free -= min(usable_free, reserved);
1728 ++ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
1729 + return true;
1730 + }
1731 +
1732 +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
1733 +index 2557cd917f5ed..6a5ff5dcc09a9 100644
1734 +--- a/net/bluetooth/l2cap_core.c
1735 ++++ b/net/bluetooth/l2cap_core.c
1736 +@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
1737 + }
1738 +
1739 + /* Find channel with given SCID.
1740 +- * Returns locked channel. */
1741 ++ * Returns a reference locked channel.
1742 ++ */
1743 + static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
1744 + u16 cid)
1745 + {
1746 +@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
1747 +
1748 + mutex_lock(&conn->chan_lock);
1749 + c = __l2cap_get_chan_by_scid(conn, cid);
1750 +- if (c)
1751 +- l2cap_chan_lock(c);
1752 ++ if (c) {
1753 ++ /* Only lock if chan reference is not 0 */
1754 ++ c = l2cap_chan_hold_unless_zero(c);
1755 ++ if (c)
1756 ++ l2cap_chan_lock(c);
1757 ++ }
1758 + mutex_unlock(&conn->chan_lock);
1759 +
1760 + return c;
1761 + }
1762 +
1763 + /* Find channel with given DCID.
1764 +- * Returns locked channel.
1765 ++ * Returns a reference locked channel.
1766 + */
1767 + static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
1768 + u16 cid)
1769 +@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
1770 +
1771 + mutex_lock(&conn->chan_lock);
1772 + c = __l2cap_get_chan_by_dcid(conn, cid);
1773 +- if (c)
1774 +- l2cap_chan_lock(c);
1775 ++ if (c) {
1776 ++ /* Only lock if chan reference is not 0 */
1777 ++ c = l2cap_chan_hold_unless_zero(c);
1778 ++ if (c)
1779 ++ l2cap_chan_lock(c);
1780 ++ }
1781 + mutex_unlock(&conn->chan_lock);
1782 +
1783 + return c;
1784 +@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
1785 +
1786 + mutex_lock(&conn->chan_lock);
1787 + c = __l2cap_get_chan_by_ident(conn, ident);
1788 +- if (c)
1789 +- l2cap_chan_lock(c);
1790 ++ if (c) {
1791 ++ /* Only lock if chan reference is not 0 */
1792 ++ c = l2cap_chan_hold_unless_zero(c);
1793 ++ if (c)
1794 ++ l2cap_chan_lock(c);
1795 ++ }
1796 + mutex_unlock(&conn->chan_lock);
1797 +
1798 + return c;
1799 +@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
1800 + kref_get(&c->kref);
1801 + }
1802 +
1803 ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
1804 ++{
1805 ++ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
1806 ++
1807 ++ if (!kref_get_unless_zero(&c->kref))
1808 ++ return NULL;
1809 ++
1810 ++ return c;
1811 ++}
1812 ++
1813 + void l2cap_chan_put(struct l2cap_chan *c)
1814 + {
1815 + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
1816 +@@ -1965,7 +1988,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
1817 + src_match = !bacmp(&c->src, src);
1818 + dst_match = !bacmp(&c->dst, dst);
1819 + if (src_match && dst_match) {
1820 +- l2cap_chan_hold(c);
1821 ++ c = l2cap_chan_hold_unless_zero(c);
1822 ++ if (!c)
1823 ++ continue;
1824 ++
1825 + read_unlock(&chan_list_lock);
1826 + return c;
1827 + }
1828 +@@ -1980,7 +2006,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
1829 + }
1830 +
1831 + if (c1)
1832 +- l2cap_chan_hold(c1);
1833 ++ c1 = l2cap_chan_hold_unless_zero(c1);
1834 +
1835 + read_unlock(&chan_list_lock);
1836 +
1837 +@@ -4460,6 +4486,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
1838 +
1839 + unlock:
1840 + l2cap_chan_unlock(chan);
1841 ++ l2cap_chan_put(chan);
1842 + return err;
1843 + }
1844 +
1845 +@@ -4573,6 +4600,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
1846 +
1847 + done:
1848 + l2cap_chan_unlock(chan);
1849 ++ l2cap_chan_put(chan);
1850 + return err;
1851 + }
1852 +
1853 +@@ -5300,6 +5328,7 @@ send_move_response:
1854 + l2cap_send_move_chan_rsp(chan, result);
1855 +
1856 + l2cap_chan_unlock(chan);
1857 ++ l2cap_chan_put(chan);
1858 +
1859 + return 0;
1860 + }
1861 +@@ -5392,6 +5421,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
1862 + }
1863 +
1864 + l2cap_chan_unlock(chan);
1865 ++ l2cap_chan_put(chan);
1866 + }
1867 +
1868 + static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
1869 +@@ -5421,6 +5451,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
1870 + l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
1871 +
1872 + l2cap_chan_unlock(chan);
1873 ++ l2cap_chan_put(chan);
1874 + }
1875 +
1876 + static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
1877 +@@ -5484,6 +5515,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
1878 + l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
1879 +
1880 + l2cap_chan_unlock(chan);
1881 ++ l2cap_chan_put(chan);
1882 +
1883 + return 0;
1884 + }
1885 +@@ -5519,6 +5551,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
1886 + }
1887 +
1888 + l2cap_chan_unlock(chan);
1889 ++ l2cap_chan_put(chan);
1890 +
1891 + return 0;
1892 + }
1893 +@@ -5891,12 +5924,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
1894 + if (credits > max_credits) {
1895 + BT_ERR("LE credits overflow");
1896 + l2cap_send_disconn_req(chan, ECONNRESET);
1897 +- l2cap_chan_unlock(chan);
1898 +
1899 + /* Return 0 so that we don't trigger an unnecessary
1900 + * command reject packet.
1901 + */
1902 +- return 0;
1903 ++ goto unlock;
1904 + }
1905 +
1906 + chan->tx_credits += credits;
1907 +@@ -5907,7 +5939,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
1908 + if (chan->tx_credits)
1909 + chan->ops->resume(chan);
1910 +
1911 ++unlock:
1912 + l2cap_chan_unlock(chan);
1913 ++ l2cap_chan_put(chan);
1914 +
1915 + return 0;
1916 + }
1917 +@@ -7587,6 +7621,7 @@ drop:
1918 +
1919 + done:
1920 + l2cap_chan_unlock(chan);
1921 ++ l2cap_chan_put(chan);
1922 + }
1923 +
1924 + static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
1925 +@@ -8074,7 +8109,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
1926 + if (src_type != c->src_type)
1927 + continue;
1928 +
1929 +- l2cap_chan_hold(c);
1930 ++ c = l2cap_chan_hold_unless_zero(c);
1931 + read_unlock(&chan_list_lock);
1932 + return c;
1933 + }
1934 +diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
1935 +index eb684f31fd698..f8b231bbbe381 100644
1936 +--- a/net/bpf/test_run.c
1937 ++++ b/net/bpf/test_run.c
1938 +@@ -10,20 +10,86 @@
1939 + #include <net/bpf_sk_storage.h>
1940 + #include <net/sock.h>
1941 + #include <net/tcp.h>
1942 ++#include <net/net_namespace.h>
1943 + #include <linux/error-injection.h>
1944 + #include <linux/smp.h>
1945 ++#include <linux/sock_diag.h>
1946 +
1947 + #define CREATE_TRACE_POINTS
1948 + #include <trace/events/bpf_test_run.h>
1949 +
1950 ++struct bpf_test_timer {
1951 ++ enum { NO_PREEMPT, NO_MIGRATE } mode;
1952 ++ u32 i;
1953 ++ u64 time_start, time_spent;
1954 ++};
1955 ++
1956 ++static void bpf_test_timer_enter(struct bpf_test_timer *t)
1957 ++ __acquires(rcu)
1958 ++{
1959 ++ rcu_read_lock();
1960 ++ if (t->mode == NO_PREEMPT)
1961 ++ preempt_disable();
1962 ++ else
1963 ++ migrate_disable();
1964 ++
1965 ++ t->time_start = ktime_get_ns();
1966 ++}
1967 ++
1968 ++static void bpf_test_timer_leave(struct bpf_test_timer *t)
1969 ++ __releases(rcu)
1970 ++{
1971 ++ t->time_start = 0;
1972 ++
1973 ++ if (t->mode == NO_PREEMPT)
1974 ++ preempt_enable();
1975 ++ else
1976 ++ migrate_enable();
1977 ++ rcu_read_unlock();
1978 ++}
1979 ++
1980 ++static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
1981 ++ __must_hold(rcu)
1982 ++{
1983 ++ t->i++;
1984 ++ if (t->i >= repeat) {
1985 ++ /* We're done. */
1986 ++ t->time_spent += ktime_get_ns() - t->time_start;
1987 ++ do_div(t->time_spent, t->i);
1988 ++ *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
1989 ++ *err = 0;
1990 ++ goto reset;
1991 ++ }
1992 ++
1993 ++ if (signal_pending(current)) {
1994 ++ /* During iteration: we've been cancelled, abort. */
1995 ++ *err = -EINTR;
1996 ++ goto reset;
1997 ++ }
1998 ++
1999 ++ if (need_resched()) {
2000 ++ /* During iteration: we need to reschedule between runs. */
2001 ++ t->time_spent += ktime_get_ns() - t->time_start;
2002 ++ bpf_test_timer_leave(t);
2003 ++ cond_resched();
2004 ++ bpf_test_timer_enter(t);
2005 ++ }
2006 ++
2007 ++ /* Do another round. */
2008 ++ return true;
2009 ++
2010 ++reset:
2011 ++ t->i = 0;
2012 ++ return false;
2013 ++}
2014 ++
2015 + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
2016 + u32 *retval, u32 *time, bool xdp)
2017 + {
2018 + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
2019 ++ struct bpf_test_timer t = { NO_MIGRATE };
2020 + enum bpf_cgroup_storage_type stype;
2021 +- u64 time_start, time_spent = 0;
2022 +- int ret = 0;
2023 +- u32 i;
2024 ++ int ret;
2025 +
2026 + for_each_cgroup_storage_type(stype) {
2027 + storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
2028 +@@ -38,10 +104,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
2029 + if (!repeat)
2030 + repeat = 1;
2031 +
2032 +- rcu_read_lock();
2033 +- migrate_disable();
2034 +- time_start = ktime_get_ns();
2035 +- for (i = 0; i < repeat; i++) {
2036 ++ bpf_test_timer_enter(&t);
2037 ++ do {
2038 + ret = bpf_cgroup_storage_set(storage);
2039 + if (ret)
2040 + break;
2041 +@@ -53,29 +117,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
2042 +
2043 + bpf_cgroup_storage_unset();
2044 +
2045 +- if (signal_pending(current)) {
2046 +- ret = -EINTR;
2047 +- break;
2048 +- }
2049 +-
2050 +- if (need_resched()) {
2051 +- time_spent += ktime_get_ns() - time_start;
2052 +- migrate_enable();
2053 +- rcu_read_unlock();
2054 +-
2055 +- cond_resched();
2056 +-
2057 +- rcu_read_lock();
2058 +- migrate_disable();
2059 +- time_start = ktime_get_ns();
2060 +- }
2061 +- }
2062 +- time_spent += ktime_get_ns() - time_start;
2063 +- migrate_enable();
2064 +- rcu_read_unlock();
2065 +-
2066 +- do_div(time_spent, repeat);
2067 +- *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
2068 ++ } while (bpf_test_timer_continue(&t, repeat, &ret, time));
2069 ++ bpf_test_timer_leave(&t);
2070 +
2071 + for_each_cgroup_storage_type(stype)
2072 + bpf_cgroup_storage_free(storage[stype]);
2073 +@@ -688,18 +731,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
2074 + const union bpf_attr *kattr,
2075 + union bpf_attr __user *uattr)
2076 + {
2077 ++ struct bpf_test_timer t = { NO_PREEMPT };
2078 + u32 size = kattr->test.data_size_in;
2079 + struct bpf_flow_dissector ctx = {};
2080 + u32 repeat = kattr->test.repeat;
2081 + struct bpf_flow_keys *user_ctx;
2082 + struct bpf_flow_keys flow_keys;
2083 +- u64 time_start, time_spent = 0;
2084 + const struct ethhdr *eth;
2085 + unsigned int flags = 0;
2086 + u32 retval, duration;
2087 + void *data;
2088 + int ret;
2089 +- u32 i;
2090 +
2091 + if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
2092 + return -EINVAL;
2093 +@@ -735,48 +777,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
2094 + ctx.data = data;
2095 + ctx.data_end = (__u8 *)data + size;
2096 +
2097 +- rcu_read_lock();
2098 +- preempt_disable();
2099 +- time_start = ktime_get_ns();
2100 +- for (i = 0; i < repeat; i++) {
2101 ++ bpf_test_timer_enter(&t);
2102 ++ do {
2103 + retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
2104 + size, flags);
2105 ++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
2106 ++ bpf_test_timer_leave(&t);
2107 +
2108 +- if (signal_pending(current)) {
2109 +- preempt_enable();
2110 +- rcu_read_unlock();
2111 ++ if (ret < 0)
2112 ++ goto out;
2113 +
2114 +- ret = -EINTR;
2115 +- goto out;
2116 +- }
2117 ++ ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
2118 ++ retval, duration);
2119 ++ if (!ret)
2120 ++ ret = bpf_ctx_finish(kattr, uattr, user_ctx,
2121 ++ sizeof(struct bpf_flow_keys));
2122 +
2123 +- if (need_resched()) {
2124 +- time_spent += ktime_get_ns() - time_start;
2125 +- preempt_enable();
2126 +- rcu_read_unlock();
2127 ++out:
2128 ++ kfree(user_ctx);
2129 ++ kfree(data);
2130 ++ return ret;
2131 ++}
2132 +
2133 +- cond_resched();
2134 ++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
2135 ++ union bpf_attr __user *uattr)
2136 ++{
2137 ++ struct bpf_test_timer t = { NO_PREEMPT };
2138 ++ struct bpf_prog_array *progs = NULL;
2139 ++ struct bpf_sk_lookup_kern ctx = {};
2140 ++ u32 repeat = kattr->test.repeat;
2141 ++ struct bpf_sk_lookup *user_ctx;
2142 ++ u32 retval, duration;
2143 ++ int ret = -EINVAL;
2144 +
2145 +- rcu_read_lock();
2146 +- preempt_disable();
2147 +- time_start = ktime_get_ns();
2148 +- }
2149 ++ if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
2150 ++ return -EINVAL;
2151 ++
2152 ++ if (kattr->test.flags || kattr->test.cpu)
2153 ++ return -EINVAL;
2154 ++
2155 ++ if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
2156 ++ kattr->test.data_size_out)
2157 ++ return -EINVAL;
2158 ++
2159 ++ if (!repeat)
2160 ++ repeat = 1;
2161 ++
2162 ++ user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
2163 ++ if (IS_ERR(user_ctx))
2164 ++ return PTR_ERR(user_ctx);
2165 ++
2166 ++ if (!user_ctx)
2167 ++ return -EINVAL;
2168 ++
2169 ++ if (user_ctx->sk)
2170 ++ goto out;
2171 ++
2172 ++ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
2173 ++ goto out;
2174 ++
2175 ++ if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
2176 ++ ret = -ERANGE;
2177 ++ goto out;
2178 + }
2179 +- time_spent += ktime_get_ns() - time_start;
2180 +- preempt_enable();
2181 +- rcu_read_unlock();
2182 +
2183 +- do_div(time_spent, repeat);
2184 +- duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
2185 ++ ctx.family = (u16)user_ctx->family;
2186 ++ ctx.protocol = (u16)user_ctx->protocol;
2187 ++ ctx.dport = (u16)user_ctx->local_port;
2188 ++ ctx.sport = (__force __be16)user_ctx->remote_port;
2189 +
2190 +- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
2191 +- retval, duration);
2192 ++ switch (ctx.family) {
2193 ++ case AF_INET:
2194 ++ ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
2195 ++ ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
2196 ++ break;
2197 ++
2198 ++#if IS_ENABLED(CONFIG_IPV6)
2199 ++ case AF_INET6:
2200 ++ ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
2201 ++ ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
2202 ++ break;
2203 ++#endif
2204 ++
2205 ++ default:
2206 ++ ret = -EAFNOSUPPORT;
2207 ++ goto out;
2208 ++ }
2209 ++
2210 ++ progs = bpf_prog_array_alloc(1, GFP_KERNEL);
2211 ++ if (!progs) {
2212 ++ ret = -ENOMEM;
2213 ++ goto out;
2214 ++ }
2215 ++
2216 ++ progs->items[0].prog = prog;
2217 ++
2218 ++ bpf_test_timer_enter(&t);
2219 ++ do {
2220 ++ ctx.selected_sk = NULL;
2221 ++ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
2222 ++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
2223 ++ bpf_test_timer_leave(&t);
2224 ++
2225 ++ if (ret < 0)
2226 ++ goto out;
2227 ++
2228 ++ user_ctx->cookie = 0;
2229 ++ if (ctx.selected_sk) {
2230 ++ if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
2231 ++ ret = -EOPNOTSUPP;
2232 ++ goto out;
2233 ++ }
2234 ++
2235 ++ user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
2236 ++ }
2237 ++
2238 ++ ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
2239 + if (!ret)
2240 +- ret = bpf_ctx_finish(kattr, uattr, user_ctx,
2241 +- sizeof(struct bpf_flow_keys));
2242 ++ ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
2243 +
2244 + out:
2245 ++ bpf_prog_array_free(progs);
2246 + kfree(user_ctx);
2247 +- kfree(data);
2248 + return ret;
2249 + }
2250 +diff --git a/net/core/filter.c b/net/core/filter.c
2251 +index e2b491665775f..815edf7bc4390 100644
2252 +--- a/net/core/filter.c
2253 ++++ b/net/core/filter.c
2254 +@@ -10334,6 +10334,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
2255 + }
2256 +
2257 + const struct bpf_prog_ops sk_lookup_prog_ops = {
2258 ++ .test_run = bpf_prog_test_run_sk_lookup,
2259 + };
2260 +
2261 + const struct bpf_verifier_ops sk_lookup_verifier_ops = {
2262 +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
2263 +index 428cc3a4c36f1..c71b863093ace 100644
2264 +--- a/net/ipv4/igmp.c
2265 ++++ b/net/ipv4/igmp.c
2266 +@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
2267 + struct net *net = dev_net(in_dev->dev);
2268 + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
2269 + return;
2270 +- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
2271 ++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
2272 + igmp_ifc_start_timer(in_dev, 1);
2273 + }
2274 +
2275 +@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
2276 + * received value was zero, use the default or statically
2277 + * configured value.
2278 + */
2279 +- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
2280 ++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2281 + in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
2282 +
2283 + /* RFC3376, 8.3. Query Response Interval:
2284 +@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
2285 + pmc->interface = im->interface;
2286 + in_dev_hold(in_dev);
2287 + pmc->multiaddr = im->multiaddr;
2288 +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2289 ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2290 + pmc->sfmode = im->sfmode;
2291 + if (pmc->sfmode == MCAST_INCLUDE) {
2292 + struct ip_sf_list *psf;
2293 +@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
2294 + swap(im->tomb, pmc->tomb);
2295 + swap(im->sources, pmc->sources);
2296 + for (psf = im->sources; psf; psf = psf->sf_next)
2297 +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2298 ++ psf->sf_crcount = in_dev->mr_qrv ?:
2299 ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2300 + } else {
2301 +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2302 ++ im->crcount = in_dev->mr_qrv ?:
2303 ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2304 + }
2305 + in_dev_put(pmc->interface);
2306 + kfree_pmc(pmc);
2307 +@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im)
2308 + if (in_dev->dead)
2309 + return;
2310 +
2311 +- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
2312 ++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2313 + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
2314 + spin_lock_bh(&im->lock);
2315 + igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
2316 +@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
2317 + * IN() to IN(A).
2318 + */
2319 + if (im->sfmode == MCAST_EXCLUDE)
2320 +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2321 ++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2322 +
2323 + igmp_ifc_event(in_dev);
2324 + #endif
2325 +@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
2326 +
2327 + in_dev->mr_qi = IGMP_QUERY_INTERVAL;
2328 + in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
2329 +- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
2330 ++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2331 + }
2332 + #else
2333 + static void ip_mc_reset(struct in_device *in_dev)
2334 +@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
2335 + #ifdef CONFIG_IP_MULTICAST
2336 + if (psf->sf_oldin &&
2337 + !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
2338 +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2339 ++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2340 + psf->sf_next = pmc->tomb;
2341 + pmc->tomb = psf;
2342 + rv = 1;
2343 +@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
2344 + /* filter mode change */
2345 + pmc->sfmode = MCAST_INCLUDE;
2346 + #ifdef CONFIG_IP_MULTICAST
2347 +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2348 ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2349 + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
2350 + for (psf = pmc->sources; psf; psf = psf->sf_next)
2351 + psf->sf_crcount = 0;
2352 +@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
2353 + #ifdef CONFIG_IP_MULTICAST
2354 + /* else no filters; keep old mode for reports */
2355 +
2356 +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2357 ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2358 + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
2359 + for (psf = pmc->sources; psf; psf = psf->sf_next)
2360 + psf->sf_crcount = 0;
2361 +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
2362 +index f1fd26bb199ce..78460eb39b3af 100644
2363 +--- a/net/ipv4/tcp.c
2364 ++++ b/net/ipv4/tcp.c
2365 +@@ -698,7 +698,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
2366 + int size_goal)
2367 + {
2368 + return skb->len < size_goal &&
2369 +- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
2370 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
2371 + !tcp_rtx_queue_empty(sk) &&
2372 + refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
2373 + }
2374 +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
2375 +index d817f8c31c9ce..d35e88b5ffcbe 100644
2376 +--- a/net/ipv4/tcp_input.c
2377 ++++ b/net/ipv4/tcp_input.c
2378 +@@ -503,7 +503,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
2379 + */
2380 + static void tcp_init_buffer_space(struct sock *sk)
2381 + {
2382 +- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
2383 ++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
2384 + struct tcp_sock *tp = tcp_sk(sk);
2385 + int maxwin;
2386 +
2387 +@@ -693,7 +693,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
2388 + * <prev RTT . ><current RTT .. ><next RTT .... >
2389 + */
2390 +
2391 +- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
2392 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
2393 + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
2394 + int rcvmem, rcvbuf;
2395 + u64 rcvwin, grow;
2396 +@@ -2135,7 +2135,7 @@ void tcp_enter_loss(struct sock *sk)
2397 + * loss recovery is underway except recurring timeout(s) on
2398 + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
2399 + */
2400 +- tp->frto = net->ipv4.sysctl_tcp_frto &&
2401 ++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
2402 + (new_recovery || icsk->icsk_retransmits) &&
2403 + !inet_csk(sk)->icsk_mtup.probe_size;
2404 + }
2405 +@@ -3004,7 +3004,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2406 +
2407 + static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2408 + {
2409 +- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2410 ++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
2411 + struct tcp_sock *tp = tcp_sk(sk);
2412 +
2413 + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2414 +@@ -3528,7 +3528,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
2415 + if (*last_oow_ack_time) {
2416 + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
2417 +
2418 +- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
2419 ++ if (0 <= elapsed &&
2420 ++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
2421 + NET_INC_STATS(net, mib_idx);
2422 + return true; /* rate-limited: don't send yet! */
2423 + }
2424 +@@ -3576,7 +3577,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
2425 + /* Then check host-wide RFC 5961 rate limit. */
2426 + now = jiffies / HZ;
2427 + if (now != challenge_timestamp) {
2428 +- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
2429 ++ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
2430 + u32 half = (ack_limit + 1) >> 1;
2431 +
2432 + challenge_timestamp = now;
2433 +@@ -4367,7 +4368,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
2434 + {
2435 + struct tcp_sock *tp = tcp_sk(sk);
2436 +
2437 +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
2438 ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
2439 + int mib_idx;
2440 +
2441 + if (before(seq, tp->rcv_nxt))
2442 +@@ -4414,7 +4415,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
2443 + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
2444 + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
2445 +
2446 +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
2447 ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
2448 + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
2449 +
2450 + tcp_rcv_spurious_retrans(sk, skb);
2451 +@@ -5439,7 +5440,7 @@ send_now:
2452 + }
2453 +
2454 + if (!tcp_is_sack(tp) ||
2455 +- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
2456 ++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
2457 + goto send_now;
2458 +
2459 + if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
2460 +@@ -5460,11 +5461,12 @@ send_now:
2461 + if (tp->srtt_us && tp->srtt_us < rtt)
2462 + rtt = tp->srtt_us;
2463 +
2464 +- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
2465 ++ delay = min_t(unsigned long,
2466 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
2467 + rtt * (NSEC_PER_USEC >> 3)/20);
2468 + sock_hold(sk);
2469 + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
2470 +- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
2471 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
2472 + HRTIMER_MODE_REL_PINNED_SOFT);
2473 + }
2474 +
2475 +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
2476 +index d5f13ff7d9004..0d165ce2d80a7 100644
2477 +--- a/net/ipv4/tcp_ipv4.c
2478 ++++ b/net/ipv4/tcp_ipv4.c
2479 +@@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
2480 + if (skb) {
2481 + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
2482 +
2483 +- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
2484 ++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
2485 + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
2486 + (inet_sk(sk)->tos & INET_ECN_MASK) :
2487 + inet_sk(sk)->tos;
2488 +@@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
2489 + /* Set ToS of the new socket based upon the value of incoming SYN.
2490 + * ECT bits are set later in tcp_init_transfer().
2491 + */
2492 +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
2493 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
2494 + newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
2495 +
2496 + if (!dst) {
2497 +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
2498 +index 8d7e32f4abf67..f3ca6eea2ca39 100644
2499 +--- a/net/ipv4/tcp_metrics.c
2500 ++++ b/net/ipv4/tcp_metrics.c
2501 +@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
2502 + int m;
2503 +
2504 + sk_dst_confirm(sk);
2505 +- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
2506 ++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
2507 + return;
2508 +
2509 + rcu_read_lock();
2510 +@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
2511 +
2512 + if (tcp_in_initial_slowstart(tp)) {
2513 + /* Slow start still did not finish. */
2514 +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
2515 ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
2516 + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
2517 + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
2518 + if (val && (tp->snd_cwnd >> 1) > val)
2519 +@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
2520 + } else if (!tcp_in_slow_start(tp) &&
2521 + icsk->icsk_ca_state == TCP_CA_Open) {
2522 + /* Cong. avoidance phase, cwnd is reliable. */
2523 +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
2524 ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
2525 + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
2526 + tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
2527 + max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
2528 +@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
2529 + tcp_metric_set(tm, TCP_METRIC_CWND,
2530 + (val + tp->snd_ssthresh) >> 1);
2531 + }
2532 +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
2533 ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
2534 + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
2535 + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
2536 + if (val && tp->snd_ssthresh > val)
2537 +@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
2538 + if (tcp_metric_locked(tm, TCP_METRIC_CWND))
2539 + tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
2540 +
2541 +- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
2542 ++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
2543 + 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
2544 + if (val) {
2545 + tp->snd_ssthresh = val;
2546 +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
2547 +index 9b67c61576e4c..657b0a4d93599 100644
2548 +--- a/net/ipv4/tcp_output.c
2549 ++++ b/net/ipv4/tcp_output.c
2550 +@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
2551 + if (tcp_packets_in_flight(tp) == 0)
2552 + tcp_ca_event(sk, CA_EVENT_TX_START);
2553 +
2554 +- /* If this is the first data packet sent in response to the
2555 +- * previous received data,
2556 +- * and it is a reply for ato after last received packet,
2557 +- * increase pingpong count.
2558 +- */
2559 +- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
2560 +- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
2561 +- inet_csk_inc_pingpong_cnt(sk);
2562 +-
2563 + tp->lsndtime = now;
2564 ++
2565 ++ /* If it is a reply for ato after last received
2566 ++ * packet, enter pingpong mode.
2567 ++ */
2568 ++ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
2569 ++ inet_csk_enter_pingpong_mode(sk);
2570 + }
2571 +
2572 + /* Account for an ACK we sent. */
2573 +@@ -1987,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
2574 +
2575 + min_tso = ca_ops->min_tso_segs ?
2576 + ca_ops->min_tso_segs(sk) :
2577 +- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
2578 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
2579 +
2580 + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
2581 + return min_t(u32, tso_segs, sk->sk_gso_max_segs);
2582 +@@ -2502,7 +2499,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2583 + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
2584 + if (sk->sk_pacing_status == SK_PACING_NONE)
2585 + limit = min_t(unsigned long, limit,
2586 +- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2587 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
2588 + limit <<= factor;
2589 +
2590 + if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
2591 +diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
2592 +index 6ac88fe24a8e0..135e3a060caa8 100644
2593 +--- a/net/ipv6/ping.c
2594 ++++ b/net/ipv6/ping.c
2595 +@@ -22,6 +22,11 @@
2596 + #include <linux/proc_fs.h>
2597 + #include <net/ping.h>
2598 +
2599 ++static void ping_v6_destroy(struct sock *sk)
2600 ++{
2601 ++ inet6_destroy_sock(sk);
2602 ++}
2603 ++
2604 + /* Compatibility glue so we can support IPv6 when it's compiled as a module */
2605 + static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
2606 + int *addr_len)
2607 +@@ -166,6 +171,7 @@ struct proto pingv6_prot = {
2608 + .owner = THIS_MODULE,
2609 + .init = ping_init_sock,
2610 + .close = ping_close,
2611 ++ .destroy = ping_v6_destroy,
2612 + .connect = ip6_datagram_connect_v6_only,
2613 + .disconnect = __udp_disconnect,
2614 + .setsockopt = ipv6_setsockopt,
2615 +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
2616 +index 303b54414a6cc..8d91f36cb11bc 100644
2617 +--- a/net/ipv6/tcp_ipv6.c
2618 ++++ b/net/ipv6/tcp_ipv6.c
2619 +@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
2620 + if (np->repflow && ireq->pktopts)
2621 + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
2622 +
2623 +- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
2624 ++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
2625 + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
2626 + (np->tclass & INET_ECN_MASK) :
2627 + np->tclass;
2628 +@@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
2629 + /* Set ToS of the new socket based upon the value of incoming SYN.
2630 + * ECT bits are set later in tcp_init_transfer().
2631 + */
2632 +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
2633 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
2634 + newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
2635 +
2636 + /* Clone native IPv6 options from listening socket (if any)
2637 +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
2638 +index 8123c79e27913..d0e91aa7b30e5 100644
2639 +--- a/net/mptcp/protocol.c
2640 ++++ b/net/mptcp/protocol.c
2641 +@@ -1421,7 +1421,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
2642 + if (msk->rcvq_space.copied <= msk->rcvq_space.space)
2643 + goto new_measure;
2644 +
2645 +- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
2646 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
2647 + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
2648 + int rcvmem, rcvbuf;
2649 + u64 rcvwin, grow;
2650 +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
2651 +index 1640da5c50776..72d30922ed290 100644
2652 +--- a/net/netfilter/nfnetlink_queue.c
2653 ++++ b/net/netfilter/nfnetlink_queue.c
2654 +@@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
2655 + }
2656 +
2657 + static int
2658 +-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
2659 ++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
2660 + {
2661 + struct sk_buff *nskb;
2662 +
2663 + if (diff < 0) {
2664 ++ unsigned int min_len = skb_transport_offset(e->skb);
2665 ++
2666 ++ if (data_len < min_len)
2667 ++ return -EINVAL;
2668 ++
2669 + if (pskb_trim(e->skb, data_len))
2670 + return -ENOMEM;
2671 + } else if (diff > 0) {
2672 +diff --git a/net/sctp/associola.c b/net/sctp/associola.c
2673 +index fdb69d46276d6..2d4ec61877553 100644
2674 +--- a/net/sctp/associola.c
2675 ++++ b/net/sctp/associola.c
2676 +@@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init(
2677 + if (!sctp_ulpq_init(&asoc->ulpq, asoc))
2678 + goto fail_init;
2679 +
2680 +- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
2681 +- 0, gfp))
2682 +- goto fail_init;
2683 ++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
2684 ++ goto stream_free;
2685 +
2686 + /* Initialize default path MTU. */
2687 + asoc->pathmtu = sp->pathmtu;
2688 +diff --git a/net/sctp/stream.c b/net/sctp/stream.c
2689 +index 6dc95dcc0ff4f..ef9fceadef8d5 100644
2690 +--- a/net/sctp/stream.c
2691 ++++ b/net/sctp/stream.c
2692 +@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
2693 +
2694 + ret = sctp_stream_alloc_out(stream, outcnt, gfp);
2695 + if (ret)
2696 +- goto out_err;
2697 ++ return ret;
2698 +
2699 + for (i = 0; i < stream->outcnt; i++)
2700 + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
2701 +@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
2702 + handle_in:
2703 + sctp_stream_interleave_init(stream);
2704 + if (!incnt)
2705 +- goto out;
2706 +-
2707 +- ret = sctp_stream_alloc_in(stream, incnt, gfp);
2708 +- if (ret)
2709 +- goto in_err;
2710 +-
2711 +- goto out;
2712 ++ return 0;
2713 +
2714 +-in_err:
2715 +- sched->free(stream);
2716 +- genradix_free(&stream->in);
2717 +-out_err:
2718 +- genradix_free(&stream->out);
2719 +- stream->outcnt = 0;
2720 +-out:
2721 +- return ret;
2722 ++ return sctp_stream_alloc_in(stream, incnt, gfp);
2723 + }
2724 +
2725 + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
2726 +diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
2727 +index 99e5f69fbb742..a2e1d34f52c5b 100644
2728 +--- a/net/sctp/stream_sched.c
2729 ++++ b/net/sctp/stream_sched.c
2730 +@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
2731 + if (!SCTP_SO(&asoc->stream, i)->ext)
2732 + continue;
2733 +
2734 +- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
2735 ++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
2736 + if (ret)
2737 + goto err;
2738 + }
2739 +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
2740 +index 23eab7ac43ee5..5cb6846544cc7 100644
2741 +--- a/net/tls/tls_device.c
2742 ++++ b/net/tls/tls_device.c
2743 +@@ -1349,8 +1349,13 @@ static int tls_device_down(struct net_device *netdev)
2744 + * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
2745 + * Now release the ref taken above.
2746 + */
2747 +- if (refcount_dec_and_test(&ctx->refcount))
2748 ++ if (refcount_dec_and_test(&ctx->refcount)) {
2749 ++ /* sk_destruct ran after tls_device_down took a ref, and
2750 ++ * it returned early. Complete the destruction here.
2751 ++ */
2752 ++ list_del(&ctx->list);
2753 + tls_device_free_ctx(ctx);
2754 ++ }
2755 + }
2756 +
2757 + up_write(&device_offload_lock);
2758 +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
2759 +index e440cd7f32a6f..b9ee2ded381ab 100644
2760 +--- a/tools/include/uapi/linux/bpf.h
2761 ++++ b/tools/include/uapi/linux/bpf.h
2762 +@@ -5006,7 +5006,10 @@ struct bpf_pidns_info {
2763 +
2764 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
2765 + struct bpf_sk_lookup {
2766 +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
2767 ++ union {
2768 ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
2769 ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
2770 ++ };
2771 +
2772 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */
2773 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
2774 +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
2775 +index 94809aed8b447..1cab29d45bfb3 100644
2776 +--- a/tools/perf/util/symbol-elf.c
2777 ++++ b/tools/perf/util/symbol-elf.c
2778 +@@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
2779 + return NULL;
2780 + }
2781 +
2782 ++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
2783 ++{
2784 ++ size_t i, phdrnum;
2785 ++ u64 sz;
2786 ++
2787 ++ if (elf_getphdrnum(elf, &phdrnum))
2788 ++ return -1;
2789 ++
2790 ++ for (i = 0; i < phdrnum; i++) {
2791 ++ if (gelf_getphdr(elf, i, phdr) == NULL)
2792 ++ return -1;
2793 ++
2794 ++ if (phdr->p_type != PT_LOAD)
2795 ++ continue;
2796 ++
2797 ++ sz = max(phdr->p_memsz, phdr->p_filesz);
2798 ++ if (!sz)
2799 ++ continue;
2800 ++
2801 ++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
2802 ++ return 0;
2803 ++ }
2804 ++
2805 ++ /* Not found any valid program header */
2806 ++ return -1;
2807 ++}
2808 ++
2809 + static bool want_demangle(bool is_kernel_sym)
2810 + {
2811 + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
2812 +@@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
2813 + sym.st_value);
2814 + used_opd = true;
2815 + }
2816 ++
2817 + /*
2818 + * When loading symbols in a data mapping, ABS symbols (which
2819 + * has a value of SHN_ABS in its st_shndx) failed at
2820 +@@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
2821 + goto out_elf_end;
2822 + } else if ((used_opd && runtime_ss->adjust_symbols) ||
2823 + (!used_opd && syms_ss->adjust_symbols)) {
2824 ++ GElf_Phdr phdr;
2825 ++
2826 ++ if (elf_read_program_header(syms_ss->elf,
2827 ++ (u64)sym.st_value, &phdr)) {
2828 ++ pr_warning("%s: failed to find program header for "
2829 ++ "symbol: %s st_value: %#" PRIx64 "\n",
2830 ++ __func__, elf_name, (u64)sym.st_value);
2831 ++ continue;
2832 ++ }
2833 + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
2834 +- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
2835 +- (u64)sym.st_value, (u64)shdr.sh_addr,
2836 +- (u64)shdr.sh_offset);
2837 +- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
2838 ++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
2839 ++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
2840 ++ (u64)phdr.p_offset);
2841 ++ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
2842 + }
2843 +
2844 + demangled = demangle_sym(dso, kmodule, elf_name);
2845 +diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
2846 +index a4c55fcb0e7b1..0fb92d9a319b7 100644
2847 +--- a/tools/testing/selftests/bpf/test_verifier.c
2848 ++++ b/tools/testing/selftests/bpf/test_verifier.c
2849 +@@ -100,7 +100,7 @@ struct bpf_test {
2850 + enum bpf_prog_type prog_type;
2851 + uint8_t flags;
2852 + void (*fill_helper)(struct bpf_test *self);
2853 +- uint8_t runs;
2854 ++ int runs;
2855 + #define bpf_testdata_struct_t \
2856 + struct { \
2857 + uint32_t retval, retval_unpriv; \
2858 +@@ -1054,7 +1054,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
2859 +
2860 + run_errs = 0;
2861 + run_successes = 0;
2862 +- if (!alignment_prevented_execution && fd_prog >= 0) {
2863 ++ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
2864 + uint32_t expected_val;
2865 + int i;
2866 +
2867 +diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
2868 +index 2ad5f974451c3..fd3b62a084b9f 100644
2869 +--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
2870 ++++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
2871 +@@ -239,6 +239,7 @@
2872 + .result = ACCEPT,
2873 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
2874 + .expected_attach_type = BPF_SK_LOOKUP,
2875 ++ .runs = -1,
2876 + },
2877 + /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
2878 + {