Gentoo Archives: gentoo-commits

From: Alice Ferrazzi <alicef@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.10 commit in: /
Date: Wed, 03 Aug 2022 14:25:01
Message-Id: 1659535964.206a5e2746ef7fe6e5960e2af948e1eedef7e208.alicef@gentoo
1 commit: 206a5e2746ef7fe6e5960e2af948e1eedef7e208
2 Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
3 AuthorDate: Wed Aug 3 14:12:37 2022 +0000
4 Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
5 CommitDate: Wed Aug 3 14:12:44 2022 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=206a5e27
7
8 Linux patch 5.10.135
9
10 Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org>
11
12 0000_README | 4 +
13 1134_linux-5.10.135.patch | 2841 +++++++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 2845 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index 7292c57d..19bd6321 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -579,6 +579,10 @@ Patch: 1133_linux-5.10.134.patch
21 From: http://www.kernel.org
22 Desc: Linux 5.10.134
23
24 +Patch: 1134_linux-5.10.135.patch
25 +From: http://www.kernel.org
26 +Desc: Linux 5.10.135
27 +
28 Patch: 1500_XATTR_USER_PREFIX.patch
29 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
30 Desc: Support for namespace user.pax.* on tmpfs.
31
32 diff --git a/1134_linux-5.10.135.patch b/1134_linux-5.10.135.patch
33 new file mode 100644
34 index 00000000..435afe17
35 --- /dev/null
36 +++ b/1134_linux-5.10.135.patch
37 @@ -0,0 +1,2841 @@
38 +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
39 +index 1a58c580b2366..8b7c26d090459 100644
40 +--- a/Documentation/admin-guide/kernel-parameters.txt
41 ++++ b/Documentation/admin-guide/kernel-parameters.txt
42 +@@ -2873,6 +2873,7 @@
43 + no_entry_flush [PPC]
44 + no_uaccess_flush [PPC]
45 + mmio_stale_data=off [X86]
46 ++ retbleed=off [X86]
47 +
48 + Exceptions:
49 + This does not have any effect on
50 +@@ -2895,6 +2896,7 @@
51 + mds=full,nosmt [X86]
52 + tsx_async_abort=full,nosmt [X86]
53 + mmio_stale_data=full,nosmt [X86]
54 ++ retbleed=auto,nosmt [X86]
55 +
56 + mminit_loglevel=
57 + [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
58 +diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
59 +index 0b1f3235aa773..0158dff638873 100644
60 +--- a/Documentation/networking/ip-sysctl.rst
61 ++++ b/Documentation/networking/ip-sysctl.rst
62 +@@ -2629,7 +2629,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
63 + Default: 4K
64 +
65 + sctp_wmem - vector of 3 INTEGERs: min, default, max
66 +- Currently this tunable has no effect.
67 ++ Only the first value ("min") is used, "default" and "max" are
68 ++ ignored.
69 ++
70 ++ min: Minimum size of send buffer that can be used by SCTP sockets.
71 ++ It is guaranteed to each SCTP socket (but not association) even
72 ++ under moderate memory pressure.
73 ++
74 ++ Default: 4K
75 +
76 + addr_scope_policy - INTEGER
77 + Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
78 +diff --git a/Makefile b/Makefile
79 +index 00dddc2ac804a..5f4dbcb433075 100644
80 +--- a/Makefile
81 ++++ b/Makefile
82 +@@ -1,7 +1,7 @@
83 + # SPDX-License-Identifier: GPL-2.0
84 + VERSION = 5
85 + PATCHLEVEL = 10
86 +-SUBLEVEL = 134
87 ++SUBLEVEL = 135
88 + EXTRAVERSION =
89 + NAME = Dare mighty things
90 +
91 +diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
92 +index a81dda65c5762..45180a2cc47cb 100644
93 +--- a/arch/arm/include/asm/dma.h
94 ++++ b/arch/arm/include/asm/dma.h
95 +@@ -10,7 +10,7 @@
96 + #else
97 + #define MAX_DMA_ADDRESS ({ \
98 + extern phys_addr_t arm_dma_zone_size; \
99 +- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
100 ++ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
101 + (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
102 + #endif
103 +
104 +diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
105 +index b99dd8e1c93f1..7ba6cf8261626 100644
106 +--- a/arch/arm/lib/xor-neon.c
107 ++++ b/arch/arm/lib/xor-neon.c
108 +@@ -26,8 +26,9 @@ MODULE_LICENSE("GPL");
109 + * While older versions of GCC do not generate incorrect code, they fail to
110 + * recognize the parallel nature of these functions, and emit plain ARM code,
111 + * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
112 ++ *
113 ++ * #warning This code requires at least version 4.6 of GCC
114 + */
115 +-#warning This code requires at least version 4.6 of GCC
116 + #endif
117 +
118 + #pragma GCC diagnostic ignored "-Wunused-variable"
119 +diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
120 +index 2c6e1c6ecbe78..4120c428dc378 100644
121 +--- a/arch/s390/include/asm/archrandom.h
122 ++++ b/arch/s390/include/asm/archrandom.h
123 +@@ -2,7 +2,7 @@
124 + /*
125 + * Kernel interface for the s390 arch_random_* functions
126 + *
127 +- * Copyright IBM Corp. 2017, 2020
128 ++ * Copyright IBM Corp. 2017, 2022
129 + *
130 + * Author: Harald Freudenberger <freude@××××××.com>
131 + *
132 +@@ -14,6 +14,7 @@
133 + #ifdef CONFIG_ARCH_RANDOM
134 +
135 + #include <linux/static_key.h>
136 ++#include <linux/preempt.h>
137 + #include <linux/atomic.h>
138 + #include <asm/cpacf.h>
139 +
140 +@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
141 +
142 + static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
143 + {
144 +- if (static_branch_likely(&s390_arch_random_available)) {
145 ++ if (static_branch_likely(&s390_arch_random_available) &&
146 ++ in_task()) {
147 + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
148 + atomic64_add(sizeof(*v), &s390_arch_random_counter);
149 + return true;
150 +@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
151 +
152 + static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
153 + {
154 +- if (static_branch_likely(&s390_arch_random_available)) {
155 ++ if (static_branch_likely(&s390_arch_random_available) &&
156 ++ in_task()) {
157 + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
158 + atomic64_add(sizeof(*v), &s390_arch_random_counter);
159 + return true;
160 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
161 +index 7896b67dda420..2e5762faf7740 100644
162 +--- a/arch/x86/kernel/cpu/bugs.c
163 ++++ b/arch/x86/kernel/cpu/bugs.c
164 +@@ -1476,6 +1476,7 @@ static void __init spectre_v2_select_mitigation(void)
165 + * enable IBRS around firmware calls.
166 + */
167 + if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
168 ++ boot_cpu_has(X86_FEATURE_IBPB) &&
169 + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
170 + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
171 +
172 +diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
173 +index a918ca93e4f7d..df5897c90becc 100644
174 +--- a/drivers/edac/ghes_edac.c
175 ++++ b/drivers/edac/ghes_edac.c
176 +@@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
177 +
178 + dmi_memdev_name(handle, &bank, &device);
179 +
180 +- /* both strings must be non-zero */
181 +- if (bank && *bank && device && *device)
182 +- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
183 ++ /*
184 ++ * Set to a NULL string when both bank and device are zero. In this case,
185 ++ * the label assigned by default will be preserved.
186 ++ */
187 ++ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
188 ++ (bank && *bank) ? bank : "",
189 ++ (bank && *bank && device && *device) ? " " : "",
190 ++ (device && *device) ? device : "");
191 + }
192 +
193 + static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
194 +diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
195 +index 92987daa5e17d..5e72e6cb2f840 100644
196 +--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
197 ++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
198 +@@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
199 + goto out_free_dma;
200 +
201 + for (i = 0; i < npages; i += max) {
202 +- args.end = start + (max << PAGE_SHIFT);
203 ++ if (args.start + (max << PAGE_SHIFT) > end)
204 ++ args.end = end;
205 ++ else
206 ++ args.end = args.start + (max << PAGE_SHIFT);
207 ++
208 + ret = migrate_vma_setup(&args);
209 + if (ret)
210 + goto out_free_pfns;
211 +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
212 +index 11d4e3ba9af4c..1dad62ecb8a3a 100644
213 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
214 ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
215 +@@ -1907,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
216 + * non-zero req_queue_pairs says that user requested a new
217 + * queue count via ethtool's set_channels, so use this
218 + * value for queues distribution across traffic classes
219 ++ * We need at least one queue pair for the interface
220 ++ * to be usable, as we see in the else statement.
221 + */
222 + if (vsi->req_queue_pairs > 0)
223 + vsi->num_queue_pairs = vsi->req_queue_pairs;
224 + else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
225 + vsi->num_queue_pairs = pf->num_lan_msix;
226 ++ else
227 ++ vsi->num_queue_pairs = 1;
228 + }
229 +
230 + /* Number of queues per enabled TC */
231 +diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
232 +index 060897eb9cabe..7f1bf71844bce 100644
233 +--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
234 ++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
235 +@@ -652,7 +652,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
236 + rx_desc = ICE_RX_DESC(rx_ring, i);
237 +
238 + if (!(rx_desc->wb.status_error0 &
239 +- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
240 ++ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
241 ++ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
242 + continue;
243 +
244 + rx_buf = &rx_ring->rx_buf[i];
245 +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
246 +index aae79fdd51727..810f2bdb91645 100644
247 +--- a/drivers/net/ethernet/intel/ice/ice_main.c
248 ++++ b/drivers/net/ethernet/intel/ice/ice_main.c
249 +@@ -5203,10 +5203,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
250 + if (vsi->netdev) {
251 + ice_set_rx_mode(vsi->netdev);
252 +
253 +- err = ice_vsi_vlan_setup(vsi);
254 ++ if (vsi->type != ICE_VSI_LB) {
255 ++ err = ice_vsi_vlan_setup(vsi);
256 +
257 +- if (err)
258 +- return err;
259 ++ if (err)
260 ++ return err;
261 ++ }
262 + }
263 + ice_vsi_cfg_dcb_rings(vsi);
264 +
265 +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
266 +index 725b0f38813a9..a2b4e3befa591 100644
267 +--- a/drivers/net/ethernet/sfc/ptp.c
268 ++++ b/drivers/net/ethernet/sfc/ptp.c
269 +@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
270 +
271 + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
272 + if (tx_queue && tx_queue->timestamping) {
273 ++ /* This code invokes normal driver TX code which is always
274 ++ * protected from softirqs when called from generic TX code,
275 ++ * which in turn disables preemption. Look at __dev_queue_xmit
276 ++ * which uses rcu_read_lock_bh disabling preemption for RCU
277 ++ * plus disabling softirqs. We do not need RCU reader
278 ++ * protection here.
279 ++ *
280 ++ * Although it is theoretically safe for current PTP TX/RX code
281 ++ * running without disabling softirqs, there are three good
282 ++ * reasons for doing so:
283 ++ *
284 ++ * 1) The code invoked is mainly implemented for non-PTP
285 ++ * packets and it is always executed with softirqs
286 ++ * disabled.
287 ++ * 2) This being a single PTP packet, better to not
288 ++ * interrupt its processing by softirqs which can lead
289 ++ * to high latencies.
290 ++ * 3) netdev_xmit_more checks preemption is disabled and
291 ++ * triggers a BUG_ON if not.
292 ++ */
293 ++ local_bh_disable();
294 + efx_enqueue_skb(tx_queue, skb);
295 ++ local_bh_enable();
296 + } else {
297 + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
298 + dev_kfree_skb_any(skb);
299 +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
300 +index 789a124809e3c..70c5905a916b9 100644
301 +--- a/drivers/net/macsec.c
302 ++++ b/drivers/net/macsec.c
303 +@@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
304 + #define DEFAULT_SEND_SCI true
305 + #define DEFAULT_ENCRYPT false
306 + #define DEFAULT_ENCODING_SA 0
307 ++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
308 +
309 + static bool send_sci(const struct macsec_secy *secy)
310 + {
311 +@@ -1694,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
312 + return false;
313 +
314 + if (attrs[MACSEC_SA_ATTR_PN] &&
315 +- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
316 ++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
317 + return false;
318 +
319 + if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
320 +@@ -1750,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
321 + }
322 +
323 + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
324 +- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
325 ++ if (tb_sa[MACSEC_SA_ATTR_PN] &&
326 ++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
327 + pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
328 + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
329 + rtnl_unlock();
330 +@@ -1766,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
331 + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
332 + pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
333 + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
334 +- MACSEC_SA_ATTR_SALT);
335 ++ MACSEC_SALT_LEN);
336 + rtnl_unlock();
337 + return -EINVAL;
338 + }
339 +@@ -1839,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
340 + return 0;
341 +
342 + cleanup:
343 +- kfree(rx_sa);
344 ++ macsec_rxsa_put(rx_sa);
345 + rtnl_unlock();
346 + return err;
347 + }
348 +@@ -1936,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
349 + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
350 + return false;
351 +
352 +- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
353 ++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
354 + return false;
355 +
356 + if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
357 +@@ -2008,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
358 + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
359 + pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
360 + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
361 +- MACSEC_SA_ATTR_SALT);
362 ++ MACSEC_SALT_LEN);
363 + rtnl_unlock();
364 + return -EINVAL;
365 + }
366 +@@ -2082,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
367 +
368 + cleanup:
369 + secy->operational = was_operational;
370 +- kfree(tx_sa);
371 ++ macsec_txsa_put(tx_sa);
372 + rtnl_unlock();
373 + return err;
374 + }
375 +@@ -2290,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
376 + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
377 + return false;
378 +
379 +- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
380 ++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
381 + return false;
382 +
383 + if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
384 +@@ -3737,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev,
385 + secy->operational = tx_sa && tx_sa->active;
386 + }
387 +
388 +- if (data[IFLA_MACSEC_WINDOW])
389 +- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
390 +-
391 + if (data[IFLA_MACSEC_ENCRYPT])
392 + tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
393 +
394 +@@ -3785,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev,
395 + }
396 + }
397 +
398 ++ if (data[IFLA_MACSEC_WINDOW]) {
399 ++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
400 ++
401 ++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
402 ++ * for XPN cipher suites */
403 ++ if (secy->xpn &&
404 ++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
405 ++ return -EINVAL;
406 ++ }
407 ++
408 + return 0;
409 + }
410 +
411 +@@ -3814,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
412 +
413 + ret = macsec_changelink_common(dev, data);
414 + if (ret)
415 +- return ret;
416 ++ goto cleanup;
417 +
418 + /* If h/w offloading is available, propagate to the device */
419 + if (macsec_is_offloaded(macsec)) {
420 +diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
421 +index 291fa449993fb..45f295403cb55 100644
422 +--- a/drivers/net/sungem_phy.c
423 ++++ b/drivers/net/sungem_phy.c
424 +@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
425 + int can_low_power = 1;
426 + if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
427 + can_low_power = 0;
428 ++ of_node_put(np);
429 + if (can_low_power) {
430 + /* Enable automatic low-power */
431 + sungem_phy_write(phy, 0x1c, 0x9002);
432 +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
433 +index 37178b078ee37..0a07c05a610d1 100644
434 +--- a/drivers/net/virtio_net.c
435 ++++ b/drivers/net/virtio_net.c
436 +@@ -213,9 +213,15 @@ struct virtnet_info {
437 + /* Packet virtio header size */
438 + u8 hdr_len;
439 +
440 +- /* Work struct for refilling if we run low on memory. */
441 ++ /* Work struct for delayed refilling if we run low on memory. */
442 + struct delayed_work refill;
443 +
444 ++ /* Is delayed refill enabled? */
445 ++ bool refill_enabled;
446 ++
447 ++ /* The lock to synchronize the access to refill_enabled */
448 ++ spinlock_t refill_lock;
449 ++
450 + /* Work struct for config space updates */
451 + struct work_struct config_work;
452 +
453 +@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
454 + return p;
455 + }
456 +
457 ++static void enable_delayed_refill(struct virtnet_info *vi)
458 ++{
459 ++ spin_lock_bh(&vi->refill_lock);
460 ++ vi->refill_enabled = true;
461 ++ spin_unlock_bh(&vi->refill_lock);
462 ++}
463 ++
464 ++static void disable_delayed_refill(struct virtnet_info *vi)
465 ++{
466 ++ spin_lock_bh(&vi->refill_lock);
467 ++ vi->refill_enabled = false;
468 ++ spin_unlock_bh(&vi->refill_lock);
469 ++}
470 ++
471 + static void virtqueue_napi_schedule(struct napi_struct *napi,
472 + struct virtqueue *vq)
473 + {
474 +@@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
475 + }
476 +
477 + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
478 +- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
479 +- schedule_delayed_work(&vi->refill, 0);
480 ++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
481 ++ spin_lock(&vi->refill_lock);
482 ++ if (vi->refill_enabled)
483 ++ schedule_delayed_work(&vi->refill, 0);
484 ++ spin_unlock(&vi->refill_lock);
485 ++ }
486 + }
487 +
488 + u64_stats_update_begin(&rq->stats.syncp);
489 +@@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev)
490 + struct virtnet_info *vi = netdev_priv(dev);
491 + int i, err;
492 +
493 ++ enable_delayed_refill(vi);
494 ++
495 + for (i = 0; i < vi->max_queue_pairs; i++) {
496 + if (i < vi->curr_queue_pairs)
497 + /* Make sure we have some buffers: if oom use wq. */
498 +@@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev)
499 + struct virtnet_info *vi = netdev_priv(dev);
500 + int i;
501 +
502 ++ /* Make sure NAPI doesn't schedule refill work */
503 ++ disable_delayed_refill(vi);
504 + /* Make sure refill_work doesn't re-enable napi! */
505 + cancel_delayed_work_sync(&vi->refill);
506 +
507 +@@ -2390,6 +2418,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
508 +
509 + virtio_device_ready(vdev);
510 +
511 ++ enable_delayed_refill(vi);
512 ++
513 + if (netif_running(vi->dev)) {
514 + err = virtnet_open(vi->dev);
515 + if (err)
516 +@@ -3092,6 +3122,7 @@ static int virtnet_probe(struct virtio_device *vdev)
517 + vdev->priv = vi;
518 +
519 + INIT_WORK(&vi->config_work, virtnet_config_changed_work);
520 ++ spin_lock_init(&vi->refill_lock);
521 +
522 + /* If we can receive ANY GSO packets, we must allocate large ones. */
523 + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
524 +diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
525 +index 6bcc4a13ae6c7..cc772045d526f 100644
526 +--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
527 ++++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
528 +@@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = {
529 + { USB_DEVICE(0x2717, 0x4106) },
530 + { USB_DEVICE(0x2955, 0x0001) },
531 + { USB_DEVICE(0x2955, 0x1001) },
532 ++ { USB_DEVICE(0x2955, 0x1003) },
533 + { USB_DEVICE(0x2a5f, 0x1000) },
534 + { USB_DEVICE(0x7392, 0x7710) },
535 + { 0, }
536 +diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
537 +index 0f2430fb398db..576cc39077f32 100644
538 +--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
539 ++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
540 +@@ -107,9 +107,20 @@ out:
541 + return ret;
542 + }
543 +
544 ++static bool phandle_exists(const struct device_node *np,
545 ++ const char *phandle_name, int index)
546 ++{
547 ++ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
548 ++
549 ++ if (parse_np)
550 ++ of_node_put(parse_np);
551 ++
552 ++ return parse_np != NULL;
553 ++}
554 ++
555 + #define MAX_PROP_SIZE 32
556 + static int ufshcd_populate_vreg(struct device *dev, const char *name,
557 +- struct ufs_vreg **out_vreg)
558 ++ struct ufs_vreg **out_vreg)
559 + {
560 + int ret = 0;
561 + char prop_name[MAX_PROP_SIZE];
562 +@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
563 + }
564 +
565 + snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
566 +- if (!of_parse_phandle(np, prop_name, 0)) {
567 ++ if (!phandle_exists(np, prop_name, 0)) {
568 + dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
569 + __func__, prop_name);
570 + goto out;
571 +diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
572 +index d563abc3e1364..914e991731300 100644
573 +--- a/fs/ntfs/attrib.c
574 ++++ b/fs/ntfs/attrib.c
575 +@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
576 + a = (ATTR_RECORD*)((u8*)ctx->attr +
577 + le32_to_cpu(ctx->attr->length));
578 + for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
579 +- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
580 +- le32_to_cpu(ctx->mrec->bytes_allocated))
581 ++ u8 *mrec_end = (u8 *)ctx->mrec +
582 ++ le32_to_cpu(ctx->mrec->bytes_allocated);
583 ++ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
584 ++ a->name_length * sizeof(ntfschar);
585 ++ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
586 ++ name_end > mrec_end)
587 + break;
588 + ctx->attr = a;
589 + if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
590 +diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
591 +index 7993d527edae9..0a8cd8e59a92c 100644
592 +--- a/fs/ocfs2/ocfs2.h
593 ++++ b/fs/ocfs2/ocfs2.h
594 +@@ -279,7 +279,6 @@ enum ocfs2_mount_options
595 + OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
596 + OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
597 + OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
598 +- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
599 + };
600 +
601 + #define OCFS2_OSB_SOFT_RO 0x0001
602 +@@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
603 +
604 + static inline int ocfs2_mount_local(struct ocfs2_super *osb)
605 + {
606 +- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
607 +- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
608 ++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
609 + }
610 +
611 + static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
612 +diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
613 +index 4da0e4b1e79bf..8caeceeaeda7c 100644
614 +--- a/fs/ocfs2/slot_map.c
615 ++++ b/fs/ocfs2/slot_map.c
616 +@@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
617 + int i, ret = -ENOSPC;
618 +
619 + if ((preferred >= 0) && (preferred < si->si_num_slots)) {
620 +- if (!si->si_slots[preferred].sl_valid ||
621 +- !si->si_slots[preferred].sl_node_num) {
622 ++ if (!si->si_slots[preferred].sl_valid) {
623 + ret = preferred;
624 + goto out;
625 + }
626 + }
627 +
628 + for(i = 0; i < si->si_num_slots; i++) {
629 +- if (!si->si_slots[i].sl_valid ||
630 +- !si->si_slots[i].sl_node_num) {
631 ++ if (!si->si_slots[i].sl_valid) {
632 + ret = i;
633 + break;
634 + }
635 +@@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
636 + spin_lock(&osb->osb_lock);
637 + ocfs2_update_slot_info(si);
638 +
639 +- if (ocfs2_mount_local(osb))
640 +- /* use slot 0 directly in local mode */
641 +- slot = 0;
642 +- else {
643 +- /* search for ourselves first and take the slot if it already
644 +- * exists. Perhaps we need to mark this in a variable for our
645 +- * own journal recovery? Possibly not, though we certainly
646 +- * need to warn to the user */
647 +- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
648 ++ /* search for ourselves first and take the slot if it already
649 ++ * exists. Perhaps we need to mark this in a variable for our
650 ++ * own journal recovery? Possibly not, though we certainly
651 ++ * need to warn to the user */
652 ++ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
653 ++ if (slot < 0) {
654 ++ /* if no slot yet, then just take 1st available
655 ++ * one. */
656 ++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
657 + if (slot < 0) {
658 +- /* if no slot yet, then just take 1st available
659 +- * one. */
660 +- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
661 +- if (slot < 0) {
662 +- spin_unlock(&osb->osb_lock);
663 +- mlog(ML_ERROR, "no free slots available!\n");
664 +- status = -EINVAL;
665 +- goto bail;
666 +- }
667 +- } else
668 +- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
669 +- "already allocated to this node!\n",
670 +- slot, osb->dev_str);
671 +- }
672 ++ spin_unlock(&osb->osb_lock);
673 ++ mlog(ML_ERROR, "no free slots available!\n");
674 ++ status = -EINVAL;
675 ++ goto bail;
676 ++ }
677 ++ } else
678 ++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
679 ++ "allocated to this node!\n", slot, osb->dev_str);
680 +
681 + ocfs2_set_slot(si, slot, osb->node_num);
682 + osb->slot_num = slot;
683 +diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
684 +index 477ad05a34ea2..c0e5f1bad499f 100644
685 +--- a/fs/ocfs2/super.c
686 ++++ b/fs/ocfs2/super.c
687 +@@ -175,7 +175,6 @@ enum {
688 + Opt_dir_resv_level,
689 + Opt_journal_async_commit,
690 + Opt_err_cont,
691 +- Opt_nocluster,
692 + Opt_err,
693 + };
694 +
695 +@@ -209,7 +208,6 @@ static const match_table_t tokens = {
696 + {Opt_dir_resv_level, "dir_resv_level=%u"},
697 + {Opt_journal_async_commit, "journal_async_commit"},
698 + {Opt_err_cont, "errors=continue"},
699 +- {Opt_nocluster, "nocluster"},
700 + {Opt_err, NULL}
701 + };
702 +
703 +@@ -621,13 +619,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
704 + goto out;
705 + }
706 +
707 +- tmp = OCFS2_MOUNT_NOCLUSTER;
708 +- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
709 +- ret = -EINVAL;
710 +- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
711 +- goto out;
712 +- }
713 +-
714 + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
715 + OCFS2_MOUNT_HB_NONE;
716 + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
717 +@@ -868,7 +859,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
718 + }
719 +
720 + if (ocfs2_userspace_stack(osb) &&
721 +- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
722 + strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
723 + OCFS2_STACK_LABEL_LEN)) {
724 + mlog(ML_ERROR,
725 +@@ -1149,11 +1139,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
726 + osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
727 + "ordered");
728 +
729 +- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
730 +- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
731 +- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
732 +- "without cluster aware mode.\n", osb->dev_str);
733 +-
734 + atomic_set(&osb->vol_state, VOLUME_MOUNTED);
735 + wake_up(&osb->osb_mount_event);
736 +
737 +@@ -1460,9 +1445,6 @@ static int ocfs2_parse_options(struct super_block *sb,
738 + case Opt_journal_async_commit:
739 + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
740 + break;
741 +- case Opt_nocluster:
742 +- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
743 +- break;
744 + default:
745 + mlog(ML_ERROR,
746 + "Unrecognized mount option \"%s\" "
747 +@@ -1574,9 +1556,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
748 + if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
749 + seq_printf(s, ",journal_async_commit");
750 +
751 +- if (opts & OCFS2_MOUNT_NOCLUSTER)
752 +- seq_printf(s, ",nocluster");
753 +-
754 + return 0;
755 + }
756 +
757 +diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
758 +index 8bd00da6d2a40..2f46ef3800aa2 100644
759 +--- a/fs/xfs/libxfs/xfs_log_format.h
760 ++++ b/fs/xfs/libxfs/xfs_log_format.h
761 +@@ -414,7 +414,16 @@ struct xfs_log_dinode {
762 + /* start of the extended dinode, writable fields */
763 + uint32_t di_crc; /* CRC of the inode */
764 + uint64_t di_changecount; /* number of attribute changes */
765 +- xfs_lsn_t di_lsn; /* flush sequence */
766 ++
767 ++ /*
768 ++ * The LSN we write to this field during formatting is not a reflection
769 ++ * of the current on-disk LSN. It should never be used for recovery
770 ++ * sequencing, nor should it be recovered into the on-disk inode at all.
771 ++ * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
772 ++ * for details.
773 ++ */
774 ++ xfs_lsn_t di_lsn;
775 ++
776 + uint64_t di_flags2; /* more random flags */
777 + uint32_t di_cowextsize; /* basic cow extent size for file */
778 + uint8_t di_pad2[12]; /* more padding for future expansion */
779 +diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
780 +index 397d94775440d..1ce06173c2f55 100644
781 +--- a/fs/xfs/libxfs/xfs_types.h
782 ++++ b/fs/xfs/libxfs/xfs_types.h
783 +@@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
784 + typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */
785 +
786 + typedef int64_t xfs_lsn_t; /* log sequence number */
787 ++typedef int64_t xfs_csn_t; /* CIL sequence number */
788 +
789 + typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
790 + typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
791 +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
792 +index 8c6e26d62ef28..a3d5ecccfc2cc 100644
793 +--- a/fs/xfs/xfs_buf_item.c
794 ++++ b/fs/xfs/xfs_buf_item.c
795 +@@ -393,17 +393,8 @@ xfs_buf_item_pin(
796 + }
797 +
798 + /*
799 +- * This is called to unpin the buffer associated with the buf log
800 +- * item which was previously pinned with a call to xfs_buf_item_pin().
801 +- *
802 +- * Also drop the reference to the buf item for the current transaction.
803 +- * If the XFS_BLI_STALE flag is set and we are the last reference,
804 +- * then free up the buf log item and unlock the buffer.
805 +- *
806 +- * If the remove flag is set we are called from uncommit in the
807 +- * forced-shutdown path. If that is true and the reference count on
808 +- * the log item is going to drop to zero we need to free the item's
809 +- * descriptor in the transaction.
810 ++ * This is called to unpin the buffer associated with the buf log item which
811 ++ * was previously pinned with a call to xfs_buf_item_pin().
812 + */
813 + STATIC void
814 + xfs_buf_item_unpin(
815 +@@ -420,38 +411,35 @@ xfs_buf_item_unpin(
816 +
817 + trace_xfs_buf_item_unpin(bip);
818 +
819 ++ /*
820 ++ * Drop the bli ref associated with the pin and grab the hold required
821 ++ * for the I/O simulation failure in the abort case. We have to do this
822 ++ * before the pin count drops because the AIL doesn't acquire a bli
823 ++ * reference. Therefore if the refcount drops to zero, the bli could
824 ++ * still be AIL resident and the buffer submitted for I/O (and freed on
825 ++ * completion) at any point before we return. This can be removed once
826 ++ * the AIL properly holds a reference on the bli.
827 ++ */
828 + freed = atomic_dec_and_test(&bip->bli_refcount);
829 +-
830 ++ if (freed && !stale && remove)
831 ++ xfs_buf_hold(bp);
832 + if (atomic_dec_and_test(&bp->b_pin_count))
833 + wake_up_all(&bp->b_waiters);
834 +
835 +- if (freed && stale) {
836 ++ /* nothing to do but drop the pin count if the bli is active */
837 ++ if (!freed)
838 ++ return;
839 ++
840 ++ if (stale) {
841 + ASSERT(bip->bli_flags & XFS_BLI_STALE);
842 + ASSERT(xfs_buf_islocked(bp));
843 + ASSERT(bp->b_flags & XBF_STALE);
844 + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
845 ++ ASSERT(list_empty(&lip->li_trans));
846 ++ ASSERT(!bp->b_transp);
847 +
848 + trace_xfs_buf_item_unpin_stale(bip);
849 +
850 +- if (remove) {
851 +- /*
852 +- * If we are in a transaction context, we have to
853 +- * remove the log item from the transaction as we are
854 +- * about to release our reference to the buffer. If we
855 +- * don't, the unlock that occurs later in
856 +- * xfs_trans_uncommit() will try to reference the
857 +- * buffer which we no longer have a hold on.
858 +- */
859 +- if (!list_empty(&lip->li_trans))
860 +- xfs_trans_del_item(lip);
861 +-
862 +- /*
863 +- * Since the transaction no longer refers to the buffer,
864 +- * the buffer should no longer refer to the transaction.
865 +- */
866 +- bp->b_transp = NULL;
867 +- }
868 +-
869 + /*
870 + * If we get called here because of an IO error, we may or may
871 + * not have the item on the AIL. xfs_trans_ail_delete() will
872 +@@ -468,13 +456,13 @@ xfs_buf_item_unpin(
873 + ASSERT(bp->b_log_item == NULL);
874 + }
875 + xfs_buf_relse(bp);
876 +- } else if (freed && remove) {
877 ++ } else if (remove) {
878 + /*
879 + * The buffer must be locked and held by the caller to simulate
880 +- * an async I/O failure.
881 ++ * an async I/O failure. We acquired the hold for this case
882 ++ * before the buffer was unpinned.
883 + */
884 + xfs_buf_lock(bp);
885 +- xfs_buf_hold(bp);
886 + bp->b_flags |= XBF_ASYNC;
887 + xfs_buf_ioend_fail(bp);
888 + }
889 +@@ -632,7 +620,7 @@ xfs_buf_item_release(
890 + STATIC void
891 + xfs_buf_item_committing(
892 + struct xfs_log_item *lip,
893 +- xfs_lsn_t commit_lsn)
894 ++ xfs_csn_t seq)
895 + {
896 + return xfs_buf_item_release(lip);
897 + }
898 +diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
899 +index 1d649462d731a..b374c9cee1177 100644
900 +--- a/fs/xfs/xfs_buf_item_recover.c
901 ++++ b/fs/xfs/xfs_buf_item_recover.c
902 +@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn(
903 + switch (magicda) {
904 + case XFS_DIR3_LEAF1_MAGIC:
905 + case XFS_DIR3_LEAFN_MAGIC:
906 ++ case XFS_ATTR3_LEAF_MAGIC:
907 + case XFS_DA3_NODE_MAGIC:
908 + lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
909 + uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
910 +diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
911 +index 8c1fdf37ee8f0..8ed47b739b6cc 100644
912 +--- a/fs/xfs/xfs_dquot_item.c
913 ++++ b/fs/xfs/xfs_dquot_item.c
914 +@@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release(
915 + STATIC void
916 + xfs_qm_dquot_logitem_committing(
917 + struct xfs_log_item *lip,
918 +- xfs_lsn_t commit_lsn)
919 ++ xfs_csn_t seq)
920 + {
921 + return xfs_qm_dquot_logitem_release(lip);
922 + }
923 +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
924 +index 5b0f93f738372..4d6bf8d4974fe 100644
925 +--- a/fs/xfs/xfs_file.c
926 ++++ b/fs/xfs/xfs_file.c
927 +@@ -118,6 +118,54 @@ xfs_dir_fsync(
928 + return xfs_log_force_inode(ip);
929 + }
930 +
931 ++static xfs_csn_t
932 ++xfs_fsync_seq(
933 ++ struct xfs_inode *ip,
934 ++ bool datasync)
935 ++{
936 ++ if (!xfs_ipincount(ip))
937 ++ return 0;
938 ++ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
939 ++ return 0;
940 ++ return ip->i_itemp->ili_commit_seq;
941 ++}
942 ++
943 ++/*
944 ++ * All metadata updates are logged, which means that we just have to flush the
945 ++ * log up to the latest LSN that touched the inode.
946 ++ *
947 ++ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
948 ++ * the log force before we clear the ili_fsync_fields field. This ensures that
949 ++ * we don't get a racing sync operation that does not wait for the metadata to
950 ++ * hit the journal before returning. If we race with clearing ili_fsync_fields,
951 ++ * then all that will happen is the log force will do nothing as the lsn will
952 ++ * already be on disk. We can't race with setting ili_fsync_fields because that
953 ++ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
954 ++ * shared until after the ili_fsync_fields is cleared.
955 ++ */
956 ++static int
957 ++xfs_fsync_flush_log(
958 ++ struct xfs_inode *ip,
959 ++ bool datasync,
960 ++ int *log_flushed)
961 ++{
962 ++ int error = 0;
963 ++ xfs_csn_t seq;
964 ++
965 ++ xfs_ilock(ip, XFS_ILOCK_SHARED);
966 ++ seq = xfs_fsync_seq(ip, datasync);
967 ++ if (seq) {
968 ++ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
969 ++ log_flushed);
970 ++
971 ++ spin_lock(&ip->i_itemp->ili_lock);
972 ++ ip->i_itemp->ili_fsync_fields = 0;
973 ++ spin_unlock(&ip->i_itemp->ili_lock);
974 ++ }
975 ++ xfs_iunlock(ip, XFS_ILOCK_SHARED);
976 ++ return error;
977 ++}
978 ++
979 + STATIC int
980 + xfs_file_fsync(
981 + struct file *file,
982 +@@ -125,13 +173,10 @@ xfs_file_fsync(
983 + loff_t end,
984 + int datasync)
985 + {
986 +- struct inode *inode = file->f_mapping->host;
987 +- struct xfs_inode *ip = XFS_I(inode);
988 +- struct xfs_inode_log_item *iip = ip->i_itemp;
989 ++ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
990 + struct xfs_mount *mp = ip->i_mount;
991 + int error = 0;
992 + int log_flushed = 0;
993 +- xfs_lsn_t lsn = 0;
994 +
995 + trace_xfs_file_fsync(ip);
996 +
997 +@@ -155,33 +200,7 @@ xfs_file_fsync(
998 + else if (mp->m_logdev_targp != mp->m_ddev_targp)
999 + xfs_blkdev_issue_flush(mp->m_ddev_targp);
1000 +
1001 +- /*
1002 +- * All metadata updates are logged, which means that we just have to
1003 +- * flush the log up to the latest LSN that touched the inode. If we have
1004 +- * concurrent fsync/fdatasync() calls, we need them to all block on the
1005 +- * log force before we clear the ili_fsync_fields field. This ensures
1006 +- * that we don't get a racing sync operation that does not wait for the
1007 +- * metadata to hit the journal before returning. If we race with
1008 +- * clearing the ili_fsync_fields, then all that will happen is the log
1009 +- * force will do nothing as the lsn will already be on disk. We can't
1010 +- * race with setting ili_fsync_fields because that is done under
1011 +- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
1012 +- * until after the ili_fsync_fields is cleared.
1013 +- */
1014 +- xfs_ilock(ip, XFS_ILOCK_SHARED);
1015 +- if (xfs_ipincount(ip)) {
1016 +- if (!datasync ||
1017 +- (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
1018 +- lsn = iip->ili_last_lsn;
1019 +- }
1020 +-
1021 +- if (lsn) {
1022 +- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
1023 +- spin_lock(&iip->ili_lock);
1024 +- iip->ili_fsync_fields = 0;
1025 +- spin_unlock(&iip->ili_lock);
1026 +- }
1027 +- xfs_iunlock(ip, XFS_ILOCK_SHARED);
1028 ++ error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
1029 +
1030 + /*
1031 + * If we only have a single device, and the log force above was
1032 +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
1033 +index 03497741aef74..1f61e085676b3 100644
1034 +--- a/fs/xfs/xfs_inode.c
1035 ++++ b/fs/xfs/xfs_inode.c
1036 +@@ -2754,7 +2754,7 @@ xfs_iunpin(
1037 + trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
1038 +
1039 + /* Give the log a push to start the unpinning I/O */
1040 +- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
1041 ++ xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
1042 +
1043 + }
1044 +
1045 +@@ -3716,16 +3716,16 @@ int
1046 + xfs_log_force_inode(
1047 + struct xfs_inode *ip)
1048 + {
1049 +- xfs_lsn_t lsn = 0;
1050 ++ xfs_csn_t seq = 0;
1051 +
1052 + xfs_ilock(ip, XFS_ILOCK_SHARED);
1053 + if (xfs_ipincount(ip))
1054 +- lsn = ip->i_itemp->ili_last_lsn;
1055 ++ seq = ip->i_itemp->ili_commit_seq;
1056 + xfs_iunlock(ip, XFS_ILOCK_SHARED);
1057 +
1058 +- if (!lsn)
1059 ++ if (!seq)
1060 + return 0;
1061 +- return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
1062 ++ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
1063 + }
1064 +
1065 + /*
1066 +diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
1067 +index 6ff91e5bf3cd7..3aba4559469f1 100644
1068 +--- a/fs/xfs/xfs_inode_item.c
1069 ++++ b/fs/xfs/xfs_inode_item.c
1070 +@@ -617,9 +617,9 @@ xfs_inode_item_committed(
1071 + STATIC void
1072 + xfs_inode_item_committing(
1073 + struct xfs_log_item *lip,
1074 +- xfs_lsn_t commit_lsn)
1075 ++ xfs_csn_t seq)
1076 + {
1077 +- INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
1078 ++ INODE_ITEM(lip)->ili_commit_seq = seq;
1079 + return xfs_inode_item_release(lip);
1080 + }
1081 +
1082 +diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
1083 +index 4b926e32831c0..403b45ab9aa28 100644
1084 +--- a/fs/xfs/xfs_inode_item.h
1085 ++++ b/fs/xfs/xfs_inode_item.h
1086 +@@ -33,7 +33,7 @@ struct xfs_inode_log_item {
1087 + unsigned int ili_fields; /* fields to be logged */
1088 + unsigned int ili_fsync_fields; /* logged since last fsync */
1089 + xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
1090 +- xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
1091 ++ xfs_csn_t ili_commit_seq; /* last transaction commit */
1092 + };
1093 +
1094 + static inline int xfs_inode_clean(struct xfs_inode *ip)
1095 +diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
1096 +index cb44f7653f03b..538724f9f85ca 100644
1097 +--- a/fs/xfs/xfs_inode_item_recover.c
1098 ++++ b/fs/xfs/xfs_inode_item_recover.c
1099 +@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
1100 + STATIC void
1101 + xfs_log_dinode_to_disk(
1102 + struct xfs_log_dinode *from,
1103 +- struct xfs_dinode *to)
1104 ++ struct xfs_dinode *to,
1105 ++ xfs_lsn_t lsn)
1106 + {
1107 + to->di_magic = cpu_to_be16(from->di_magic);
1108 + to->di_mode = cpu_to_be16(from->di_mode);
1109 +@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
1110 + to->di_flags2 = cpu_to_be64(from->di_flags2);
1111 + to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
1112 + to->di_ino = cpu_to_be64(from->di_ino);
1113 +- to->di_lsn = cpu_to_be64(from->di_lsn);
1114 ++ to->di_lsn = cpu_to_be64(lsn);
1115 + memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
1116 + uuid_copy(&to->di_uuid, &from->di_uuid);
1117 + to->di_flushiter = 0;
1118 +@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
1119 + }
1120 +
1121 + /*
1122 +- * If the inode has an LSN in it, recover the inode only if it's less
1123 +- * than the lsn of the transaction we are replaying. Note: we still
1124 +- * need to replay an owner change even though the inode is more recent
1125 +- * than the transaction as there is no guarantee that all the btree
1126 +- * blocks are more recent than this transaction, too.
1127 ++ * If the inode has an LSN in it, recover the inode only if the on-disk
1128 ++ * inode's LSN is older than the lsn of the transaction we are
1129 ++ * replaying. We can have multiple checkpoints with the same start LSN,
1130 ++ * so the current LSN being equal to the on-disk LSN doesn't necessarily
1131 ++ * mean that the on-disk inode is more recent than the change being
1132 ++ * replayed.
1133 ++ *
1134 ++ * We must check the current_lsn against the on-disk inode
1135 ++ * here because we can't trust the log dinode to contain a valid LSN
1136 ++ * (see comment below before replaying the log dinode for details).
1137 ++ *
1138 ++ * Note: we still need to replay an owner change even though the inode
1139 ++ * is more recent than the transaction as there is no guarantee that all
1140 ++ * the btree blocks are more recent than this transaction, too.
1141 + */
1142 + if (dip->di_version >= 3) {
1143 + xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
1144 +
1145 +- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
1146 ++ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
1147 + trace_xfs_log_recover_inode_skip(log, in_f);
1148 + error = 0;
1149 + goto out_owner_change;
1150 +@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
1151 + goto out_release;
1152 + }
1153 +
1154 +- /* recover the log dinode inode into the on disk inode */
1155 +- xfs_log_dinode_to_disk(ldip, dip);
1156 ++ /*
1157 ++ * Recover the log dinode inode into the on disk inode.
1158 ++ *
1159 ++ * The LSN in the log dinode is garbage - it can be zero or reflect
1160 ++ * stale in-memory runtime state that isn't coherent with the changes
1161 ++ * logged in this transaction or the changes written to the on-disk
1162 ++ * inode. Hence we write the current LSN into the inode because that
1163 ++ * matches what xfs_iflush() would write into the inode when flushing
1164 ++ * the changes in this transaction.
1165 ++ */
1166 ++ xfs_log_dinode_to_disk(ldip, dip, current_lsn);
1167 +
1168 + fields = in_f->ilf_fields;
1169 + if (fields & XFS_ILOG_DEV)
1170 +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
1171 +index b445e63cbc3c7..22d7d74231d42 100644
1172 +--- a/fs/xfs/xfs_log.c
1173 ++++ b/fs/xfs/xfs_log.c
1174 +@@ -765,6 +765,9 @@ xfs_log_mount_finish(
1175 + if (readonly)
1176 + mp->m_flags |= XFS_MOUNT_RDONLY;
1177 +
1178 ++ /* Make sure the log is dead if we're returning failure. */
1179 ++ ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
1180 ++
1181 + return error;
1182 + }
1183 +
1184 +@@ -3210,14 +3213,13 @@ out_error:
1185 + }
1186 +
1187 + static int
1188 +-__xfs_log_force_lsn(
1189 +- struct xfs_mount *mp,
1190 ++xlog_force_lsn(
1191 ++ struct xlog *log,
1192 + xfs_lsn_t lsn,
1193 + uint flags,
1194 + int *log_flushed,
1195 + bool already_slept)
1196 + {
1197 +- struct xlog *log = mp->m_log;
1198 + struct xlog_in_core *iclog;
1199 +
1200 + spin_lock(&log->l_icloglock);
1201 +@@ -3250,8 +3252,6 @@ __xfs_log_force_lsn(
1202 + if (!already_slept &&
1203 + (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
1204 + iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
1205 +- XFS_STATS_INC(mp, xs_log_force_sleep);
1206 +-
1207 + xlog_wait(&iclog->ic_prev->ic_write_wait,
1208 + &log->l_icloglock);
1209 + return -EAGAIN;
1210 +@@ -3289,25 +3289,29 @@ out_error:
1211 + * to disk, that thread will wake up all threads waiting on the queue.
1212 + */
1213 + int
1214 +-xfs_log_force_lsn(
1215 ++xfs_log_force_seq(
1216 + struct xfs_mount *mp,
1217 +- xfs_lsn_t lsn,
1218 ++ xfs_csn_t seq,
1219 + uint flags,
1220 + int *log_flushed)
1221 + {
1222 ++ struct xlog *log = mp->m_log;
1223 ++ xfs_lsn_t lsn;
1224 + int ret;
1225 +- ASSERT(lsn != 0);
1226 ++ ASSERT(seq != 0);
1227 +
1228 + XFS_STATS_INC(mp, xs_log_force);
1229 +- trace_xfs_log_force(mp, lsn, _RET_IP_);
1230 ++ trace_xfs_log_force(mp, seq, _RET_IP_);
1231 +
1232 +- lsn = xlog_cil_force_lsn(mp->m_log, lsn);
1233 ++ lsn = xlog_cil_force_seq(log, seq);
1234 + if (lsn == NULLCOMMITLSN)
1235 + return 0;
1236 +
1237 +- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
1238 +- if (ret == -EAGAIN)
1239 +- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
1240 ++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
1241 ++ if (ret == -EAGAIN) {
1242 ++ XFS_STATS_INC(mp, xs_log_force_sleep);
1243 ++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
1244 ++ }
1245 + return ret;
1246 + }
1247 +
1248 +diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
1249 +index 98c913da7587e..a1089f8b7169b 100644
1250 +--- a/fs/xfs/xfs_log.h
1251 ++++ b/fs/xfs/xfs_log.h
1252 +@@ -106,7 +106,7 @@ struct xfs_item_ops;
1253 + struct xfs_trans;
1254 +
1255 + int xfs_log_force(struct xfs_mount *mp, uint flags);
1256 +-int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
1257 ++int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
1258 + int *log_forced);
1259 + int xfs_log_mount(struct xfs_mount *mp,
1260 + struct xfs_buftarg *log_target,
1261 +@@ -132,8 +132,6 @@ bool xfs_log_writable(struct xfs_mount *mp);
1262 + struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
1263 + void xfs_log_ticket_put(struct xlog_ticket *ticket);
1264 +
1265 +-void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
1266 +- xfs_lsn_t *commit_lsn, bool regrant);
1267 + void xlog_cil_process_committed(struct list_head *list);
1268 + bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
1269 +
1270 +diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
1271 +index cd5c04dabe2e1..fbe160d5e9b96 100644
1272 +--- a/fs/xfs/xfs_log_cil.c
1273 ++++ b/fs/xfs/xfs_log_cil.c
1274 +@@ -777,7 +777,7 @@ xlog_cil_push_work(
1275 + * that higher sequences will wait for us to write out a commit record
1276 + * before they do.
1277 + *
1278 +- * xfs_log_force_lsn requires us to mirror the new sequence into the cil
1279 ++ * xfs_log_force_seq requires us to mirror the new sequence into the cil
1280 + * structure atomically with the addition of this sequence to the
1281 + * committing list. This also ensures that we can do unlocked checks
1282 + * against the current sequence in log forces without risking
1283 +@@ -1020,16 +1020,14 @@ xlog_cil_empty(
1284 + * allowed again.
1285 + */
1286 + void
1287 +-xfs_log_commit_cil(
1288 +- struct xfs_mount *mp,
1289 ++xlog_cil_commit(
1290 ++ struct xlog *log,
1291 + struct xfs_trans *tp,
1292 +- xfs_lsn_t *commit_lsn,
1293 ++ xfs_csn_t *commit_seq,
1294 + bool regrant)
1295 + {
1296 +- struct xlog *log = mp->m_log;
1297 + struct xfs_cil *cil = log->l_cilp;
1298 + struct xfs_log_item *lip, *next;
1299 +- xfs_lsn_t xc_commit_lsn;
1300 +
1301 + /*
1302 + * Do all necessary memory allocation before we lock the CIL.
1303 +@@ -1043,10 +1041,6 @@ xfs_log_commit_cil(
1304 +
1305 + xlog_cil_insert_items(log, tp);
1306 +
1307 +- xc_commit_lsn = cil->xc_ctx->sequence;
1308 +- if (commit_lsn)
1309 +- *commit_lsn = xc_commit_lsn;
1310 +-
1311 + if (regrant && !XLOG_FORCED_SHUTDOWN(log))
1312 + xfs_log_ticket_regrant(log, tp->t_ticket);
1313 + else
1314 +@@ -1069,8 +1063,10 @@ xfs_log_commit_cil(
1315 + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
1316 + xfs_trans_del_item(lip);
1317 + if (lip->li_ops->iop_committing)
1318 +- lip->li_ops->iop_committing(lip, xc_commit_lsn);
1319 ++ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
1320 + }
1321 ++ if (commit_seq)
1322 ++ *commit_seq = cil->xc_ctx->sequence;
1323 +
1324 + /* xlog_cil_push_background() releases cil->xc_ctx_lock */
1325 + xlog_cil_push_background(log);
1326 +@@ -1087,9 +1083,9 @@ xfs_log_commit_cil(
1327 + * iclog flush is necessary following this call.
1328 + */
1329 + xfs_lsn_t
1330 +-xlog_cil_force_lsn(
1331 ++xlog_cil_force_seq(
1332 + struct xlog *log,
1333 +- xfs_lsn_t sequence)
1334 ++ xfs_csn_t sequence)
1335 + {
1336 + struct xfs_cil *cil = log->l_cilp;
1337 + struct xfs_cil_ctx *ctx;
1338 +@@ -1183,23 +1179,19 @@ out_shutdown:
1339 + */
1340 + bool
1341 + xfs_log_item_in_current_chkpt(
1342 +- struct xfs_log_item *lip)
1343 ++ struct xfs_log_item *lip)
1344 + {
1345 +- struct xfs_cil_ctx *ctx;
1346 ++ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
1347 +
1348 + if (list_empty(&lip->li_cil))
1349 + return false;
1350 +
1351 +- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
1352 +-
1353 + /*
1354 + * li_seq is written on the first commit of a log item to record the
1355 + * first checkpoint it is written to. Hence if it is different to the
1356 + * current sequence, we're in a new checkpoint.
1357 + */
1358 +- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
1359 +- return false;
1360 +- return true;
1361 ++ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
1362 + }
1363 +
1364 + /*
1365 +diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
1366 +index 1c6fdbf3d5066..42cd1602ac256 100644
1367 +--- a/fs/xfs/xfs_log_priv.h
1368 ++++ b/fs/xfs/xfs_log_priv.h
1369 +@@ -230,7 +230,7 @@ struct xfs_cil;
1370 +
1371 + struct xfs_cil_ctx {
1372 + struct xfs_cil *cil;
1373 +- xfs_lsn_t sequence; /* chkpt sequence # */
1374 ++ xfs_csn_t sequence; /* chkpt sequence # */
1375 + xfs_lsn_t start_lsn; /* first LSN of chkpt commit */
1376 + xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
1377 + struct xlog_ticket *ticket; /* chkpt ticket */
1378 +@@ -268,10 +268,10 @@ struct xfs_cil {
1379 + struct xfs_cil_ctx *xc_ctx;
1380 +
1381 + spinlock_t xc_push_lock ____cacheline_aligned_in_smp;
1382 +- xfs_lsn_t xc_push_seq;
1383 ++ xfs_csn_t xc_push_seq;
1384 + struct list_head xc_committing;
1385 + wait_queue_head_t xc_commit_wait;
1386 +- xfs_lsn_t xc_current_sequence;
1387 ++ xfs_csn_t xc_current_sequence;
1388 + struct work_struct xc_push_work;
1389 + wait_queue_head_t xc_push_wait; /* background push throttle */
1390 + } ____cacheline_aligned_in_smp;
1391 +@@ -547,19 +547,18 @@ int xlog_cil_init(struct xlog *log);
1392 + void xlog_cil_init_post_recovery(struct xlog *log);
1393 + void xlog_cil_destroy(struct xlog *log);
1394 + bool xlog_cil_empty(struct xlog *log);
1395 ++void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp,
1396 ++ xfs_csn_t *commit_seq, bool regrant);
1397 +
1398 + /*
1399 + * CIL force routines
1400 + */
1401 +-xfs_lsn_t
1402 +-xlog_cil_force_lsn(
1403 +- struct xlog *log,
1404 +- xfs_lsn_t sequence);
1405 ++xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence);
1406 +
1407 + static inline void
1408 + xlog_cil_force(struct xlog *log)
1409 + {
1410 +- xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
1411 ++ xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence);
1412 + }
1413 +
1414 + /*
1415 +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
1416 +index 87886b7f77dad..69408782019eb 100644
1417 +--- a/fs/xfs/xfs_log_recover.c
1418 ++++ b/fs/xfs/xfs_log_recover.c
1419 +@@ -2457,8 +2457,10 @@ xlog_finish_defer_ops(
1420 +
1421 + error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
1422 + dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
1423 +- if (error)
1424 ++ if (error) {
1425 ++ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1426 + return error;
1427 ++ }
1428 +
1429 + /*
1430 + * Transfer to this new transaction all the dfops we captured
1431 +@@ -3454,6 +3456,7 @@ xlog_recover_finish(
1432 + * this) before we get around to xfs_log_mount_cancel.
1433 + */
1434 + xlog_recover_cancel_intents(log);
1435 ++ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
1436 + xfs_alert(log->l_mp, "Failed to recover intents");
1437 + return error;
1438 + }
1439 +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
1440 +index 44b05e1d5d327..a2a5a0fd92334 100644
1441 +--- a/fs/xfs/xfs_mount.c
1442 ++++ b/fs/xfs/xfs_mount.c
1443 +@@ -968,9 +968,17 @@ xfs_mountfs(
1444 + /*
1445 + * Finish recovering the file system. This part needed to be delayed
1446 + * until after the root and real-time bitmap inodes were consistently
1447 +- * read in.
1448 ++ * read in. Temporarily create per-AG space reservations for metadata
1449 ++ * btree shape changes because space freeing transactions (for inode
1450 ++ * inactivation) require the per-AG reservation in lieu of reserving
1451 ++ * blocks.
1452 + */
1453 ++ error = xfs_fs_reserve_ag_blocks(mp);
1454 ++ if (error && error == -ENOSPC)
1455 ++ xfs_warn(mp,
1456 ++ "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
1457 + error = xfs_log_mount_finish(mp);
1458 ++ xfs_fs_unreserve_ag_blocks(mp);
1459 + if (error) {
1460 + xfs_warn(mp, "log mount finish failed");
1461 + goto out_rtunmount;
1462 +diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
1463 +index 36166bae24a6f..73a1de7ceefc9 100644
1464 +--- a/fs/xfs/xfs_trans.c
1465 ++++ b/fs/xfs/xfs_trans.c
1466 +@@ -832,7 +832,7 @@ __xfs_trans_commit(
1467 + bool regrant)
1468 + {
1469 + struct xfs_mount *mp = tp->t_mountp;
1470 +- xfs_lsn_t commit_lsn = -1;
1471 ++ xfs_csn_t commit_seq = 0;
1472 + int error = 0;
1473 + int sync = tp->t_flags & XFS_TRANS_SYNC;
1474 +
1475 +@@ -874,7 +874,7 @@ __xfs_trans_commit(
1476 + xfs_trans_apply_sb_deltas(tp);
1477 + xfs_trans_apply_dquot_deltas(tp);
1478 +
1479 +- xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
1480 ++ xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
1481 +
1482 + xfs_trans_free(tp);
1483 +
1484 +@@ -883,7 +883,7 @@ __xfs_trans_commit(
1485 + * log out now and wait for it.
1486 + */
1487 + if (sync) {
1488 +- error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
1489 ++ error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
1490 + XFS_STATS_INC(mp, xs_trans_sync);
1491 + } else {
1492 + XFS_STATS_INC(mp, xs_trans_async);
1493 +diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
1494 +index 075eeade4f7d5..97485559008bb 100644
1495 +--- a/fs/xfs/xfs_trans.h
1496 ++++ b/fs/xfs/xfs_trans.h
1497 +@@ -43,7 +43,7 @@ struct xfs_log_item {
1498 + struct list_head li_cil; /* CIL pointers */
1499 + struct xfs_log_vec *li_lv; /* active log vector */
1500 + struct xfs_log_vec *li_lv_shadow; /* standby vector */
1501 +- xfs_lsn_t li_seq; /* CIL commit seq */
1502 ++ xfs_csn_t li_seq; /* CIL commit seq */
1503 + };
1504 +
1505 + /*
1506 +@@ -69,7 +69,7 @@ struct xfs_item_ops {
1507 + void (*iop_pin)(struct xfs_log_item *);
1508 + void (*iop_unpin)(struct xfs_log_item *, int remove);
1509 + uint (*iop_push)(struct xfs_log_item *, struct list_head *);
1510 +- void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
1511 ++ void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq);
1512 + void (*iop_release)(struct xfs_log_item *);
1513 + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
1514 + int (*iop_recover)(struct xfs_log_item *lip,
1515 +diff --git a/include/linux/bpf.h b/include/linux/bpf.h
1516 +index f21bc441e3fa8..b010d45a1ecd5 100644
1517 +--- a/include/linux/bpf.h
1518 ++++ b/include/linux/bpf.h
1519 +@@ -1457,6 +1457,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
1520 + int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
1521 + const union bpf_attr *kattr,
1522 + union bpf_attr __user *uattr);
1523 ++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
1524 ++ const union bpf_attr *kattr,
1525 ++ union bpf_attr __user *uattr);
1526 + bool btf_ctx_access(int off, int size, enum bpf_access_type type,
1527 + const struct bpf_prog *prog,
1528 + struct bpf_insn_access_aux *info);
1529 +@@ -1671,6 +1674,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
1530 + return -ENOTSUPP;
1531 + }
1532 +
1533 ++static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
1534 ++ const union bpf_attr *kattr,
1535 ++ union bpf_attr __user *uattr)
1536 ++{
1537 ++ return -ENOTSUPP;
1538 ++}
1539 ++
1540 + static inline void bpf_map_put(struct bpf_map *map)
1541 + {
1542 + }
1543 +diff --git a/include/net/addrconf.h b/include/net/addrconf.h
1544 +index e7ce719838b5e..edba74a536839 100644
1545 +--- a/include/net/addrconf.h
1546 ++++ b/include/net/addrconf.h
1547 +@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
1548 + {
1549 + const struct inet6_dev *idev = __in6_dev_get(dev);
1550 +
1551 ++ if (unlikely(!idev))
1552 ++ return true;
1553 ++
1554 + return !!idev->cnf.ignore_routes_with_linkdown;
1555 + }
1556 +
1557 +diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
1558 +index 1d1232917de72..9b8000869b078 100644
1559 +--- a/include/net/bluetooth/l2cap.h
1560 ++++ b/include/net/bluetooth/l2cap.h
1561 +@@ -845,6 +845,7 @@ enum {
1562 + };
1563 +
1564 + void l2cap_chan_hold(struct l2cap_chan *c);
1565 ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
1566 + void l2cap_chan_put(struct l2cap_chan *c);
1567 +
1568 + static inline void l2cap_chan_lock(struct l2cap_chan *chan)
1569 +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
1570 +index 0b1864a82d4ad..ff901aade442f 100644
1571 +--- a/include/net/inet_connection_sock.h
1572 ++++ b/include/net/inet_connection_sock.h
1573 +@@ -317,7 +317,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
1574 +
1575 + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
1576 +
1577 +-#define TCP_PINGPONG_THRESH 3
1578 ++#define TCP_PINGPONG_THRESH 1
1579 +
1580 + static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
1581 + {
1582 +@@ -334,14 +334,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
1583 + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
1584 + }
1585 +
1586 +-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
1587 +-{
1588 +- struct inet_connection_sock *icsk = inet_csk(sk);
1589 +-
1590 +- if (icsk->icsk_ack.pingpong < U8_MAX)
1591 +- icsk->icsk_ack.pingpong++;
1592 +-}
1593 +-
1594 + static inline bool inet_csk_has_ulp(struct sock *sk)
1595 + {
1596 + return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
1597 +diff --git a/include/net/tcp.h b/include/net/tcp.h
1598 +index 44bfb22069c1f..8129ce9a07719 100644
1599 +--- a/include/net/tcp.h
1600 ++++ b/include/net/tcp.h
1601 +@@ -1396,7 +1396,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
1602 +
1603 + static inline int tcp_win_from_space(const struct sock *sk, int space)
1604 + {
1605 +- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
1606 ++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
1607 +
1608 + return tcp_adv_win_scale <= 0 ?
1609 + (space>>(-tcp_adv_win_scale)) :
1610 +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
1611 +index 0f39fdcb2273c..2a234023821e3 100644
1612 +--- a/include/uapi/linux/bpf.h
1613 ++++ b/include/uapi/linux/bpf.h
1614 +@@ -5007,7 +5007,10 @@ struct bpf_pidns_info {
1615 +
1616 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
1617 + struct bpf_sk_lookup {
1618 +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
1619 ++ union {
1620 ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
1621 ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
1622 ++ };
1623 +
1624 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */
1625 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
1626 +diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
1627 +index e5d22af43fa0b..d29731a30b8e1 100644
1628 +--- a/kernel/watch_queue.c
1629 ++++ b/kernel/watch_queue.c
1630 +@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
1631 + rcu_assign_pointer(watch->queue, wqueue);
1632 + }
1633 +
1634 ++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
1635 ++{
1636 ++ const struct cred *cred;
1637 ++ struct watch *w;
1638 ++
1639 ++ hlist_for_each_entry(w, &wlist->watchers, list_node) {
1640 ++ struct watch_queue *wq = rcu_access_pointer(w->queue);
1641 ++ if (wqueue == wq && watch->id == w->id)
1642 ++ return -EBUSY;
1643 ++ }
1644 ++
1645 ++ cred = current_cred();
1646 ++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
1647 ++ atomic_dec(&cred->user->nr_watches);
1648 ++ return -EAGAIN;
1649 ++ }
1650 ++
1651 ++ watch->cred = get_cred(cred);
1652 ++ rcu_assign_pointer(watch->watch_list, wlist);
1653 ++
1654 ++ kref_get(&wqueue->usage);
1655 ++ kref_get(&watch->usage);
1656 ++ hlist_add_head(&watch->queue_node, &wqueue->watches);
1657 ++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
1658 ++ return 0;
1659 ++}
1660 ++
1661 + /**
1662 + * add_watch_to_object - Add a watch on an object to a watch list
1663 + * @watch: The watch to add
1664 +@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
1665 + */
1666 + int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
1667 + {
1668 +- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
1669 +- struct watch *w;
1670 +-
1671 +- hlist_for_each_entry(w, &wlist->watchers, list_node) {
1672 +- struct watch_queue *wq = rcu_access_pointer(w->queue);
1673 +- if (wqueue == wq && watch->id == w->id)
1674 +- return -EBUSY;
1675 +- }
1676 +-
1677 +- watch->cred = get_current_cred();
1678 +- rcu_assign_pointer(watch->watch_list, wlist);
1679 ++ struct watch_queue *wqueue;
1680 ++ int ret = -ENOENT;
1681 +
1682 +- if (atomic_inc_return(&watch->cred->user->nr_watches) >
1683 +- task_rlimit(current, RLIMIT_NOFILE)) {
1684 +- atomic_dec(&watch->cred->user->nr_watches);
1685 +- put_cred(watch->cred);
1686 +- return -EAGAIN;
1687 +- }
1688 ++ rcu_read_lock();
1689 +
1690 ++ wqueue = rcu_access_pointer(watch->queue);
1691 + if (lock_wqueue(wqueue)) {
1692 +- kref_get(&wqueue->usage);
1693 +- kref_get(&watch->usage);
1694 +- hlist_add_head(&watch->queue_node, &wqueue->watches);
1695 ++ spin_lock(&wlist->lock);
1696 ++ ret = add_one_watch(watch, wlist, wqueue);
1697 ++ spin_unlock(&wlist->lock);
1698 + unlock_wqueue(wqueue);
1699 + }
1700 +
1701 +- hlist_add_head(&watch->list_node, &wlist->watchers);
1702 +- return 0;
1703 ++ rcu_read_unlock();
1704 ++ return ret;
1705 + }
1706 + EXPORT_SYMBOL(add_watch_to_object);
1707 +
1708 +diff --git a/mm/page_alloc.c b/mm/page_alloc.c
1709 +index f3418edb136be..43ff22ce76324 100644
1710 +--- a/mm/page_alloc.c
1711 ++++ b/mm/page_alloc.c
1712 +@@ -3679,11 +3679,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
1713 + * need to be calculated.
1714 + */
1715 + if (!order) {
1716 +- long fast_free;
1717 ++ long usable_free;
1718 ++ long reserved;
1719 +
1720 +- fast_free = free_pages;
1721 +- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
1722 +- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
1723 ++ usable_free = free_pages;
1724 ++ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
1725 ++
1726 ++ /* reserved may over estimate high-atomic reserves. */
1727 ++ usable_free -= min(usable_free, reserved);
1728 ++ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
1729 + return true;
1730 + }
1731 +
1732 +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
1733 +index 2557cd917f5ed..6a5ff5dcc09a9 100644
1734 +--- a/net/bluetooth/l2cap_core.c
1735 ++++ b/net/bluetooth/l2cap_core.c
1736 +@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
1737 + }
1738 +
1739 + /* Find channel with given SCID.
1740 +- * Returns locked channel. */
1741 ++ * Returns a reference locked channel.
1742 ++ */
1743 + static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
1744 + u16 cid)
1745 + {
1746 +@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
1747 +
1748 + mutex_lock(&conn->chan_lock);
1749 + c = __l2cap_get_chan_by_scid(conn, cid);
1750 +- if (c)
1751 +- l2cap_chan_lock(c);
1752 ++ if (c) {
1753 ++ /* Only lock if chan reference is not 0 */
1754 ++ c = l2cap_chan_hold_unless_zero(c);
1755 ++ if (c)
1756 ++ l2cap_chan_lock(c);
1757 ++ }
1758 + mutex_unlock(&conn->chan_lock);
1759 +
1760 + return c;
1761 + }
1762 +
1763 + /* Find channel with given DCID.
1764 +- * Returns locked channel.
1765 ++ * Returns a reference locked channel.
1766 + */
1767 + static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
1768 + u16 cid)
1769 +@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
1770 +
1771 + mutex_lock(&conn->chan_lock);
1772 + c = __l2cap_get_chan_by_dcid(conn, cid);
1773 +- if (c)
1774 +- l2cap_chan_lock(c);
1775 ++ if (c) {
1776 ++ /* Only lock if chan reference is not 0 */
1777 ++ c = l2cap_chan_hold_unless_zero(c);
1778 ++ if (c)
1779 ++ l2cap_chan_lock(c);
1780 ++ }
1781 + mutex_unlock(&conn->chan_lock);
1782 +
1783 + return c;
1784 +@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
1785 +
1786 + mutex_lock(&conn->chan_lock);
1787 + c = __l2cap_get_chan_by_ident(conn, ident);
1788 +- if (c)
1789 +- l2cap_chan_lock(c);
1790 ++ if (c) {
1791 ++ /* Only lock if chan reference is not 0 */
1792 ++ c = l2cap_chan_hold_unless_zero(c);
1793 ++ if (c)
1794 ++ l2cap_chan_lock(c);
1795 ++ }
1796 + mutex_unlock(&conn->chan_lock);
1797 +
1798 + return c;
1799 +@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
1800 + kref_get(&c->kref);
1801 + }
1802 +
1803 ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
1804 ++{
1805 ++ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
1806 ++
1807 ++ if (!kref_get_unless_zero(&c->kref))
1808 ++ return NULL;
1809 ++
1810 ++ return c;
1811 ++}
1812 ++
1813 + void l2cap_chan_put(struct l2cap_chan *c)
1814 + {
1815 + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
1816 +@@ -1965,7 +1988,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
1817 + src_match = !bacmp(&c->src, src);
1818 + dst_match = !bacmp(&c->dst, dst);
1819 + if (src_match && dst_match) {
1820 +- l2cap_chan_hold(c);
1821 ++ c = l2cap_chan_hold_unless_zero(c);
1822 ++ if (!c)
1823 ++ continue;
1824 ++
1825 + read_unlock(&chan_list_lock);
1826 + return c;
1827 + }
1828 +@@ -1980,7 +2006,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
1829 + }
1830 +
1831 + if (c1)
1832 +- l2cap_chan_hold(c1);
1833 ++ c1 = l2cap_chan_hold_unless_zero(c1);
1834 +
1835 + read_unlock(&chan_list_lock);
1836 +
1837 +@@ -4460,6 +4486,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
1838 +
1839 + unlock:
1840 + l2cap_chan_unlock(chan);
1841 ++ l2cap_chan_put(chan);
1842 + return err;
1843 + }
1844 +
1845 +@@ -4573,6 +4600,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
1846 +
1847 + done:
1848 + l2cap_chan_unlock(chan);
1849 ++ l2cap_chan_put(chan);
1850 + return err;
1851 + }
1852 +
1853 +@@ -5300,6 +5328,7 @@ send_move_response:
1854 + l2cap_send_move_chan_rsp(chan, result);
1855 +
1856 + l2cap_chan_unlock(chan);
1857 ++ l2cap_chan_put(chan);
1858 +
1859 + return 0;
1860 + }
1861 +@@ -5392,6 +5421,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
1862 + }
1863 +
1864 + l2cap_chan_unlock(chan);
1865 ++ l2cap_chan_put(chan);
1866 + }
1867 +
1868 + static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
1869 +@@ -5421,6 +5451,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
1870 + l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
1871 +
1872 + l2cap_chan_unlock(chan);
1873 ++ l2cap_chan_put(chan);
1874 + }
1875 +
1876 + static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
1877 +@@ -5484,6 +5515,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
1878 + l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
1879 +
1880 + l2cap_chan_unlock(chan);
1881 ++ l2cap_chan_put(chan);
1882 +
1883 + return 0;
1884 + }
1885 +@@ -5519,6 +5551,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
1886 + }
1887 +
1888 + l2cap_chan_unlock(chan);
1889 ++ l2cap_chan_put(chan);
1890 +
1891 + return 0;
1892 + }
1893 +@@ -5891,12 +5924,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
1894 + if (credits > max_credits) {
1895 + BT_ERR("LE credits overflow");
1896 + l2cap_send_disconn_req(chan, ECONNRESET);
1897 +- l2cap_chan_unlock(chan);
1898 +
1899 + /* Return 0 so that we don't trigger an unnecessary
1900 + * command reject packet.
1901 + */
1902 +- return 0;
1903 ++ goto unlock;
1904 + }
1905 +
1906 + chan->tx_credits += credits;
1907 +@@ -5907,7 +5939,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
1908 + if (chan->tx_credits)
1909 + chan->ops->resume(chan);
1910 +
1911 ++unlock:
1912 + l2cap_chan_unlock(chan);
1913 ++ l2cap_chan_put(chan);
1914 +
1915 + return 0;
1916 + }
1917 +@@ -7587,6 +7621,7 @@ drop:
1918 +
1919 + done:
1920 + l2cap_chan_unlock(chan);
1921 ++ l2cap_chan_put(chan);
1922 + }
1923 +
1924 + static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
1925 +@@ -8074,7 +8109,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
1926 + if (src_type != c->src_type)
1927 + continue;
1928 +
1929 +- l2cap_chan_hold(c);
1930 ++ c = l2cap_chan_hold_unless_zero(c);
1931 + read_unlock(&chan_list_lock);
1932 + return c;
1933 + }
1934 +diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
1935 +index eb684f31fd698..f8b231bbbe381 100644
1936 +--- a/net/bpf/test_run.c
1937 ++++ b/net/bpf/test_run.c
1938 +@@ -10,20 +10,86 @@
1939 + #include <net/bpf_sk_storage.h>
1940 + #include <net/sock.h>
1941 + #include <net/tcp.h>
1942 ++#include <net/net_namespace.h>
1943 + #include <linux/error-injection.h>
1944 + #include <linux/smp.h>
1945 ++#include <linux/sock_diag.h>
1946 +
1947 + #define CREATE_TRACE_POINTS
1948 + #include <trace/events/bpf_test_run.h>
1949 +
1950 ++struct bpf_test_timer {
1951 ++ enum { NO_PREEMPT, NO_MIGRATE } mode;
1952 ++ u32 i;
1953 ++ u64 time_start, time_spent;
1954 ++};
1955 ++
1956 ++static void bpf_test_timer_enter(struct bpf_test_timer *t)
1957 ++ __acquires(rcu)
1958 ++{
1959 ++ rcu_read_lock();
1960 ++ if (t->mode == NO_PREEMPT)
1961 ++ preempt_disable();
1962 ++ else
1963 ++ migrate_disable();
1964 ++
1965 ++ t->time_start = ktime_get_ns();
1966 ++}
1967 ++
1968 ++static void bpf_test_timer_leave(struct bpf_test_timer *t)
1969 ++ __releases(rcu)
1970 ++{
1971 ++ t->time_start = 0;
1972 ++
1973 ++ if (t->mode == NO_PREEMPT)
1974 ++ preempt_enable();
1975 ++ else
1976 ++ migrate_enable();
1977 ++ rcu_read_unlock();
1978 ++}
1979 ++
1980 ++static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
1981 ++ __must_hold(rcu)
1982 ++{
1983 ++ t->i++;
1984 ++ if (t->i >= repeat) {
1985 ++ /* We're done. */
1986 ++ t->time_spent += ktime_get_ns() - t->time_start;
1987 ++ do_div(t->time_spent, t->i);
1988 ++ *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
1989 ++ *err = 0;
1990 ++ goto reset;
1991 ++ }
1992 ++
1993 ++ if (signal_pending(current)) {
1994 ++ /* During iteration: we've been cancelled, abort. */
1995 ++ *err = -EINTR;
1996 ++ goto reset;
1997 ++ }
1998 ++
1999 ++ if (need_resched()) {
2000 ++ /* During iteration: we need to reschedule between runs. */
2001 ++ t->time_spent += ktime_get_ns() - t->time_start;
2002 ++ bpf_test_timer_leave(t);
2003 ++ cond_resched();
2004 ++ bpf_test_timer_enter(t);
2005 ++ }
2006 ++
2007 ++ /* Do another round. */
2008 ++ return true;
2009 ++
2010 ++reset:
2011 ++ t->i = 0;
2012 ++ return false;
2013 ++}
2014 ++
2015 + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
2016 + u32 *retval, u32 *time, bool xdp)
2017 + {
2018 + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
2019 ++ struct bpf_test_timer t = { NO_MIGRATE };
2020 + enum bpf_cgroup_storage_type stype;
2021 +- u64 time_start, time_spent = 0;
2022 +- int ret = 0;
2023 +- u32 i;
2024 ++ int ret;
2025 +
2026 + for_each_cgroup_storage_type(stype) {
2027 + storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
2028 +@@ -38,10 +104,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
2029 + if (!repeat)
2030 + repeat = 1;
2031 +
2032 +- rcu_read_lock();
2033 +- migrate_disable();
2034 +- time_start = ktime_get_ns();
2035 +- for (i = 0; i < repeat; i++) {
2036 ++ bpf_test_timer_enter(&t);
2037 ++ do {
2038 + ret = bpf_cgroup_storage_set(storage);
2039 + if (ret)
2040 + break;
2041 +@@ -53,29 +117,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
2042 +
2043 + bpf_cgroup_storage_unset();
2044 +
2045 +- if (signal_pending(current)) {
2046 +- ret = -EINTR;
2047 +- break;
2048 +- }
2049 +-
2050 +- if (need_resched()) {
2051 +- time_spent += ktime_get_ns() - time_start;
2052 +- migrate_enable();
2053 +- rcu_read_unlock();
2054 +-
2055 +- cond_resched();
2056 +-
2057 +- rcu_read_lock();
2058 +- migrate_disable();
2059 +- time_start = ktime_get_ns();
2060 +- }
2061 +- }
2062 +- time_spent += ktime_get_ns() - time_start;
2063 +- migrate_enable();
2064 +- rcu_read_unlock();
2065 +-
2066 +- do_div(time_spent, repeat);
2067 +- *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
2068 ++ } while (bpf_test_timer_continue(&t, repeat, &ret, time));
2069 ++ bpf_test_timer_leave(&t);
2070 +
2071 + for_each_cgroup_storage_type(stype)
2072 + bpf_cgroup_storage_free(storage[stype]);
2073 +@@ -688,18 +731,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
2074 + const union bpf_attr *kattr,
2075 + union bpf_attr __user *uattr)
2076 + {
2077 ++ struct bpf_test_timer t = { NO_PREEMPT };
2078 + u32 size = kattr->test.data_size_in;
2079 + struct bpf_flow_dissector ctx = {};
2080 + u32 repeat = kattr->test.repeat;
2081 + struct bpf_flow_keys *user_ctx;
2082 + struct bpf_flow_keys flow_keys;
2083 +- u64 time_start, time_spent = 0;
2084 + const struct ethhdr *eth;
2085 + unsigned int flags = 0;
2086 + u32 retval, duration;
2087 + void *data;
2088 + int ret;
2089 +- u32 i;
2090 +
2091 + if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
2092 + return -EINVAL;
2093 +@@ -735,48 +777,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
2094 + ctx.data = data;
2095 + ctx.data_end = (__u8 *)data + size;
2096 +
2097 +- rcu_read_lock();
2098 +- preempt_disable();
2099 +- time_start = ktime_get_ns();
2100 +- for (i = 0; i < repeat; i++) {
2101 ++ bpf_test_timer_enter(&t);
2102 ++ do {
2103 + retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
2104 + size, flags);
2105 ++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
2106 ++ bpf_test_timer_leave(&t);
2107 +
2108 +- if (signal_pending(current)) {
2109 +- preempt_enable();
2110 +- rcu_read_unlock();
2111 ++ if (ret < 0)
2112 ++ goto out;
2113 +
2114 +- ret = -EINTR;
2115 +- goto out;
2116 +- }
2117 ++ ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
2118 ++ retval, duration);
2119 ++ if (!ret)
2120 ++ ret = bpf_ctx_finish(kattr, uattr, user_ctx,
2121 ++ sizeof(struct bpf_flow_keys));
2122 +
2123 +- if (need_resched()) {
2124 +- time_spent += ktime_get_ns() - time_start;
2125 +- preempt_enable();
2126 +- rcu_read_unlock();
2127 ++out:
2128 ++ kfree(user_ctx);
2129 ++ kfree(data);
2130 ++ return ret;
2131 ++}
2132 +
2133 +- cond_resched();
2134 ++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
2135 ++ union bpf_attr __user *uattr)
2136 ++{
2137 ++ struct bpf_test_timer t = { NO_PREEMPT };
2138 ++ struct bpf_prog_array *progs = NULL;
2139 ++ struct bpf_sk_lookup_kern ctx = {};
2140 ++ u32 repeat = kattr->test.repeat;
2141 ++ struct bpf_sk_lookup *user_ctx;
2142 ++ u32 retval, duration;
2143 ++ int ret = -EINVAL;
2144 +
2145 +- rcu_read_lock();
2146 +- preempt_disable();
2147 +- time_start = ktime_get_ns();
2148 +- }
2149 ++ if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
2150 ++ return -EINVAL;
2151 ++
2152 ++ if (kattr->test.flags || kattr->test.cpu)
2153 ++ return -EINVAL;
2154 ++
2155 ++ if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
2156 ++ kattr->test.data_size_out)
2157 ++ return -EINVAL;
2158 ++
2159 ++ if (!repeat)
2160 ++ repeat = 1;
2161 ++
2162 ++ user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
2163 ++ if (IS_ERR(user_ctx))
2164 ++ return PTR_ERR(user_ctx);
2165 ++
2166 ++ if (!user_ctx)
2167 ++ return -EINVAL;
2168 ++
2169 ++ if (user_ctx->sk)
2170 ++ goto out;
2171 ++
2172 ++ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
2173 ++ goto out;
2174 ++
2175 ++ if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
2176 ++ ret = -ERANGE;
2177 ++ goto out;
2178 + }
2179 +- time_spent += ktime_get_ns() - time_start;
2180 +- preempt_enable();
2181 +- rcu_read_unlock();
2182 +
2183 +- do_div(time_spent, repeat);
2184 +- duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
2185 ++ ctx.family = (u16)user_ctx->family;
2186 ++ ctx.protocol = (u16)user_ctx->protocol;
2187 ++ ctx.dport = (u16)user_ctx->local_port;
2188 ++ ctx.sport = (__force __be16)user_ctx->remote_port;
2189 +
2190 +- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
2191 +- retval, duration);
2192 ++ switch (ctx.family) {
2193 ++ case AF_INET:
2194 ++ ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
2195 ++ ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
2196 ++ break;
2197 ++
2198 ++#if IS_ENABLED(CONFIG_IPV6)
2199 ++ case AF_INET6:
2200 ++ ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
2201 ++ ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
2202 ++ break;
2203 ++#endif
2204 ++
2205 ++ default:
2206 ++ ret = -EAFNOSUPPORT;
2207 ++ goto out;
2208 ++ }
2209 ++
2210 ++ progs = bpf_prog_array_alloc(1, GFP_KERNEL);
2211 ++ if (!progs) {
2212 ++ ret = -ENOMEM;
2213 ++ goto out;
2214 ++ }
2215 ++
2216 ++ progs->items[0].prog = prog;
2217 ++
2218 ++ bpf_test_timer_enter(&t);
2219 ++ do {
2220 ++ ctx.selected_sk = NULL;
2221 ++ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
2222 ++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
2223 ++ bpf_test_timer_leave(&t);
2224 ++
2225 ++ if (ret < 0)
2226 ++ goto out;
2227 ++
2228 ++ user_ctx->cookie = 0;
2229 ++ if (ctx.selected_sk) {
2230 ++ if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
2231 ++ ret = -EOPNOTSUPP;
2232 ++ goto out;
2233 ++ }
2234 ++
2235 ++ user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
2236 ++ }
2237 ++
2238 ++ ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
2239 + if (!ret)
2240 +- ret = bpf_ctx_finish(kattr, uattr, user_ctx,
2241 +- sizeof(struct bpf_flow_keys));
2242 ++ ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
2243 +
2244 + out:
2245 ++ bpf_prog_array_free(progs);
2246 + kfree(user_ctx);
2247 +- kfree(data);
2248 + return ret;
2249 + }
2250 +diff --git a/net/core/filter.c b/net/core/filter.c
2251 +index e2b491665775f..815edf7bc4390 100644
2252 +--- a/net/core/filter.c
2253 ++++ b/net/core/filter.c
2254 +@@ -10334,6 +10334,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
2255 + }
2256 +
2257 + const struct bpf_prog_ops sk_lookup_prog_ops = {
2258 ++ .test_run = bpf_prog_test_run_sk_lookup,
2259 + };
2260 +
2261 + const struct bpf_verifier_ops sk_lookup_verifier_ops = {
2262 +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
2263 +index 428cc3a4c36f1..c71b863093ace 100644
2264 +--- a/net/ipv4/igmp.c
2265 ++++ b/net/ipv4/igmp.c
2266 +@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
2267 + struct net *net = dev_net(in_dev->dev);
2268 + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
2269 + return;
2270 +- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
2271 ++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
2272 + igmp_ifc_start_timer(in_dev, 1);
2273 + }
2274 +
2275 +@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
2276 + * received value was zero, use the default or statically
2277 + * configured value.
2278 + */
2279 +- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
2280 ++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2281 + in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
2282 +
2283 + /* RFC3376, 8.3. Query Response Interval:
2284 +@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
2285 + pmc->interface = im->interface;
2286 + in_dev_hold(in_dev);
2287 + pmc->multiaddr = im->multiaddr;
2288 +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2289 ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2290 + pmc->sfmode = im->sfmode;
2291 + if (pmc->sfmode == MCAST_INCLUDE) {
2292 + struct ip_sf_list *psf;
2293 +@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
2294 + swap(im->tomb, pmc->tomb);
2295 + swap(im->sources, pmc->sources);
2296 + for (psf = im->sources; psf; psf = psf->sf_next)
2297 +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2298 ++ psf->sf_crcount = in_dev->mr_qrv ?:
2299 ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2300 + } else {
2301 +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2302 ++ im->crcount = in_dev->mr_qrv ?:
2303 ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2304 + }
2305 + in_dev_put(pmc->interface);
2306 + kfree_pmc(pmc);
2307 +@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im)
2308 + if (in_dev->dead)
2309 + return;
2310 +
2311 +- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
2312 ++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2313 + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
2314 + spin_lock_bh(&im->lock);
2315 + igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
2316 +@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
2317 + * IN() to IN(A).
2318 + */
2319 + if (im->sfmode == MCAST_EXCLUDE)
2320 +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2321 ++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2322 +
2323 + igmp_ifc_event(in_dev);
2324 + #endif
2325 +@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
2326 +
2327 + in_dev->mr_qi = IGMP_QUERY_INTERVAL;
2328 + in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
2329 +- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
2330 ++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2331 + }
2332 + #else
2333 + static void ip_mc_reset(struct in_device *in_dev)
2334 +@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
2335 + #ifdef CONFIG_IP_MULTICAST
2336 + if (psf->sf_oldin &&
2337 + !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
2338 +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2339 ++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2340 + psf->sf_next = pmc->tomb;
2341 + pmc->tomb = psf;
2342 + rv = 1;
2343 +@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
2344 + /* filter mode change */
2345 + pmc->sfmode = MCAST_INCLUDE;
2346 + #ifdef CONFIG_IP_MULTICAST
2347 +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2348 ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2349 + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
2350 + for (psf = pmc->sources; psf; psf = psf->sf_next)
2351 + psf->sf_crcount = 0;
2352 +@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
2353 + #ifdef CONFIG_IP_MULTICAST
2354 + /* else no filters; keep old mode for reports */
2355 +
2356 +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2357 ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
2358 + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
2359 + for (psf = pmc->sources; psf; psf = psf->sf_next)
2360 + psf->sf_crcount = 0;
2361 +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
2362 +index f1fd26bb199ce..78460eb39b3af 100644
2363 +--- a/net/ipv4/tcp.c
2364 ++++ b/net/ipv4/tcp.c
2365 +@@ -698,7 +698,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
2366 + int size_goal)
2367 + {
2368 + return skb->len < size_goal &&
2369 +- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
2370 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
2371 + !tcp_rtx_queue_empty(sk) &&
2372 + refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
2373 + }
2374 +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
2375 +index d817f8c31c9ce..d35e88b5ffcbe 100644
2376 +--- a/net/ipv4/tcp_input.c
2377 ++++ b/net/ipv4/tcp_input.c
2378 +@@ -503,7 +503,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
2379 + */
2380 + static void tcp_init_buffer_space(struct sock *sk)
2381 + {
2382 +- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
2383 ++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
2384 + struct tcp_sock *tp = tcp_sk(sk);
2385 + int maxwin;
2386 +
2387 +@@ -693,7 +693,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
2388 + * <prev RTT . ><current RTT .. ><next RTT .... >
2389 + */
2390 +
2391 +- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
2392 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
2393 + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
2394 + int rcvmem, rcvbuf;
2395 + u64 rcvwin, grow;
2396 +@@ -2135,7 +2135,7 @@ void tcp_enter_loss(struct sock *sk)
2397 + * loss recovery is underway except recurring timeout(s) on
2398 + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
2399 + */
2400 +- tp->frto = net->ipv4.sysctl_tcp_frto &&
2401 ++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
2402 + (new_recovery || icsk->icsk_retransmits) &&
2403 + !inet_csk(sk)->icsk_mtup.probe_size;
2404 + }
2405 +@@ -3004,7 +3004,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2406 +
2407 + static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2408 + {
2409 +- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2410 ++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
2411 + struct tcp_sock *tp = tcp_sk(sk);
2412 +
2413 + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2414 +@@ -3528,7 +3528,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
2415 + if (*last_oow_ack_time) {
2416 + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
2417 +
2418 +- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
2419 ++ if (0 <= elapsed &&
2420 ++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
2421 + NET_INC_STATS(net, mib_idx);
2422 + return true; /* rate-limited: don't send yet! */
2423 + }
2424 +@@ -3576,7 +3577,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
2425 + /* Then check host-wide RFC 5961 rate limit. */
2426 + now = jiffies / HZ;
2427 + if (now != challenge_timestamp) {
2428 +- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
2429 ++ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
2430 + u32 half = (ack_limit + 1) >> 1;
2431 +
2432 + challenge_timestamp = now;
2433 +@@ -4367,7 +4368,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
2434 + {
2435 + struct tcp_sock *tp = tcp_sk(sk);
2436 +
2437 +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
2438 ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
2439 + int mib_idx;
2440 +
2441 + if (before(seq, tp->rcv_nxt))
2442 +@@ -4414,7 +4415,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
2443 + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
2444 + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
2445 +
2446 +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
2447 ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
2448 + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
2449 +
2450 + tcp_rcv_spurious_retrans(sk, skb);
2451 +@@ -5439,7 +5440,7 @@ send_now:
2452 + }
2453 +
2454 + if (!tcp_is_sack(tp) ||
2455 +- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
2456 ++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
2457 + goto send_now;
2458 +
2459 + if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
2460 +@@ -5460,11 +5461,12 @@ send_now:
2461 + if (tp->srtt_us && tp->srtt_us < rtt)
2462 + rtt = tp->srtt_us;
2463 +
2464 +- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
2465 ++ delay = min_t(unsigned long,
2466 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
2467 + rtt * (NSEC_PER_USEC >> 3)/20);
2468 + sock_hold(sk);
2469 + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
2470 +- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
2471 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
2472 + HRTIMER_MODE_REL_PINNED_SOFT);
2473 + }
2474 +
2475 +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
2476 +index d5f13ff7d9004..0d165ce2d80a7 100644
2477 +--- a/net/ipv4/tcp_ipv4.c
2478 ++++ b/net/ipv4/tcp_ipv4.c
2479 +@@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
2480 + if (skb) {
2481 + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
2482 +
2483 +- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
2484 ++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
2485 + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
2486 + (inet_sk(sk)->tos & INET_ECN_MASK) :
2487 + inet_sk(sk)->tos;
2488 +@@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
2489 + /* Set ToS of the new socket based upon the value of incoming SYN.
2490 + * ECT bits are set later in tcp_init_transfer().
2491 + */
2492 +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
2493 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
2494 + newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
2495 +
2496 + if (!dst) {
2497 +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
2498 +index 8d7e32f4abf67..f3ca6eea2ca39 100644
2499 +--- a/net/ipv4/tcp_metrics.c
2500 ++++ b/net/ipv4/tcp_metrics.c
2501 +@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
2502 + int m;
2503 +
2504 + sk_dst_confirm(sk);
2505 +- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
2506 ++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
2507 + return;
2508 +
2509 + rcu_read_lock();
2510 +@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
2511 +
2512 + if (tcp_in_initial_slowstart(tp)) {
2513 + /* Slow start still did not finish. */
2514 +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
2515 ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
2516 + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
2517 + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
2518 + if (val && (tp->snd_cwnd >> 1) > val)
2519 +@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
2520 + } else if (!tcp_in_slow_start(tp) &&
2521 + icsk->icsk_ca_state == TCP_CA_Open) {
2522 + /* Cong. avoidance phase, cwnd is reliable. */
2523 +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
2524 ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
2525 + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
2526 + tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
2527 + max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
2528 +@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
2529 + tcp_metric_set(tm, TCP_METRIC_CWND,
2530 + (val + tp->snd_ssthresh) >> 1);
2531 + }
2532 +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
2533 ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
2534 + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
2535 + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
2536 + if (val && tp->snd_ssthresh > val)
2537 +@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
2538 + if (tcp_metric_locked(tm, TCP_METRIC_CWND))
2539 + tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
2540 +
2541 +- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
2542 ++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
2543 + 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
2544 + if (val) {
2545 + tp->snd_ssthresh = val;
2546 +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
2547 +index 9b67c61576e4c..657b0a4d93599 100644
2548 +--- a/net/ipv4/tcp_output.c
2549 ++++ b/net/ipv4/tcp_output.c
2550 +@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
2551 + if (tcp_packets_in_flight(tp) == 0)
2552 + tcp_ca_event(sk, CA_EVENT_TX_START);
2553 +
2554 +- /* If this is the first data packet sent in response to the
2555 +- * previous received data,
2556 +- * and it is a reply for ato after last received packet,
2557 +- * increase pingpong count.
2558 +- */
2559 +- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
2560 +- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
2561 +- inet_csk_inc_pingpong_cnt(sk);
2562 +-
2563 + tp->lsndtime = now;
2564 ++
2565 ++ /* If it is a reply for ato after last received
2566 ++ * packet, enter pingpong mode.
2567 ++ */
2568 ++ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
2569 ++ inet_csk_enter_pingpong_mode(sk);
2570 + }
2571 +
2572 + /* Account for an ACK we sent. */
2573 +@@ -1987,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
2574 +
2575 + min_tso = ca_ops->min_tso_segs ?
2576 + ca_ops->min_tso_segs(sk) :
2577 +- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
2578 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
2579 +
2580 + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
2581 + return min_t(u32, tso_segs, sk->sk_gso_max_segs);
2582 +@@ -2502,7 +2499,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
2583 + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
2584 + if (sk->sk_pacing_status == SK_PACING_NONE)
2585 + limit = min_t(unsigned long, limit,
2586 +- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
2587 ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
2588 + limit <<= factor;
2589 +
2590 + if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
2591 +diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
2592 +index 6ac88fe24a8e0..135e3a060caa8 100644
2593 +--- a/net/ipv6/ping.c
2594 ++++ b/net/ipv6/ping.c
2595 +@@ -22,6 +22,11 @@
2596 + #include <linux/proc_fs.h>
2597 + #include <net/ping.h>
2598 +
2599 ++static void ping_v6_destroy(struct sock *sk)
2600 ++{
2601 ++ inet6_destroy_sock(sk);
2602 ++}
2603 ++
2604 + /* Compatibility glue so we can support IPv6 when it's compiled as a module */
2605 + static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
2606 + int *addr_len)
2607 +@@ -166,6 +171,7 @@ struct proto pingv6_prot = {
2608 + .owner = THIS_MODULE,
2609 + .init = ping_init_sock,
2610 + .close = ping_close,
2611 ++ .destroy = ping_v6_destroy,
2612 + .connect = ip6_datagram_connect_v6_only,
2613 + .disconnect = __udp_disconnect,
2614 + .setsockopt = ipv6_setsockopt,
2615 +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
2616 +index 303b54414a6cc..8d91f36cb11bc 100644
2617 +--- a/net/ipv6/tcp_ipv6.c
2618 ++++ b/net/ipv6/tcp_ipv6.c
2619 +@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
2620 + if (np->repflow && ireq->pktopts)
2621 + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
2622 +
2623 +- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
2624 ++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
2625 + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
2626 + (np->tclass & INET_ECN_MASK) :
2627 + np->tclass;
2628 +@@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
2629 + /* Set ToS of the new socket based upon the value of incoming SYN.
2630 + * ECT bits are set later in tcp_init_transfer().
2631 + */
2632 +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
2633 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
2634 + newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
2635 +
2636 + /* Clone native IPv6 options from listening socket (if any)
2637 +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
2638 +index 8123c79e27913..d0e91aa7b30e5 100644
2639 +--- a/net/mptcp/protocol.c
2640 ++++ b/net/mptcp/protocol.c
2641 +@@ -1421,7 +1421,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
2642 + if (msk->rcvq_space.copied <= msk->rcvq_space.space)
2643 + goto new_measure;
2644 +
2645 +- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
2646 ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
2647 + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
2648 + int rcvmem, rcvbuf;
2649 + u64 rcvwin, grow;
2650 +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
2651 +index 1640da5c50776..72d30922ed290 100644
2652 +--- a/net/netfilter/nfnetlink_queue.c
2653 ++++ b/net/netfilter/nfnetlink_queue.c
2654 +@@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
2655 + }
2656 +
2657 + static int
2658 +-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
2659 ++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
2660 + {
2661 + struct sk_buff *nskb;
2662 +
2663 + if (diff < 0) {
2664 ++ unsigned int min_len = skb_transport_offset(e->skb);
2665 ++
2666 ++ if (data_len < min_len)
2667 ++ return -EINVAL;
2668 ++
2669 + if (pskb_trim(e->skb, data_len))
2670 + return -ENOMEM;
2671 + } else if (diff > 0) {
2672 +diff --git a/net/sctp/associola.c b/net/sctp/associola.c
2673 +index fdb69d46276d6..2d4ec61877553 100644
2674 +--- a/net/sctp/associola.c
2675 ++++ b/net/sctp/associola.c
2676 +@@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init(
2677 + if (!sctp_ulpq_init(&asoc->ulpq, asoc))
2678 + goto fail_init;
2679 +
2680 +- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
2681 +- 0, gfp))
2682 +- goto fail_init;
2683 ++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
2684 ++ goto stream_free;
2685 +
2686 + /* Initialize default path MTU. */
2687 + asoc->pathmtu = sp->pathmtu;
2688 +diff --git a/net/sctp/stream.c b/net/sctp/stream.c
2689 +index 6dc95dcc0ff4f..ef9fceadef8d5 100644
2690 +--- a/net/sctp/stream.c
2691 ++++ b/net/sctp/stream.c
2692 +@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
2693 +
2694 + ret = sctp_stream_alloc_out(stream, outcnt, gfp);
2695 + if (ret)
2696 +- goto out_err;
2697 ++ return ret;
2698 +
2699 + for (i = 0; i < stream->outcnt; i++)
2700 + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
2701 +@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
2702 + handle_in:
2703 + sctp_stream_interleave_init(stream);
2704 + if (!incnt)
2705 +- goto out;
2706 +-
2707 +- ret = sctp_stream_alloc_in(stream, incnt, gfp);
2708 +- if (ret)
2709 +- goto in_err;
2710 +-
2711 +- goto out;
2712 ++ return 0;
2713 +
2714 +-in_err:
2715 +- sched->free(stream);
2716 +- genradix_free(&stream->in);
2717 +-out_err:
2718 +- genradix_free(&stream->out);
2719 +- stream->outcnt = 0;
2720 +-out:
2721 +- return ret;
2722 ++ return sctp_stream_alloc_in(stream, incnt, gfp);
2723 + }
2724 +
2725 + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
2726 +diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
2727 +index 99e5f69fbb742..a2e1d34f52c5b 100644
2728 +--- a/net/sctp/stream_sched.c
2729 ++++ b/net/sctp/stream_sched.c
2730 +@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
2731 + if (!SCTP_SO(&asoc->stream, i)->ext)
2732 + continue;
2733 +
2734 +- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
2735 ++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
2736 + if (ret)
2737 + goto err;
2738 + }
2739 +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
2740 +index 23eab7ac43ee5..5cb6846544cc7 100644
2741 +--- a/net/tls/tls_device.c
2742 ++++ b/net/tls/tls_device.c
2743 +@@ -1349,8 +1349,13 @@ static int tls_device_down(struct net_device *netdev)
2744 + * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
2745 + * Now release the ref taken above.
2746 + */
2747 +- if (refcount_dec_and_test(&ctx->refcount))
2748 ++ if (refcount_dec_and_test(&ctx->refcount)) {
2749 ++ /* sk_destruct ran after tls_device_down took a ref, and
2750 ++ * it returned early. Complete the destruction here.
2751 ++ */
2752 ++ list_del(&ctx->list);
2753 + tls_device_free_ctx(ctx);
2754 ++ }
2755 + }
2756 +
2757 + up_write(&device_offload_lock);
2758 +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
2759 +index e440cd7f32a6f..b9ee2ded381ab 100644
2760 +--- a/tools/include/uapi/linux/bpf.h
2761 ++++ b/tools/include/uapi/linux/bpf.h
2762 +@@ -5006,7 +5006,10 @@ struct bpf_pidns_info {
2763 +
2764 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
2765 + struct bpf_sk_lookup {
2766 +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
2767 ++ union {
2768 ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
2769 ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
2770 ++ };
2771 +
2772 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */
2773 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
2774 +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
2775 +index 94809aed8b447..1cab29d45bfb3 100644
2776 +--- a/tools/perf/util/symbol-elf.c
2777 ++++ b/tools/perf/util/symbol-elf.c
2778 +@@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
2779 + return NULL;
2780 + }
2781 +
2782 ++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
2783 ++{
2784 ++ size_t i, phdrnum;
2785 ++ u64 sz;
2786 ++
2787 ++ if (elf_getphdrnum(elf, &phdrnum))
2788 ++ return -1;
2789 ++
2790 ++ for (i = 0; i < phdrnum; i++) {
2791 ++ if (gelf_getphdr(elf, i, phdr) == NULL)
2792 ++ return -1;
2793 ++
2794 ++ if (phdr->p_type != PT_LOAD)
2795 ++ continue;
2796 ++
2797 ++ sz = max(phdr->p_memsz, phdr->p_filesz);
2798 ++ if (!sz)
2799 ++ continue;
2800 ++
2801 ++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
2802 ++ return 0;
2803 ++ }
2804 ++
2805 ++ /* Not found any valid program header */
2806 ++ return -1;
2807 ++}
2808 ++
2809 + static bool want_demangle(bool is_kernel_sym)
2810 + {
2811 + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
2812 +@@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
2813 + sym.st_value);
2814 + used_opd = true;
2815 + }
2816 ++
2817 + /*
2818 + * When loading symbols in a data mapping, ABS symbols (which
2819 + * has a value of SHN_ABS in its st_shndx) failed at
2820 +@@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
2821 + goto out_elf_end;
2822 + } else if ((used_opd && runtime_ss->adjust_symbols) ||
2823 + (!used_opd && syms_ss->adjust_symbols)) {
2824 ++ GElf_Phdr phdr;
2825 ++
2826 ++ if (elf_read_program_header(syms_ss->elf,
2827 ++ (u64)sym.st_value, &phdr)) {
2828 ++ pr_warning("%s: failed to find program header for "
2829 ++ "symbol: %s st_value: %#" PRIx64 "\n",
2830 ++ __func__, elf_name, (u64)sym.st_value);
2831 ++ continue;
2832 ++ }
2833 + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
2834 +- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
2835 +- (u64)sym.st_value, (u64)shdr.sh_addr,
2836 +- (u64)shdr.sh_offset);
2837 +- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
2838 ++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
2839 ++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
2840 ++ (u64)phdr.p_offset);
2841 ++ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
2842 + }
2843 +
2844 + demangled = demangle_sym(dso, kmodule, elf_name);
2845 +diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
2846 +index a4c55fcb0e7b1..0fb92d9a319b7 100644
2847 +--- a/tools/testing/selftests/bpf/test_verifier.c
2848 ++++ b/tools/testing/selftests/bpf/test_verifier.c
2849 +@@ -100,7 +100,7 @@ struct bpf_test {
2850 + enum bpf_prog_type prog_type;
2851 + uint8_t flags;
2852 + void (*fill_helper)(struct bpf_test *self);
2853 +- uint8_t runs;
2854 ++ int runs;
2855 + #define bpf_testdata_struct_t \
2856 + struct { \
2857 + uint32_t retval, retval_unpriv; \
2858 +@@ -1054,7 +1054,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
2859 +
2860 + run_errs = 0;
2861 + run_successes = 0;
2862 +- if (!alignment_prevented_execution && fd_prog >= 0) {
2863 ++ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
2864 + uint32_t expected_val;
2865 + int i;
2866 +
2867 +diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
2868 +index 2ad5f974451c3..fd3b62a084b9f 100644
2869 +--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
2870 ++++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
2871 +@@ -239,6 +239,7 @@
2872 + .result = ACCEPT,
2873 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
2874 + .expected_attach_type = BPF_SK_LOOKUP,
2875 ++ .runs = -1,
2876 + },
2877 + /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
2878 + {