From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.16 commit in: /
Date: Sat, 29 Jan 2022 17:40:30
Message-Id: 1643478011.9a6d05b7a247feb26f16e24eb6a180e52f1bb30e.mpagano@gentoo
commit:     9a6d05b7a247feb26f16e24eb6a180e52f1bb30e
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Jan 29 17:40:11 2022 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Jan 29 17:40:11 2022 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=9a6d05b7

Linux patch 5.16.4

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README             |    4 +
 1003_linux-5.16.4.patch | 1035 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1039 insertions(+)

diff --git a/0000_README b/0000_README
index 1eb44c73..ff7d994b 100644
--- a/0000_README
+++ b/0000_README
@@ -55,6 +55,10 @@ Patch: 1002_linux-5.16.3.patch
From: http://www.kernel.org
Desc: Linux 5.16.3

+Patch: 1003_linux-5.16.4.patch
+From: http://www.kernel.org
+Desc: Linux 5.16.4
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1003_linux-5.16.4.patch b/1003_linux-5.16.4.patch
new file mode 100644
index 00000000..a5939fae
--- /dev/null
+++ b/1003_linux-5.16.4.patch
@@ -0,0 +1,1035 @@
+diff --git a/Makefile b/Makefile
+index acb8ffee65dc5..36ff4ed4763b3 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 16
+-SUBLEVEL = 3
++SUBLEVEL = 4
+ EXTRAVERSION =
+ NAME = Gobble Gobble
+
+diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
+index 8b300dd28defd..72b0e71cc3de8 100644
+--- a/arch/arm64/include/asm/extable.h
++++ b/arch/arm64/include/asm/extable.h
+@@ -33,15 +33,6 @@ do { \
+ (b)->data = (tmp).data; \
+ } while (0)
+
+-static inline bool in_bpf_jit(struct pt_regs *regs)
+-{
+- if (!IS_ENABLED(CONFIG_BPF_JIT))
+- return false;
+-
+- return regs->pc >= BPF_JIT_REGION_START &&
+- regs->pc < BPF_JIT_REGION_END;
+-}
+-
+ #ifdef CONFIG_BPF_JIT
+ bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs);
+diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
+index 1b9a1e2426127..0af70d9abede3 100644
+--- a/arch/arm64/include/asm/memory.h
++++ b/arch/arm64/include/asm/memory.h
+@@ -44,11 +44,8 @@
+ #define _PAGE_OFFSET(va) (-(UL(1) << (va)))
+ #define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
+ #define KIMAGE_VADDR (MODULES_END)
+-#define BPF_JIT_REGION_START (_PAGE_END(VA_BITS_MIN))
+-#define BPF_JIT_REGION_SIZE (SZ_128M)
+-#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+ #define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
+-#define MODULES_VADDR (BPF_JIT_REGION_END)
++#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
+ #define MODULES_VSIZE (SZ_128M)
+ #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
+ #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index 7b21213a570fc..e8986e6067a91 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -994,7 +994,7 @@ static struct break_hook bug_break_hook = {
+ static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
+ {
+ pr_err("%s generated an invalid instruction at %pS!\n",
+- in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching",
++ "Kernel text patching",
+ (void *)instruction_pointer(regs));
+
+ /* We cannot handle this */
+diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
+index 1c403536c9bb0..9bc4066c5bf33 100644
+--- a/arch/arm64/mm/ptdump.c
++++ b/arch/arm64/mm/ptdump.c
+@@ -41,8 +41,6 @@ static struct addr_marker address_markers[] = {
+ { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" },
+ { KASAN_SHADOW_END, "Kasan shadow end" },
+ #endif
+- { BPF_JIT_REGION_START, "BPF start" },
+- { BPF_JIT_REGION_END, "BPF end" },
+ { MODULES_VADDR, "Modules start" },
+ { MODULES_END, "Modules end" },
+ { VMALLOC_START, "vmalloc() area" },
+diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
+index 1090a957b3abc..71ef9dcd9b578 100644
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -1145,15 +1145,12 @@ out:
+
+ u64 bpf_jit_alloc_exec_limit(void)
+ {
+- return BPF_JIT_REGION_SIZE;
++ return VMALLOC_END - VMALLOC_START;
+ }
+
+ void *bpf_jit_alloc_exec(unsigned long size)
+ {
+- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+- BPF_JIT_REGION_END, GFP_KERNEL,
+- PAGE_KERNEL, 0, NUMA_NO_NODE,
+- __builtin_return_address(0));
++ return vmalloc(size);
+ }
+
+ void bpf_jit_free_exec(void *addr)
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+index 8c2b77eb94593..162ae71861247 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+@@ -119,6 +119,12 @@ int dcn31_smu_send_msg_with_param(
+
+ result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
+
++ if (result == VBIOSSMC_Result_Failed) {
++ ASSERT(0);
++ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
++ return -1;
++ }
++
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
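
The dcn31 hunk above adds a fail-fast path to the SMU mailbox: on an explicit VBIOSSMC_Result_Failed the driver asserts, rewrites the response register to VBIOSSMC_Result_OK so the mailbox is usable for the next message, and returns -1 instead of falling into the timeout handling. A minimal userspace sketch of that reset-on-failure idiom, with a plain variable standing in for the MMIO register (all names here are illustrative, not the driver's):

#include <stdio.h>

enum { SMC_RESULT_OK = 1, SMC_RESULT_FAILED = 0xFF };

static unsigned int response_reg = SMC_RESULT_FAILED; /* models MP1_SMN_C2PMSG_91 */

static int send_msg(unsigned int msg_id)
{
	(void)msg_id;                  /* a real driver writes msg/param regs here */
	unsigned int result = response_reg;

	if (result == SMC_RESULT_FAILED) {
		/* reset the mailbox so the next message starts clean, then bail */
		response_reg = SMC_RESULT_OK;
		return -1;
	}
	return 0;
}

int main(void)
{
	printf("first send: %d\n", send_msg(0x42));   /* -1, mailbox reset */
	printf("second send: %d\n", send_msg(0x42));  /* 0 */
	return 0;
}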
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+index da85169006d4f..a0aa6dbe120e2 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+@@ -305,6 +305,7 @@ struct drm_i915_gem_object {
+ #define I915_BO_READONLY BIT(6)
+ #define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
+ #define I915_BO_PROTECTED BIT(8)
++#define I915_BO_WAS_BOUND_BIT 9
+ /**
+ * @mem_flags - Mutable placement-related flags
+ *
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+index 1d3f40abd0258..9053cea3395a6 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+@@ -10,6 +10,8 @@
+ #include "i915_gem_lmem.h"
+ #include "i915_gem_mman.h"
+
++#include "gt/intel_gt.h"
++
+ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages,
+ unsigned int sg_page_sizes)
+@@ -217,6 +219,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+ __i915_gem_object_reset_page_iter(obj);
+ obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
++ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
++ struct drm_i915_private *i915 = to_i915(obj->base.dev);
++ intel_wakeref_t wakeref;
++
++ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
++ intel_gt_invalidate_tlbs(&i915->gt);
++ }
++
+ return pages;
+ }
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
+index 1cb1948ac9594..7df7bbf5845ee 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
+@@ -30,6 +30,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+
+ spin_lock_init(&gt->irq_lock);
+
++ mutex_init(&gt->tlb_invalidate_lock);
++
+ INIT_LIST_HEAD(&gt->closed_vma);
+ spin_lock_init(&gt->closed_lock);
+
+@@ -907,3 +909,103 @@ void intel_gt_info_print(const struct intel_gt_info *info,
+
+ intel_sseu_dump(&info->sseu, p);
+ }
++
++struct reg_and_bit {
++ i915_reg_t reg;
++ u32 bit;
++};
++
++static struct reg_and_bit
++get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
++ const i915_reg_t *regs, const unsigned int num)
++{
++ const unsigned int class = engine->class;
++ struct reg_and_bit rb = { };
++
++ if (drm_WARN_ON_ONCE(&engine->i915->drm,
++ class >= num || !regs[class].reg))
++ return rb;
++
++ rb.reg = regs[class];
++ if (gen8 && class == VIDEO_DECODE_CLASS)
++ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
++ else
++ rb.bit = engine->instance;
++
++ rb.bit = BIT(rb.bit);
++
++ return rb;
++}
++
++void intel_gt_invalidate_tlbs(struct intel_gt *gt)
++{
++ static const i915_reg_t gen8_regs[] = {
++ [RENDER_CLASS] = GEN8_RTCR,
++ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
++ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
++ [COPY_ENGINE_CLASS] = GEN8_BTCR,
++ };
++ static const i915_reg_t gen12_regs[] = {
++ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
++ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
++ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
++ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
++ };
++ struct drm_i915_private *i915 = gt->i915;
++ struct intel_uncore *uncore = gt->uncore;
++ struct intel_engine_cs *engine;
++ enum intel_engine_id id;
++ const i915_reg_t *regs;
++ unsigned int num = 0;
++
++ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
++ return;
++
++ if (GRAPHICS_VER(i915) == 12) {
++ regs = gen12_regs;
++ num = ARRAY_SIZE(gen12_regs);
++ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
++ regs = gen8_regs;
++ num = ARRAY_SIZE(gen8_regs);
++ } else if (GRAPHICS_VER(i915) < 8) {
++ return;
++ }
++
++ if (drm_WARN_ONCE(&i915->drm, !num,
++ "Platform does not implement TLB invalidation!"))
++ return;
++
++ GEM_TRACE("\n");
++
++ assert_rpm_wakelock_held(&i915->runtime_pm);
++
++ mutex_lock(&gt->tlb_invalidate_lock);
++ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
++
++ for_each_engine(engine, gt, id) {
++ /*
++ * HW architecture suggest typical invalidation time at 40us,
++ * with pessimistic cases up to 100us and a recommendation to
++ * cap at 1ms. We go a bit higher just in case.
++ */
++ const unsigned int timeout_us = 100;
++ const unsigned int timeout_ms = 4;
++ struct reg_and_bit rb;
++
++ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
++ if (!i915_mmio_reg_offset(rb.reg))
++ continue;
++
++ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
++ if (__intel_wait_for_register_fw(uncore,
++ rb.reg, rb.bit, 0,
++ timeout_us, timeout_ms,
++ NULL))
++ drm_err_ratelimited(&gt->i915->drm,
++ "%s TLB invalidation did not complete in %ums!\n",
++ engine->name, timeout_ms);
++ }
++
++ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
++ mutex_unlock(&gt->tlb_invalidate_lock);
++}
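
intel_gt_invalidate_tlbs() above follows a classic write-then-poll MMIO pattern: write the invalidate bit, then wait for the hardware to clear it within a bounded timeout (the comment notes ~40us typical, capped here at 4ms). A rough, runnable simulation of that pattern in plain C, with a thread standing in for the hardware (all names hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static atomic_uint tlb_inv_reg;        /* models a GEN12_*_TLB_INV_CR register */

static void *hw_sim(void *arg)         /* "hardware" clears the bit when done */
{
	(void)arg;
	usleep(50);                    /* ~typical 40us invalidation latency */
	atomic_store(&tlb_inv_reg, 0);
	return NULL;
}

/* poll until (reg & bit) == 0 or the timeout expires; 0 on success */
static int wait_for_clear(unsigned int bit, long timeout_ms)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (!(atomic_load(&tlb_inv_reg) & bit))
			return 0;
		clock_gettime(CLOCK_MONOTONIC, &now);
		if ((now.tv_sec - start.tv_sec) * 1000L +
		    (now.tv_nsec - start.tv_nsec) / 1000000L > timeout_ms)
			return -1;
	}
}

int main(void)
{
	pthread_t t;

	atomic_store(&tlb_inv_reg, 1);  /* "write" the invalidate bit */
	pthread_create(&t, NULL, hw_sim, NULL);
	printf("%s\n", wait_for_clear(1, 4) ? "timed out" : "invalidated");
	pthread_join(t, NULL);
	return 0;
}

Build with -lpthread. The real code additionally serializes invalidations behind tlb_invalidate_lock and holds forcewake for the duration.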
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
+index 74e771871a9bd..c0169d6017c2d 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt.h
+@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
+
+ void intel_gt_watchdog_work(struct work_struct *work);
+
++void intel_gt_invalidate_tlbs(struct intel_gt *gt);
++
+ #endif /* __INTEL_GT_H__ */
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+index 14216cc471b1b..f206877964908 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+@@ -73,6 +73,8 @@ struct intel_gt {
+
+ struct intel_uc uc;
+
++ struct mutex tlb_invalidate_lock;
++
+ struct i915_wa_list wa_list;
+
+ struct intel_gt_timelines {
+diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
+index bcee121bec5ad..14ce8809efdd5 100644
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -2697,6 +2697,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
+ #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
+ #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
+
++#define GEN8_RTCR _MMIO(0x4260)
++#define GEN8_M1TCR _MMIO(0x4264)
++#define GEN8_M2TCR _MMIO(0x4268)
++#define GEN8_BTCR _MMIO(0x426c)
++#define GEN8_VTCR _MMIO(0x4270)
++
+ #if 0
+ #define PRB0_TAIL _MMIO(0x2030)
+ #define PRB0_HEAD _MMIO(0x2034)
+@@ -2792,6 +2798,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
+ #define FAULT_VA_HIGH_BITS (0xf << 0)
+ #define FAULT_GTT_SEL (1 << 4)
+
++#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
++#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
++#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
++#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
++
+ #define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
+
+ #define FPGA_DBG _MMIO(0x42300)
+diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
+index bef795e265a66..cb288e6bdc020 100644
+--- a/drivers/gpu/drm/i915/i915_vma.c
++++ b/drivers/gpu/drm/i915/i915_vma.c
+@@ -431,6 +431,9 @@ int i915_vma_bind(struct i915_vma *vma,
+ vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
+ }
+
++ if (vma->obj)
++ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
++
+ atomic_or(bind_flags, &vma->flags);
+ return 0;
+ }
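
The vma/pages changes pair a set_bit() at bind time with a test_and_clear_bit() at teardown, so the TLB flush runs at most once and only for objects that were ever bound. A sketch of that one-shot flag idiom (hypothetical names, C11 atomics in place of the kernel bitops):

#include <stdatomic.h>
#include <stdio.h>

#define WAS_BOUND_BIT (1u << 9)    /* plays the role of I915_BO_WAS_BOUND_BIT */

static atomic_uint obj_flags;

static void bind(void)             /* i915_vma_bind() analogue */
{
	atomic_fetch_or(&obj_flags, WAS_BOUND_BIT);
}

static void unset_pages(void)      /* __i915_gem_object_unset_pages() analogue */
{
	/* test-and-clear: flush exactly once, only if we were ever bound */
	if (atomic_fetch_and(&obj_flags, ~WAS_BOUND_BIT) & WAS_BOUND_BIT)
		printf("TLBs invalidated\n");
	else
		printf("never bound, nothing to do\n");
}

int main(void)
{
	unset_pages();   /* never bound, nothing to do */
	bind();
	unset_pages();   /* TLBs invalidated */
	unset_pages();   /* flag already cleared */
	return 0;
}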
+diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
+index e072054adac57..e21c779cb487f 100644
+--- a/drivers/gpu/drm/i915/intel_uncore.c
++++ b/drivers/gpu/drm/i915/intel_uncore.c
+@@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
+ }
+
+ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
+- enum forcewake_domains fw_domains)
++ enum forcewake_domains fw_domains,
++ bool delayed)
+ {
+ struct intel_uncore_forcewake_domain *domain;
+ unsigned int tmp;
+@@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ continue;
+ }
+
+- fw_domains_put(uncore, domain->mask);
++ if (delayed &&
++ !(domain->uncore->fw_domains_timer & domain->mask))
++ fw_domain_arm_timer(domain);
++ else
++ fw_domains_put(uncore, domain->mask);
+ }
+ }
+
+@@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ return;
+
+ spin_lock_irqsave(&uncore->lock, irqflags);
+- __intel_uncore_forcewake_put(uncore, fw_domains);
++ __intel_uncore_forcewake_put(uncore, fw_domains, false);
++ spin_unlock_irqrestore(&uncore->lock, irqflags);
++}
++
++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
++ enum forcewake_domains fw_domains)
++{
++ unsigned long irqflags;
++
++ if (!uncore->fw_get_funcs)
++ return;
++
++ spin_lock_irqsave(&uncore->lock, irqflags);
++ __intel_uncore_forcewake_put(uncore, fw_domains, true);
+ spin_unlock_irqrestore(&uncore->lock, irqflags);
+ }
+
+@@ -802,7 +820,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
+ if (!uncore->fw_get_funcs)
+ return;
+
+- __intel_uncore_forcewake_put(uncore, fw_domains);
++ __intel_uncore_forcewake_put(uncore, fw_domains, false);
+ }
+
+ void assert_forcewakes_inactive(struct intel_uncore *uncore)
+diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
+index 3248e4e2c540c..d08088fa4c7e9 100644
+--- a/drivers/gpu/drm/i915/intel_uncore.h
++++ b/drivers/gpu/drm/i915/intel_uncore.h
+@@ -243,6 +243,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
+ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
++ enum forcewake_domains domains);
+ void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains);
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+index 2a7cec4cb8a89..f9f28516ffb41 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -1112,15 +1112,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+ struct vmw_private *dev_priv,
+ struct vmw_fence_obj **p_fence,
+ uint32_t *p_handle);
+-extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
++extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+ struct vmw_fpriv *vmw_fp,
+ int ret,
+ struct drm_vmw_fence_rep __user
+ *user_fence_rep,
+ struct vmw_fence_obj *fence,
+ uint32_t fence_handle,
+- int32_t out_fence_fd,
+- struct sync_file *sync_file);
++ int32_t out_fence_fd);
+ bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd);
+
+ /**
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+index 5f2ffa9de5c8f..9144e8f88c812 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -3823,17 +3823,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+ * Also if copying fails, user-space will be unable to signal the fence object
+ * so we wait for it immediately, and then unreference the user-space reference.
+ */
+-void
++int
+ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+ struct vmw_fpriv *vmw_fp, int ret,
+ struct drm_vmw_fence_rep __user *user_fence_rep,
+ struct vmw_fence_obj *fence, uint32_t fence_handle,
+- int32_t out_fence_fd, struct sync_file *sync_file)
++ int32_t out_fence_fd)
+ {
+ struct drm_vmw_fence_rep fence_rep;
+
+ if (user_fence_rep == NULL)
+- return;
++ return 0;
+
+ memset(&fence_rep, 0, sizeof(fence_rep));
+
+@@ -3861,20 +3861,14 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+ * handle.
+ */
+ if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+- if (sync_file)
+- fput(sync_file->file);
+-
+- if (fence_rep.fd != -1) {
+- put_unused_fd(fence_rep.fd);
+- fence_rep.fd = -1;
+- }
+-
+ ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle,
+ TTM_REF_USAGE);
+ VMW_DEBUG_USER("Fence copy error. Syncing.\n");
+ (void) vmw_fence_obj_wait(fence, false, false,
+ VMW_FENCE_WAIT_TIMEOUT);
+ }
++
++ return ret ? -EFAULT : 0;
+ }
+
+ /**
+@@ -4212,16 +4206,23 @@ int vmw_execbuf_process(struct drm_file *file_priv,
+
+ (void) vmw_fence_obj_wait(fence, false, false,
+ VMW_FENCE_WAIT_TIMEOUT);
++ }
++ }
++
++ ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
++ user_fence_rep, fence, handle, out_fence_fd);
++
++ if (sync_file) {
++ if (ret) {
++ /* usercopy of fence failed, put the file object */
++ fput(sync_file->file);
++ put_unused_fd(out_fence_fd);
+ } else {
+ /* Link the fence with the FD created earlier */
+ fd_install(out_fence_fd, sync_file->file);
+ }
+ }
+
+- vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+- user_fence_rep, fence, handle, out_fence_fd,
+- sync_file);
+-
+ /* Don't unreference when handing fence out */
+ if (unlikely(out_fence != NULL)) {
+ *out_fence = fence;
+@@ -4239,7 +4240,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
+ */
+ vmw_validation_unref_lists(&val_ctx);
+
+- return 0;
++ return ret;
+
+ out_unlock_binding:
+ mutex_unlock(&dev_priv->binding_mutex);
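
The execbuf reordering above makes the user-copy fallible (-EFAULT) and only fd_install()s the sync file once the copy has succeeded; on failure the fd and file reference are released before userspace ever sees them. A loose userspace analogy of that publish-the-fd-last ordering (open/close stand in for get_unused_fd/fd_install/fput; names hypothetical):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* stands in for vmw_execbuf_copy_fence_user(); may fail like copy_to_user */
static int copy_fence_to_user(int simulate_fault)
{
	return simulate_fault ? -EFAULT : 0;
}

static int finish_execbuf(int simulate_fault)
{
	int fd = open("/dev/null", O_RDONLY);  /* plays the reserved out_fence_fd */
	int ret = copy_fence_to_user(simulate_fault);

	if (ret) {
		close(fd);                     /* kernel: fput() + put_unused_fd() */
		return ret;
	}
	printf("fd %d published\n", fd);       /* kernel: fd_install() happens last */
	close(fd);
	return 0;
}

int main(void)
{
	printf("fault path: %d\n", finish_execbuf(1));
	printf("ok path: %d\n", finish_execbuf(0));
	return 0;
}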
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+index 9fe12329a4d58..b4d9d7258a546 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+@@ -1159,7 +1159,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+ }
+
+ vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
+- handle, -1, NULL);
++ handle, -1);
+ vmw_fence_obj_unreference(&fence);
+ return 0;
+ out_no_create:
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+index 74fa419092138..14e8f665b13be 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+@@ -2516,7 +2516,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
+ if (file_priv)
+ vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv),
+ ret, user_fence_rep, fence,
+- handle, -1, NULL);
++ handle, -1);
+ if (out_fence)
+ *out_fence = fence;
+ else
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+index 2b06d78baa086..a19dd6797070c 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+@@ -1850,6 +1850,14 @@ struct bnx2x {
+
+ /* Vxlan/Geneve related information */
+ u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
++
++#define FW_CAP_INVALIDATE_VF_FP_HSI BIT(0)
++ u32 fw_cap;
++
++ u32 fw_major;
++ u32 fw_minor;
++ u32 fw_rev;
++ u32 fw_eng;
+ };
+
+ /* Tx queues may be less or equal to Rx queues */
+@@ -2525,5 +2533,6 @@ void bnx2x_register_phc(struct bnx2x *bp);
+ * Meant for implicit re-load flows.
+ */
+ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
+-
++int bnx2x_init_firmware(struct bnx2x *bp);
++void bnx2x_release_firmware(struct bnx2x *bp);
+ #endif /* bnx2x.h */
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+index e8e8c2d593c55..e57fe0034ce2a 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+@@ -2364,10 +2364,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
+ if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
+ load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
+ /* build my FW version dword */
+- u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
+- (BCM_5710_FW_MINOR_VERSION << 8) +
+- (BCM_5710_FW_REVISION_VERSION << 16) +
+- (BCM_5710_FW_ENGINEERING_VERSION << 24);
++ u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
++ (bp->fw_rev << 16) + (bp->fw_eng << 24);
+
+ /* read loaded FW from chip */
+ u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
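
The version comparison above now packs the values carried in struct bnx2x rather than compile-time constants, one byte per field: major + (minor << 8) + (rev << 16) + (eng << 24). A tiny self-contained illustration of that dword packing (the version numbers are examples only):

#include <stdint.h>
#include <stdio.h>

/* pack major.minor.rev.eng into one dword, as the driver does when
 * comparing its firmware against what the chip already loaded */
static uint32_t fw_dword(uint32_t major, uint32_t minor,
			 uint32_t rev, uint32_t eng)
{
	return major + (minor << 8) + (rev << 16) + (eng << 24);
}

int main(void)
{
	uint32_t my_fw     = fw_dword(7, 13, 21, 0);   /* 7.13.21.0 */
	uint32_t loaded_fw = fw_dword(7, 13, 15, 0);   /* 7.13.15.0 */

	printf("my_fw=0x%08x loaded_fw=0x%08x %s\n", my_fw, loaded_fw,
	       my_fw == loaded_fw ? "match" : "mismatch");
	return 0;
}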
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+index 3f8435208bf49..a84d015da5dfa 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+@@ -241,6 +241,8 @@
+ IRO[221].m2))
+ #define XSTORM_VF_TO_PF_OFFSET(funcId) \
+ (IRO[48].base + ((funcId) * IRO[48].m1))
++#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid) \
++ (IRO[386].base + ((fid) * IRO[386].m1))
+ #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
+
+ /* eth hsi version */
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+index 622fadc50316e..611efee758340 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+@@ -3024,7 +3024,8 @@ struct afex_stats {
+
+ #define BCM_5710_FW_MAJOR_VERSION 7
+ #define BCM_5710_FW_MINOR_VERSION 13
+-#define BCM_5710_FW_REVISION_VERSION 15
++#define BCM_5710_FW_REVISION_VERSION 21
++#define BCM_5710_FW_REVISION_VERSION_V15 15
+ #define BCM_5710_FW_ENGINEERING_VERSION 0
+ #define BCM_5710_FW_COMPILE_FLAGS 1
+
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+index aec666e976831..125dafe1db7ee 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+@@ -74,9 +74,19 @@
+ __stringify(BCM_5710_FW_MINOR_VERSION) "." \
+ __stringify(BCM_5710_FW_REVISION_VERSION) "." \
+ __stringify(BCM_5710_FW_ENGINEERING_VERSION)
++
++#define FW_FILE_VERSION_V15 \
++ __stringify(BCM_5710_FW_MAJOR_VERSION) "." \
++ __stringify(BCM_5710_FW_MINOR_VERSION) "." \
++ __stringify(BCM_5710_FW_REVISION_VERSION_V15) "." \
++ __stringify(BCM_5710_FW_ENGINEERING_VERSION)
++
+ #define FW_FILE_NAME_E1 "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
+ #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
+ #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
++#define FW_FILE_NAME_E1_V15 "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw"
++#define FW_FILE_NAME_E1H_V15 "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw"
++#define FW_FILE_NAME_E2_V15 "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw"
+
+ /* Time in jiffies before concluding the transmitter is hung */
+ #define TX_TIMEOUT (5*HZ)
+@@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
+ CHIP_IS_E1(bp) ? "everest1" :
+ CHIP_IS_E1H(bp) ? "everest1h" :
+ CHIP_IS_E2(bp) ? "everest2" : "everest3",
+- BCM_5710_FW_MAJOR_VERSION,
+- BCM_5710_FW_MINOR_VERSION,
+- BCM_5710_FW_REVISION_VERSION);
++ bp->fw_major, bp->fw_minor, bp->fw_rev);
+
+ return rc;
+ }
+@@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
+
+ bnx2x_read_fwinfo(bp);
+
++ if (IS_PF(bp)) {
++ rc = bnx2x_init_firmware(bp);
++
++ if (rc) {
++ bnx2x_free_mem_bp(bp);
++ return rc;
++ }
++ }
++
+ func = BP_FUNC(bp);
+
+ /* need to reset chip if undi was active */
+@@ -12320,6 +12337,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
+
+ rc = bnx2x_prev_unload(bp);
+ if (rc) {
++ bnx2x_release_firmware(bp);
+ bnx2x_free_mem_bp(bp);
+ return rc;
+ }
+@@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct bnx2x *bp)
+ /* Check FW version */
+ offset = be32_to_cpu(fw_hdr->fw_version.offset);
+ fw_ver = firmware->data + offset;
+- if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
+- (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
+- (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
+- (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
++ if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
++ fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
+ BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n",
+- fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
+- BCM_5710_FW_MAJOR_VERSION,
+- BCM_5710_FW_MINOR_VERSION,
+- BCM_5710_FW_REVISION_VERSION,
+- BCM_5710_FW_ENGINEERING_VERSION);
++ fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
++ bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
+ return -EINVAL;
+ }
+
+@@ -13404,34 +13417,51 @@ do { \
+ (u8 *)bp->arr, len); \
+ } while (0)
+
+-static int bnx2x_init_firmware(struct bnx2x *bp)
++int bnx2x_init_firmware(struct bnx2x *bp)
+ {
+- const char *fw_file_name;
++ const char *fw_file_name, *fw_file_name_v15;
+ struct bnx2x_fw_file_hdr *fw_hdr;
+ int rc;
+
+ if (bp->firmware)
+ return 0;
+
+- if (CHIP_IS_E1(bp))
++ if (CHIP_IS_E1(bp)) {
+ fw_file_name = FW_FILE_NAME_E1;
+- else if (CHIP_IS_E1H(bp))
++ fw_file_name_v15 = FW_FILE_NAME_E1_V15;
++ } else if (CHIP_IS_E1H(bp)) {
+ fw_file_name = FW_FILE_NAME_E1H;
+- else if (!CHIP_IS_E1x(bp))
++ fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
++ } else if (!CHIP_IS_E1x(bp)) {
+ fw_file_name = FW_FILE_NAME_E2;
+- else {
++ fw_file_name_v15 = FW_FILE_NAME_E2_V15;
++ } else {
+ BNX2X_ERR("Unsupported chip revision\n");
+ return -EINVAL;
+ }
++
+ BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
+
+ rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
+ if (rc) {
+- BNX2X_ERR("Can't load firmware file %s\n",
+- fw_file_name);
+- goto request_firmware_exit;
++ BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
++
++ /* try to load prev version */
++ rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
++
++ if (rc)
++ goto request_firmware_exit;
++
++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
++ } else {
++ bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
+ }
+
++ bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
++ bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
++ bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
++
+ rc = bnx2x_check_firmware(bp);
+ if (rc) {
+ BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
+@@ -13487,7 +13517,7 @@ request_firmware_exit:
+ return rc;
+ }
+
+-static void bnx2x_release_firmware(struct bnx2x *bp)
++void bnx2x_release_firmware(struct bnx2x *bp)
+ {
+ kfree(bp->init_ops_offsets);
+ kfree(bp->init_ops);
+@@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
+ return 0;
+
+ init_one_freemem:
++ bnx2x_release_firmware(bp);
+ bnx2x_free_mem_bp(bp);
+
+ init_one_exit:
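
bnx2x_init_firmware() above now tries the new 7.13.21.0 blob first and falls back to the 7.13.15.0 one, recording FW_CAP_INVALIDATE_VF_FP_HSI only when the newer firmware loads. A minimal sketch of that try-new-then-fallback flow (fopen() stands in for request_firmware(); the file names and values are illustrative, and with neither file present the sketch simply takes the error path):

#include <stdio.h>

#define FW_CAP_INVALIDATE_VF_FP_HSI (1u << 0)

static const char *fw_new = "bnx2x-e2-7.13.21.0.fw";  /* hypothetical paths */
static const char *fw_old = "bnx2x-e2-7.13.15.0.fw";

int main(void)
{
	unsigned int fw_cap = 0, fw_rev;
	FILE *fw = fopen(fw_new, "rb");

	if (!fw) {
		fw = fopen(fw_old, "rb");     /* fall back to the older blob */
		if (!fw) {
			fprintf(stderr, "no usable firmware\n");
			return 1;
		}
		fw_rev = 15;                  /* capability flag stays unset */
	} else {
		fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
		fw_rev = 21;
	}
	printf("rev=%u cap=%#x\n", fw_rev, fw_cap);
	fclose(fw);
	return 0;
}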
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+index 74a8931ce1d1d..11d15cd036005 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
+
+ void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
+ {
++ u16 abs_fid;
++
++ abs_fid = FW_VF_HANDLE(abs_vfid);
++
+ /* set the VF-PF association in the FW */
+- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
+- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
++ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
++ storm_memset_func_en(bp, abs_fid, 1);
++
++ /* Invalidate fp_hsi version for vfs */
++ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
++ REG_WR8(bp, BAR_XSTRORM_INTMEM +
++ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
+
+ /* clear vf errors*/
+ bnx2x_vf_semi_clear_err(bp, abs_vfid);
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index e0fbb940fe5c3..15f303180d70c 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1830,6 +1830,18 @@ static inline void io_get_task_refs(int nr)
+ io_task_refs_refill(tctx);
+ }
+
++static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
++{
++ struct io_uring_task *tctx = task->io_uring;
++ unsigned int refs = tctx->cached_refs;
++
++ if (refs) {
++ tctx->cached_refs = 0;
++ percpu_counter_sub(&tctx->inflight, refs);
++ put_task_struct_many(task, refs);
++ }
++}
++
+ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
+ {
+@@ -2250,6 +2262,10 @@ static void tctx_task_work(struct callback_head *cb)
+ }
+
+ ctx_flush_and_put(ctx, &locked);
++
++ /* relaxed read is enough as only the task itself sets ->in_idle */
++ if (unlikely(atomic_read(&tctx->in_idle)))
++ io_uring_drop_tctx_refs(current);
+ }
+
+ static void io_req_task_work_add(struct io_kiocb *req)
+@@ -9818,18 +9834,6 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
+ return percpu_counter_sum(&tctx->inflight);
+ }
+
+-static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
+-{
+- struct io_uring_task *tctx = task->io_uring;
+- unsigned int refs = tctx->cached_refs;
+-
+- if (refs) {
+- tctx->cached_refs = 0;
+- percpu_counter_sub(&tctx->inflight, refs);
+- put_task_struct_many(task, refs);
+- }
+-}
+-
+ /*
+ * Find any io_uring ctx that this task has registered or done IO on, and cancel
+ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
+@@ -9887,10 +9891,14 @@ static __cold void io_uring_cancel_generic(bool cancel_all,
+ schedule();
+ finish_wait(&tctx->wait, &wait);
+ } while (1);
+- atomic_dec(&tctx->in_idle);
+
+ io_uring_clean_tctx(tctx);
+ if (cancel_all) {
++ /*
++ * We shouldn't run task_works after cancel, so just leave
++ * ->in_idle set for normal exit.
++ */
++ atomic_dec(&tctx->in_idle);
+ /* for exec all current's requests should be gone, kill tctx */
+ __io_uring_free(current);
+ }
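
The io_uring change above moves the cached-reference drop so it also runs from tctx_task_work() once ->in_idle is observed, returning the task's batched refs to the inflight counter instead of leaving them stranded. A toy single-threaded model of that cached-refs flush (hypothetical names, plain ints in place of the percpu counters):

#include <stdbool.h>
#include <stdio.h>

static long inflight = 8;            /* models tctx->inflight */
static unsigned int cached_refs = 8; /* models tctx->cached_refs */
static bool in_idle;

static void drop_cached_refs(void)   /* io_uring_drop_tctx_refs() analogue */
{
	if (cached_refs) {
		inflight -= cached_refs;
		cached_refs = 0;
	}
}

static void task_work_tick(void)     /* tctx_task_work() analogue */
{
	/* ... run pending work ... */
	if (in_idle)                  /* the late drop added by this patch */
		drop_cached_refs();
}

int main(void)
{
	task_work_tick();             /* not idle yet: refs stay cached */
	in_idle = true;
	task_work_tick();             /* idle: cached refs are returned */
	printf("inflight=%ld cached=%u\n", inflight, cached_refs);
	return 0;
}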
+diff --git a/fs/select.c b/fs/select.c
+index 945896d0ac9e7..5edffee1162c2 100644
+--- a/fs/select.c
++++ b/fs/select.c
+@@ -458,9 +458,11 @@ get_max:
+ return max;
+ }
+
+-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
+-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
+-#define POLLEX_SET (EPOLLPRI)
++#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
++ EPOLLNVAL)
++#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
++ EPOLLNVAL)
++#define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
+
+ static inline void wait_key_set(poll_table *wait, unsigned long in,
+ unsigned long out, unsigned long bit,
+@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+ break;
+ if (!(bit & all_bits))
+ continue;
++ mask = EPOLLNVAL;
+ f = fdget(i);
+ if (f.file) {
+ wait_key_set(wait, in, out, bit,
+@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+ mask = vfs_poll(f.file, wait);
+
+ fdput(f);
+- if ((mask & POLLIN_SET) && (in & bit)) {
+- res_in |= bit;
+- retval++;
+- wait->_qproc = NULL;
+- }
+- if ((mask & POLLOUT_SET) && (out & bit)) {
+- res_out |= bit;
+- retval++;
+- wait->_qproc = NULL;
+- }
+- if ((mask & POLLEX_SET) && (ex & bit)) {
+- res_ex |= bit;
+- retval++;
+- wait->_qproc = NULL;
+- }
+- /* got something, stop busy polling */
+- if (retval) {
+- can_busy_loop = false;
+- busy_flag = 0;
+-
+- /*
+- * only remember a returned
+- * POLL_BUSY_LOOP if we asked for it
+- */
+- } else if (busy_flag & mask)
+- can_busy_loop = true;
+-
+ }
++ if ((mask & POLLIN_SET) && (in & bit)) {
++ res_in |= bit;
++ retval++;
++ wait->_qproc = NULL;
++ }
++ if ((mask & POLLOUT_SET) && (out & bit)) {
++ res_out |= bit;
++ retval++;
++ wait->_qproc = NULL;
++ }
++ if ((mask & POLLEX_SET) && (ex & bit)) {
++ res_ex |= bit;
++ retval++;
++ wait->_qproc = NULL;
++ }
++ /* got something, stop busy polling */
++ if (retval) {
++ can_busy_loop = false;
++ busy_flag = 0;
++
++ /*
++ * only remember a returned
++ * POLL_BUSY_LOOP if we asked for it
++ */
++ } else if (busy_flag & mask)
++ can_busy_loop = true;
++
+ }
+ if (res_in)
+ *rinp = res_in;
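
After this select() change, an fd that fails fdget() contributes EPOLLNVAL and is reported ready in all three sets, matching the long-standing poll() behaviour, rather than being silently skipped. That poll() behaviour is easy to see from userspace:

#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fds[2];

	if (pipe(fds))
		return 1;
	close(fds[0]);                 /* make the descriptor invalid */

	struct pollfd p = { .fd = fds[0], .events = POLLIN };
	int n = poll(&p, 1, 0);

	/* prints "poll=1 revents=POLLNVAL": the bad fd is reported, not hung on */
	printf("poll=%d revents=%s\n", n,
	       (p.revents & POLLNVAL) ? "POLLNVAL" : "other");
	close(fds[1]);
	return 0;
}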
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index 906b6887622d3..28fd0cef9b1fb 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -1590,10 +1590,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
+ struct rcu_data *rdp)
+ {
+ rcu_lockdep_assert_cblist_protected(rdp);
+- if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
+- !raw_spin_trylock_rcu_node(rnp))
++ if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
+ return;
+- WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
++ // The grace period cannot end while we hold the rcu_node lock.
++ if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
++ WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
+ raw_spin_unlock_rcu_node(rnp);
+ }
+
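The RCU fix above is the check/trylock/recheck idiom: the lockless rcu_seq_state() test can race with the grace period ending, so it is repeated once the rcu_node lock is actually held. A small pthread sketch of the same shape (hypothetical names; single-threaded, so it shows the structure rather than the race):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static bool gp_in_progress;   /* models rcu_seq_state(...) being nonzero */

static void advance_nowake(void)
{
	/* lockless pre-check, then opportunistic trylock (0 == acquired) */
	if (!gp_in_progress || pthread_mutex_trylock(&node_lock))
		return;
	/* the state may have changed between the pre-check and the trylock,
	 * so re-check now that the lock pins it */
	if (gp_in_progress)
		printf("advancing callbacks\n");
	else
		printf("raced: grace period already ended\n");
	pthread_mutex_unlock(&node_lock);
}

int main(void)
{
	advance_nowake();         /* no GP: bail without locking */
	gp_in_progress = true;
	advance_nowake();         /* GP still active: advance */
	return 0;
}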
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 2ed5f2a0879d3..fdc952f270c14 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -629,11 +629,17 @@ static DEFINE_SPINLOCK(stats_flush_lock);
+ static DEFINE_PER_CPU(unsigned int, stats_updates);
+ static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
+
+-static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
++static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
+ {
++ unsigned int x;
++
+ cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
+- if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
+- atomic_inc(&stats_flush_threshold);
++
++ x = __this_cpu_add_return(stats_updates, abs(val));
++ if (x > MEMCG_CHARGE_BATCH) {
++ atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold);
++ __this_cpu_write(stats_updates, 0);
++ }
+ }
+
+ static void __mem_cgroup_flush_stats(void)
+@@ -656,7 +662,7 @@ void mem_cgroup_flush_stats(void)
+
+ static void flush_memcg_stats_dwork(struct work_struct *w)
+ {
+- mem_cgroup_flush_stats();
++ __mem_cgroup_flush_stats();
+ queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+ }
+
+@@ -672,7 +678,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
+ return;
+
+ __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+- memcg_rstat_updated(memcg);
++ memcg_rstat_updated(memcg, val);
+ }
+
+ /* idx can be of type enum memcg_stat_item or node_stat_item. */
+@@ -705,7 +711,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ /* Update lruvec */
+ __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+
+- memcg_rstat_updated(memcg);
++ memcg_rstat_updated(memcg, val);
+ }
+
+ /**
+@@ -789,7 +795,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+ return;
+
+ __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
+- memcg_rstat_updated(memcg);
++ memcg_rstat_updated(memcg, count);
+ }
+
+ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
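
memcg_rstat_updated() above now accumulates the magnitude of each update, so one large charge advances the flush threshold as much as many small ones would, where the old code counted every call as 1. A self-contained sketch of that magnitude-weighted batching (hypothetical names, a single "CPU" in place of the percpu accumulator):

#include <stdio.h>
#include <stdlib.h>

#define CHARGE_BATCH 64               /* plays MEMCG_CHARGE_BATCH */

static unsigned int stats_updates;    /* per-CPU accumulator in the kernel */
static int flush_threshold;           /* atomic_t in the kernel */

static void rstat_updated(int val)
{
	/* weight by magnitude so one +4096 counts like many +1s */
	stats_updates += abs(val);
	if (stats_updates > CHARGE_BATCH) {
		flush_threshold += stats_updates / CHARGE_BATCH;
		stats_updates = 0;
	}
}

int main(void)
{
	rstat_updated(+1);
	rstat_updated(-200);          /* one large delta crosses the batch at once */
	printf("threshold=%d pending=%u\n", flush_threshold, stats_updates);
	return 0;
}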