
From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.15 commit in: /
Date: Sat, 29 Jan 2022 17:42:37
Message-Id: 1643478140.9f1dc42781b7ce875ece2d5836a929d15ea75a21.mpagano@gentoo
commit: 9f1dc42781b7ce875ece2d5836a929d15ea75a21
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Jan 29 17:42:20 2022 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Jan 29 17:42:20 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=9f1dc427

Linux patch 5.15.18

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

0000_README | 4 +
1017_linux-5.15.18.patch | 1238 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1242 insertions(+)

diff --git a/0000_README b/0000_README
index eddb9032..dcf875ed 100644
--- a/0000_README
+++ b/0000_README
@@ -111,6 +111,10 @@ Patch: 1016_linux-5.15.17.patch
From: http://www.kernel.org
Desc: Linux 5.15.17

+Patch: 1017_linux-5.15.18.patch
+From: http://www.kernel.org
+Desc: Linux 5.15.18
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.

diff --git a/1017_linux-5.15.18.patch b/1017_linux-5.15.18.patch
new file mode 100644
index 00000000..eb664b95
--- /dev/null
+++ b/1017_linux-5.15.18.patch
@@ -0,0 +1,1238 @@
38 +diff --git a/Makefile b/Makefile
39 +index 088197ed3f66c..385286f987d89 100644
40 +--- a/Makefile
41 ++++ b/Makefile
42 +@@ -1,7 +1,7 @@
43 + # SPDX-License-Identifier: GPL-2.0
44 + VERSION = 5
45 + PATCHLEVEL = 15
46 +-SUBLEVEL = 17
47 ++SUBLEVEL = 18
48 + EXTRAVERSION =
49 + NAME = Trick or Treat
50 +
51 +diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
52 +index b15eb4a3e6b20..840a35ed92ec8 100644
53 +--- a/arch/arm64/include/asm/extable.h
54 ++++ b/arch/arm64/include/asm/extable.h
55 +@@ -22,15 +22,6 @@ struct exception_table_entry
56 +
57 + #define ARCH_HAS_RELATIVE_EXTABLE
58 +
59 +-static inline bool in_bpf_jit(struct pt_regs *regs)
60 +-{
61 +- if (!IS_ENABLED(CONFIG_BPF_JIT))
62 +- return false;
63 +-
64 +- return regs->pc >= BPF_JIT_REGION_START &&
65 +- regs->pc < BPF_JIT_REGION_END;
66 +-}
67 +-
68 + #ifdef CONFIG_BPF_JIT
69 + int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
70 + struct pt_regs *regs);
71 +diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
72 +index f1745a8434144..05886322c300c 100644
73 +--- a/arch/arm64/include/asm/memory.h
74 ++++ b/arch/arm64/include/asm/memory.h
75 +@@ -44,11 +44,8 @@
76 + #define _PAGE_OFFSET(va) (-(UL(1) << (va)))
77 + #define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
78 + #define KIMAGE_VADDR (MODULES_END)
79 +-#define BPF_JIT_REGION_START (_PAGE_END(VA_BITS_MIN))
80 +-#define BPF_JIT_REGION_SIZE (SZ_128M)
81 +-#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
82 + #define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
83 +-#define MODULES_VADDR (BPF_JIT_REGION_END)
84 ++#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
85 + #define MODULES_VSIZE (SZ_128M)
86 + #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
87 + #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
88 +diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
89 +index b03e383d944ab..fe0cd0568813e 100644
90 +--- a/arch/arm64/kernel/traps.c
91 ++++ b/arch/arm64/kernel/traps.c
92 +@@ -988,7 +988,7 @@ static struct break_hook bug_break_hook = {
93 + static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
94 + {
95 + pr_err("%s generated an invalid instruction at %pS!\n",
96 +- in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching",
97 ++ "Kernel text patching",
98 + (void *)instruction_pointer(regs));
99 +
100 + /* We cannot handle this */
101 +diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
102 +index aa0060178343a..60a8b6a8a42b5 100644
103 +--- a/arch/arm64/mm/extable.c
104 ++++ b/arch/arm64/mm/extable.c
105 +@@ -9,14 +9,19 @@
106 + int fixup_exception(struct pt_regs *regs)
107 + {
108 + const struct exception_table_entry *fixup;
109 ++ unsigned long addr;
110 +
111 +- fixup = search_exception_tables(instruction_pointer(regs));
112 +- if (!fixup)
113 +- return 0;
114 ++ addr = instruction_pointer(regs);
115 +
116 +- if (in_bpf_jit(regs))
117 ++ /* Search the BPF tables first, these are formatted differently */
118 ++ fixup = search_bpf_extables(addr);
119 ++ if (fixup)
120 + return arm64_bpf_fixup_exception(fixup, regs);
121 +
122 ++ fixup = search_exception_tables(addr);
123 ++ if (!fixup)
124 ++ return 0;
125 ++
126 + regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
127 + return 1;
128 + }
129 +diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
130 +index 1c403536c9bb0..9bc4066c5bf33 100644
131 +--- a/arch/arm64/mm/ptdump.c
132 ++++ b/arch/arm64/mm/ptdump.c
133 +@@ -41,8 +41,6 @@ static struct addr_marker address_markers[] = {
134 + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" },
135 + { KASAN_SHADOW_END, "Kasan shadow end" },
136 + #endif
137 +- { BPF_JIT_REGION_START, "BPF start" },
138 +- { BPF_JIT_REGION_END, "BPF end" },
139 + { MODULES_VADDR, "Modules start" },
140 + { MODULES_END, "Modules end" },
141 + { VMALLOC_START, "vmalloc() area" },
142 +diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
143 +index 803e7773fa869..465c44d0c72fc 100644
144 +--- a/arch/arm64/net/bpf_jit_comp.c
145 ++++ b/arch/arm64/net/bpf_jit_comp.c
146 +@@ -1138,15 +1138,12 @@ out:
147 +
148 + u64 bpf_jit_alloc_exec_limit(void)
149 + {
150 +- return BPF_JIT_REGION_SIZE;
151 ++ return VMALLOC_END - VMALLOC_START;
152 + }
153 +
154 + void *bpf_jit_alloc_exec(unsigned long size)
155 + {
156 +- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
157 +- BPF_JIT_REGION_END, GFP_KERNEL,
158 +- PAGE_KERNEL, 0, NUMA_NO_NODE,
159 +- __builtin_return_address(0));
160 ++ return vmalloc(size);
161 + }
162 +
163 + void bpf_jit_free_exec(void *addr)
164 +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
165 +index b5d93247237b1..c67e212443429 100644
166 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
167 ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
168 +@@ -72,6 +72,9 @@
169 + #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
170 + #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
171 +
172 ++#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
173 ++#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
174 ++
175 +
176 + static const char *gfxhub_client_ids[] = {
177 + "CB",
178 +@@ -1103,6 +1106,8 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
179 + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
180 + unsigned size;
181 +
182 ++ /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
183 ++
184 + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
185 + size = AMDGPU_VBIOS_VGA_ALLOCATION;
186 + } else {
187 +@@ -1110,7 +1115,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
188 +
189 + switch (adev->asic_type) {
190 + case CHIP_RAVEN:
191 +- case CHIP_RENOIR:
192 + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
193 + size = (REG_GET_FIELD(viewport,
194 + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
195 +@@ -1118,6 +1122,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
196 + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
197 + 4);
198 + break;
199 ++ case CHIP_RENOIR:
200 ++ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
201 ++ size = (REG_GET_FIELD(viewport,
202 ++ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
203 ++ REG_GET_FIELD(viewport,
204 ++ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
205 ++ 4);
206 ++ break;
207 + case CHIP_VEGA10:
208 + case CHIP_VEGA12:
209 + case CHIP_VEGA20:
210 +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
211 +index 8c2b77eb94593..162ae71861247 100644
212 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
213 ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
214 +@@ -119,6 +119,12 @@ int dcn31_smu_send_msg_with_param(
215 +
216 + result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
217 +
218 ++ if (result == VBIOSSMC_Result_Failed) {
219 ++ ASSERT(0);
220 ++ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
221 ++ return -1;
222 ++ }
223 ++
224 + if (IS_SMU_TIMEOUT(result)) {
225 + ASSERT(0);
226 + dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
227 +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
228 +index 2471f36aaff38..3012cbe5b0b7c 100644
229 +--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
230 ++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
231 +@@ -298,6 +298,7 @@ struct drm_i915_gem_object {
232 + I915_BO_ALLOC_USER)
233 + #define I915_BO_READONLY BIT(4)
234 + #define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */
235 ++#define I915_BO_WAS_BOUND_BIT 6
236 +
237 + /**
238 + * @mem_flags - Mutable placement-related flags
239 +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
240 +index 8eb1c3a6fc9cd..8d6c38a622016 100644
241 +--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
242 ++++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
243 +@@ -10,6 +10,8 @@
244 + #include "i915_gem_lmem.h"
245 + #include "i915_gem_mman.h"
246 +
247 ++#include "gt/intel_gt.h"
248 ++
249 + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
250 + struct sg_table *pages,
251 + unsigned int sg_page_sizes)
252 +@@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
253 + __i915_gem_object_reset_page_iter(obj);
254 + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
255 +
256 ++ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
257 ++ struct drm_i915_private *i915 = to_i915(obj->base.dev);
258 ++ intel_wakeref_t wakeref;
259 ++
260 ++ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
261 ++ intel_gt_invalidate_tlbs(&i915->gt);
262 ++ }
263 ++
264 + return pages;
265 + }
266 +
267 +diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
268 +index 62d40c9866427..e1e1d17d49fdd 100644
269 +--- a/drivers/gpu/drm/i915/gt/intel_gt.c
270 ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c
271 +@@ -29,6 +29,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
272 +
273 + spin_lock_init(&gt->irq_lock);
274 +
275 ++ mutex_init(&gt->tlb_invalidate_lock);
276 ++
277 + INIT_LIST_HEAD(&gt->closed_vma);
278 + spin_lock_init(&gt->closed_lock);
279 +
280 +@@ -895,3 +897,103 @@ void intel_gt_info_print(const struct intel_gt_info *info,
281 +
282 + intel_sseu_dump(&info->sseu, p);
283 + }
284 ++
285 ++struct reg_and_bit {
286 ++ i915_reg_t reg;
287 ++ u32 bit;
288 ++};
289 ++
290 ++static struct reg_and_bit
291 ++get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
292 ++ const i915_reg_t *regs, const unsigned int num)
293 ++{
294 ++ const unsigned int class = engine->class;
295 ++ struct reg_and_bit rb = { };
296 ++
297 ++ if (drm_WARN_ON_ONCE(&engine->i915->drm,
298 ++ class >= num || !regs[class].reg))
299 ++ return rb;
300 ++
301 ++ rb.reg = regs[class];
302 ++ if (gen8 && class == VIDEO_DECODE_CLASS)
303 ++ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
304 ++ else
305 ++ rb.bit = engine->instance;
306 ++
307 ++ rb.bit = BIT(rb.bit);
308 ++
309 ++ return rb;
310 ++}
311 ++
312 ++void intel_gt_invalidate_tlbs(struct intel_gt *gt)
313 ++{
314 ++ static const i915_reg_t gen8_regs[] = {
315 ++ [RENDER_CLASS] = GEN8_RTCR,
316 ++ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
317 ++ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
318 ++ [COPY_ENGINE_CLASS] = GEN8_BTCR,
319 ++ };
320 ++ static const i915_reg_t gen12_regs[] = {
321 ++ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
322 ++ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
323 ++ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
324 ++ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
325 ++ };
326 ++ struct drm_i915_private *i915 = gt->i915;
327 ++ struct intel_uncore *uncore = gt->uncore;
328 ++ struct intel_engine_cs *engine;
329 ++ enum intel_engine_id id;
330 ++ const i915_reg_t *regs;
331 ++ unsigned int num = 0;
332 ++
333 ++ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
334 ++ return;
335 ++
336 ++ if (GRAPHICS_VER(i915) == 12) {
337 ++ regs = gen12_regs;
338 ++ num = ARRAY_SIZE(gen12_regs);
339 ++ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
340 ++ regs = gen8_regs;
341 ++ num = ARRAY_SIZE(gen8_regs);
342 ++ } else if (GRAPHICS_VER(i915) < 8) {
343 ++ return;
344 ++ }
345 ++
346 ++ if (drm_WARN_ONCE(&i915->drm, !num,
347 ++ "Platform does not implement TLB invalidation!"))
348 ++ return;
349 ++
350 ++ GEM_TRACE("\n");
351 ++
352 ++ assert_rpm_wakelock_held(&i915->runtime_pm);
353 ++
354 ++ mutex_lock(&gt->tlb_invalidate_lock);
355 ++ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
356 ++
357 ++ for_each_engine(engine, gt, id) {
358 ++ /*
359 ++ * HW architecture suggest typical invalidation time at 40us,
360 ++ * with pessimistic cases up to 100us and a recommendation to
361 ++ * cap at 1ms. We go a bit higher just in case.
362 ++ */
363 ++ const unsigned int timeout_us = 100;
364 ++ const unsigned int timeout_ms = 4;
365 ++ struct reg_and_bit rb;
366 ++
367 ++ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
368 ++ if (!i915_mmio_reg_offset(rb.reg))
369 ++ continue;
370 ++
371 ++ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
372 ++ if (__intel_wait_for_register_fw(uncore,
373 ++ rb.reg, rb.bit, 0,
374 ++ timeout_us, timeout_ms,
375 ++ NULL))
376 ++ drm_err_ratelimited(&gt->i915->drm,
377 ++ "%s TLB invalidation did not complete in %ums!\n",
378 ++ engine->name, timeout_ms);
379 ++ }
380 ++
381 ++ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
382 ++ mutex_unlock(&gt->tlb_invalidate_lock);
383 ++}
384 +diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
385 +index 74e771871a9bd..c0169d6017c2d 100644
386 +--- a/drivers/gpu/drm/i915/gt/intel_gt.h
387 ++++ b/drivers/gpu/drm/i915/gt/intel_gt.h
388 +@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
389 +
390 + void intel_gt_watchdog_work(struct work_struct *work);
391 +
392 ++void intel_gt_invalidate_tlbs(struct intel_gt *gt);
393 ++
394 + #endif /* __INTEL_GT_H__ */
395 +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
396 +index a81e21bf1bd1a..9fbcbcc6c35db 100644
397 +--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
398 ++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
399 +@@ -72,6 +72,8 @@ struct intel_gt {
400 +
401 + struct intel_uc uc;
402 +
403 ++ struct mutex tlb_invalidate_lock;
404 ++
405 + struct intel_gt_timelines {
406 + spinlock_t lock; /* protects active_list */
407 + struct list_head active_list;
408 +diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
409 +index 9023d4ecf3b37..c65473fc90935 100644
410 +--- a/drivers/gpu/drm/i915/i915_reg.h
411 ++++ b/drivers/gpu/drm/i915/i915_reg.h
412 +@@ -2669,6 +2669,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
413 + #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
414 + #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
415 +
416 ++#define GEN8_RTCR _MMIO(0x4260)
417 ++#define GEN8_M1TCR _MMIO(0x4264)
418 ++#define GEN8_M2TCR _MMIO(0x4268)
419 ++#define GEN8_BTCR _MMIO(0x426c)
420 ++#define GEN8_VTCR _MMIO(0x4270)
421 ++
422 + #if 0
423 + #define PRB0_TAIL _MMIO(0x2030)
424 + #define PRB0_HEAD _MMIO(0x2034)
425 +@@ -2763,6 +2769,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
426 + #define FAULT_VA_HIGH_BITS (0xf << 0)
427 + #define FAULT_GTT_SEL (1 << 4)
428 +
429 ++#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
430 ++#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
431 ++#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
432 ++#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
433 ++
434 + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
435 +
436 + #define FPGA_DBG _MMIO(0x42300)
437 +diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
438 +index 4b7fc4647e460..dfd20060812bc 100644
439 +--- a/drivers/gpu/drm/i915/i915_vma.c
440 ++++ b/drivers/gpu/drm/i915/i915_vma.c
441 +@@ -434,6 +434,9 @@ int i915_vma_bind(struct i915_vma *vma,
442 + vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
443 + }
444 +
445 ++ if (vma->obj)
446 ++ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
447 ++
448 + atomic_or(bind_flags, &vma->flags);
449 + return 0;
450 + }
451 +diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
452 +index 6b38bc2811c1b..de8d0558389c4 100644
453 +--- a/drivers/gpu/drm/i915/intel_uncore.c
454 ++++ b/drivers/gpu/drm/i915/intel_uncore.c
455 +@@ -718,7 +718,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
456 + }
457 +
458 + static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
459 +- enum forcewake_domains fw_domains)
460 ++ enum forcewake_domains fw_domains,
461 ++ bool delayed)
462 + {
463 + struct intel_uncore_forcewake_domain *domain;
464 + unsigned int tmp;
465 +@@ -733,7 +734,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
466 + continue;
467 + }
468 +
469 +- uncore->funcs.force_wake_put(uncore, domain->mask);
470 ++ if (delayed &&
471 ++ !(domain->uncore->fw_domains_timer & domain->mask))
472 ++ fw_domain_arm_timer(domain);
473 ++ else
474 ++ uncore->funcs.force_wake_put(uncore, domain->mask);
475 + }
476 + }
477 +
478 +@@ -754,7 +759,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
479 + return;
480 +
481 + spin_lock_irqsave(&uncore->lock, irqflags);
482 +- __intel_uncore_forcewake_put(uncore, fw_domains);
483 ++ __intel_uncore_forcewake_put(uncore, fw_domains, false);
484 ++ spin_unlock_irqrestore(&uncore->lock, irqflags);
485 ++}
486 ++
487 ++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
488 ++ enum forcewake_domains fw_domains)
489 ++{
490 ++ unsigned long irqflags;
491 ++
492 ++ if (!uncore->funcs.force_wake_put)
493 ++ return;
494 ++
495 ++ spin_lock_irqsave(&uncore->lock, irqflags);
496 ++ __intel_uncore_forcewake_put(uncore, fw_domains, true);
497 + spin_unlock_irqrestore(&uncore->lock, irqflags);
498 + }
499 +
500 +@@ -796,7 +814,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
501 + if (!uncore->funcs.force_wake_put)
502 + return;
503 +
504 +- __intel_uncore_forcewake_put(uncore, fw_domains);
505 ++ __intel_uncore_forcewake_put(uncore, fw_domains, false);
506 + }
507 +
508 + void assert_forcewakes_inactive(struct intel_uncore *uncore)
509 +diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
510 +index 3c0b0a8b5250d..4c63209dcf530 100644
511 +--- a/drivers/gpu/drm/i915/intel_uncore.h
512 ++++ b/drivers/gpu/drm/i915/intel_uncore.h
513 +@@ -229,6 +229,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
514 + enum forcewake_domains domains);
515 + void intel_uncore_forcewake_put(struct intel_uncore *uncore,
516 + enum forcewake_domains domains);
517 ++void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
518 ++ enum forcewake_domains domains);
519 + void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
520 + enum forcewake_domains fw_domains);
521 +
522 +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
523 +index 2a7cec4cb8a89..f9f28516ffb41 100644
524 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
525 ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
526 +@@ -1112,15 +1112,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
527 + struct vmw_private *dev_priv,
528 + struct vmw_fence_obj **p_fence,
529 + uint32_t *p_handle);
530 +-extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
531 ++extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
532 + struct vmw_fpriv *vmw_fp,
533 + int ret,
534 + struct drm_vmw_fence_rep __user
535 + *user_fence_rep,
536 + struct vmw_fence_obj *fence,
537 + uint32_t fence_handle,
538 +- int32_t out_fence_fd,
539 +- struct sync_file *sync_file);
540 ++ int32_t out_fence_fd);
541 + bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd);
542 +
543 + /**
544 +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
545 +index 5f2ffa9de5c8f..9144e8f88c812 100644
546 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
547 ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
548 +@@ -3823,17 +3823,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
549 + * Also if copying fails, user-space will be unable to signal the fence object
550 + * so we wait for it immediately, and then unreference the user-space reference.
551 + */
552 +-void
553 ++int
554 + vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
555 + struct vmw_fpriv *vmw_fp, int ret,
556 + struct drm_vmw_fence_rep __user *user_fence_rep,
557 + struct vmw_fence_obj *fence, uint32_t fence_handle,
558 +- int32_t out_fence_fd, struct sync_file *sync_file)
559 ++ int32_t out_fence_fd)
560 + {
561 + struct drm_vmw_fence_rep fence_rep;
562 +
563 + if (user_fence_rep == NULL)
564 +- return;
565 ++ return 0;
566 +
567 + memset(&fence_rep, 0, sizeof(fence_rep));
568 +
569 +@@ -3861,20 +3861,14 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
570 + * handle.
571 + */
572 + if (unlikely(ret != 0) && (fence_rep.error == 0)) {
573 +- if (sync_file)
574 +- fput(sync_file->file);
575 +-
576 +- if (fence_rep.fd != -1) {
577 +- put_unused_fd(fence_rep.fd);
578 +- fence_rep.fd = -1;
579 +- }
580 +-
581 + ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle,
582 + TTM_REF_USAGE);
583 + VMW_DEBUG_USER("Fence copy error. Syncing.\n");
584 + (void) vmw_fence_obj_wait(fence, false, false,
585 + VMW_FENCE_WAIT_TIMEOUT);
586 + }
587 ++
588 ++ return ret ? -EFAULT : 0;
589 + }
590 +
591 + /**
592 +@@ -4212,16 +4206,23 @@ int vmw_execbuf_process(struct drm_file *file_priv,
593 +
594 + (void) vmw_fence_obj_wait(fence, false, false,
595 + VMW_FENCE_WAIT_TIMEOUT);
596 ++ }
597 ++ }
598 ++
599 ++ ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
600 ++ user_fence_rep, fence, handle, out_fence_fd);
601 ++
602 ++ if (sync_file) {
603 ++ if (ret) {
604 ++ /* usercopy of fence failed, put the file object */
605 ++ fput(sync_file->file);
606 ++ put_unused_fd(out_fence_fd);
607 + } else {
608 + /* Link the fence with the FD created earlier */
609 + fd_install(out_fence_fd, sync_file->file);
610 + }
611 + }
612 +
613 +- vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
614 +- user_fence_rep, fence, handle, out_fence_fd,
615 +- sync_file);
616 +-
617 + /* Don't unreference when handing fence out */
618 + if (unlikely(out_fence != NULL)) {
619 + *out_fence = fence;
620 +@@ -4239,7 +4240,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
621 + */
622 + vmw_validation_unref_lists(&val_ctx);
623 +
624 +- return 0;
625 ++ return ret;
626 +
627 + out_unlock_binding:
628 + mutex_unlock(&dev_priv->binding_mutex);
629 +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
630 +index 9fe12329a4d58..b4d9d7258a546 100644
631 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
632 ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
633 +@@ -1159,7 +1159,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
634 + }
635 +
636 + vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
637 +- handle, -1, NULL);
638 ++ handle, -1);
639 + vmw_fence_obj_unreference(&fence);
640 + return 0;
641 + out_no_create:
642 +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
643 +index 74fa419092138..14e8f665b13be 100644
644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
645 ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
646 +@@ -2516,7 +2516,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
647 + if (file_priv)
648 + vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv),
649 + ret, user_fence_rep, fence,
650 +- handle, -1, NULL);
651 ++ handle, -1);
652 + if (out_fence)
653 + *out_fence = fence;
654 + else
655 +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
656 +index e789430f407c3..72bdbebf25cec 100644
657 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
658 ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
659 +@@ -1850,6 +1850,14 @@ struct bnx2x {
660 +
661 + /* Vxlan/Geneve related information */
662 + u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
663 ++
664 ++#define FW_CAP_INVALIDATE_VF_FP_HSI BIT(0)
665 ++ u32 fw_cap;
666 ++
667 ++ u32 fw_major;
668 ++ u32 fw_minor;
669 ++ u32 fw_rev;
670 ++ u32 fw_eng;
671 + };
672 +
673 + /* Tx queues may be less or equal to Rx queues */
674 +@@ -2525,5 +2533,6 @@ void bnx2x_register_phc(struct bnx2x *bp);
675 + * Meant for implicit re-load flows.
676 + */
677 + int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
678 +-
679 ++int bnx2x_init_firmware(struct bnx2x *bp);
680 ++void bnx2x_release_firmware(struct bnx2x *bp);
681 + #endif /* bnx2x.h */
682 +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
683 +index b5d954cb409ae..41ebbb2c7d3ac 100644
684 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
685 ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
686 +@@ -2364,10 +2364,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
687 + if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
688 + load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
689 + /* build my FW version dword */
690 +- u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
691 +- (BCM_5710_FW_MINOR_VERSION << 8) +
692 +- (BCM_5710_FW_REVISION_VERSION << 16) +
693 +- (BCM_5710_FW_ENGINEERING_VERSION << 24);
694 ++ u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
695 ++ (bp->fw_rev << 16) + (bp->fw_eng << 24);
696 +
697 + /* read loaded FW from chip */
698 + u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
699 +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
700 +index 3f8435208bf49..a84d015da5dfa 100644
701 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
702 ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
703 +@@ -241,6 +241,8 @@
704 + IRO[221].m2))
705 + #define XSTORM_VF_TO_PF_OFFSET(funcId) \
706 + (IRO[48].base + ((funcId) * IRO[48].m1))
707 ++#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid) \
708 ++ (IRO[386].base + ((fid) * IRO[386].m1))
709 + #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
710 +
711 + /* eth hsi version */
712 +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
713 +index 622fadc50316e..611efee758340 100644
714 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
715 ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
716 +@@ -3024,7 +3024,8 @@ struct afex_stats {
717 +
718 + #define BCM_5710_FW_MAJOR_VERSION 7
719 + #define BCM_5710_FW_MINOR_VERSION 13
720 +-#define BCM_5710_FW_REVISION_VERSION 15
721 ++#define BCM_5710_FW_REVISION_VERSION 21
722 ++#define BCM_5710_FW_REVISION_VERSION_V15 15
723 + #define BCM_5710_FW_ENGINEERING_VERSION 0
724 + #define BCM_5710_FW_COMPILE_FLAGS 1
725 +
726 +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
727 +index ae87296ae1ffa..10a5b43976d20 100644
728 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
729 ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
730 +@@ -74,9 +74,19 @@
731 + __stringify(BCM_5710_FW_MINOR_VERSION) "." \
732 + __stringify(BCM_5710_FW_REVISION_VERSION) "." \
733 + __stringify(BCM_5710_FW_ENGINEERING_VERSION)
734 ++
735 ++#define FW_FILE_VERSION_V15 \
736 ++ __stringify(BCM_5710_FW_MAJOR_VERSION) "." \
737 ++ __stringify(BCM_5710_FW_MINOR_VERSION) "." \
738 ++ __stringify(BCM_5710_FW_REVISION_VERSION_V15) "." \
739 ++ __stringify(BCM_5710_FW_ENGINEERING_VERSION)
740 ++
741 + #define FW_FILE_NAME_E1 "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
742 + #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
743 + #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
744 ++#define FW_FILE_NAME_E1_V15 "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw"
745 ++#define FW_FILE_NAME_E1H_V15 "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw"
746 ++#define FW_FILE_NAME_E2_V15 "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw"
747 +
748 + /* Time in jiffies before concluding the transmitter is hung */
749 + #define TX_TIMEOUT (5*HZ)
750 +@@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
751 + CHIP_IS_E1(bp) ? "everest1" :
752 + CHIP_IS_E1H(bp) ? "everest1h" :
753 + CHIP_IS_E2(bp) ? "everest2" : "everest3",
754 +- BCM_5710_FW_MAJOR_VERSION,
755 +- BCM_5710_FW_MINOR_VERSION,
756 +- BCM_5710_FW_REVISION_VERSION);
757 ++ bp->fw_major, bp->fw_minor, bp->fw_rev);
758 +
759 + return rc;
760 + }
761 +@@ -12302,6 +12310,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
762 +
763 + bnx2x_read_fwinfo(bp);
764 +
765 ++ if (IS_PF(bp)) {
766 ++ rc = bnx2x_init_firmware(bp);
767 ++
768 ++ if (rc) {
769 ++ bnx2x_free_mem_bp(bp);
770 ++ return rc;
771 ++ }
772 ++ }
773 ++
774 + func = BP_FUNC(bp);
775 +
776 + /* need to reset chip if undi was active */
777 +@@ -12314,6 +12331,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
778 +
779 + rc = bnx2x_prev_unload(bp);
780 + if (rc) {
781 ++ bnx2x_release_firmware(bp);
782 + bnx2x_free_mem_bp(bp);
783 + return rc;
784 + }
785 +@@ -13311,16 +13329,11 @@ static int bnx2x_check_firmware(struct bnx2x *bp)
786 + /* Check FW version */
787 + offset = be32_to_cpu(fw_hdr->fw_version.offset);
788 + fw_ver = firmware->data + offset;
789 +- if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
790 +- (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
791 +- (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
792 +- (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
793 ++ if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
794 ++ fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
795 + BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n",
796 +- fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
797 +- BCM_5710_FW_MAJOR_VERSION,
798 +- BCM_5710_FW_MINOR_VERSION,
799 +- BCM_5710_FW_REVISION_VERSION,
800 +- BCM_5710_FW_ENGINEERING_VERSION);
801 ++ fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
802 ++ bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
803 + return -EINVAL;
804 + }
805 +
806 +@@ -13398,34 +13411,51 @@ do { \
807 + (u8 *)bp->arr, len); \
808 + } while (0)
809 +
810 +-static int bnx2x_init_firmware(struct bnx2x *bp)
811 ++int bnx2x_init_firmware(struct bnx2x *bp)
812 + {
813 +- const char *fw_file_name;
814 ++ const char *fw_file_name, *fw_file_name_v15;
815 + struct bnx2x_fw_file_hdr *fw_hdr;
816 + int rc;
817 +
818 + if (bp->firmware)
819 + return 0;
820 +
821 +- if (CHIP_IS_E1(bp))
822 ++ if (CHIP_IS_E1(bp)) {
823 + fw_file_name = FW_FILE_NAME_E1;
824 +- else if (CHIP_IS_E1H(bp))
825 ++ fw_file_name_v15 = FW_FILE_NAME_E1_V15;
826 ++ } else if (CHIP_IS_E1H(bp)) {
827 + fw_file_name = FW_FILE_NAME_E1H;
828 +- else if (!CHIP_IS_E1x(bp))
829 ++ fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
830 ++ } else if (!CHIP_IS_E1x(bp)) {
831 + fw_file_name = FW_FILE_NAME_E2;
832 +- else {
833 ++ fw_file_name_v15 = FW_FILE_NAME_E2_V15;
834 ++ } else {
835 + BNX2X_ERR("Unsupported chip revision\n");
836 + return -EINVAL;
837 + }
838 ++
839 + BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
840 +
841 + rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
842 + if (rc) {
843 +- BNX2X_ERR("Can't load firmware file %s\n",
844 +- fw_file_name);
845 +- goto request_firmware_exit;
846 ++ BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
847 ++
848 ++ /* try to load prev version */
849 ++ rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
850 ++
851 ++ if (rc)
852 ++ goto request_firmware_exit;
853 ++
854 ++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
855 ++ } else {
856 ++ bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
857 ++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
858 + }
859 +
860 ++ bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
861 ++ bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
862 ++ bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
863 ++
864 + rc = bnx2x_check_firmware(bp);
865 + if (rc) {
866 + BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
867 +@@ -13481,7 +13511,7 @@ request_firmware_exit:
868 + return rc;
869 + }
870 +
871 +-static void bnx2x_release_firmware(struct bnx2x *bp)
872 ++void bnx2x_release_firmware(struct bnx2x *bp)
873 + {
874 + kfree(bp->init_ops_offsets);
875 + kfree(bp->init_ops);
876 +@@ -13998,6 +14028,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
877 + return 0;
878 +
879 + init_one_freemem:
880 ++ bnx2x_release_firmware(bp);
881 + bnx2x_free_mem_bp(bp);
882 +
883 + init_one_exit:
884 +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
885 +index 6fbf735fca31c..5613957314501 100644
886 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
887 ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
888 +@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
889 +
890 + void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
891 + {
892 ++ u16 abs_fid;
893 ++
894 ++ abs_fid = FW_VF_HANDLE(abs_vfid);
895 ++
896 + /* set the VF-PF association in the FW */
897 +- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
898 +- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
899 ++ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
900 ++ storm_memset_func_en(bp, abs_fid, 1);
901 ++
902 ++ /* Invalidate fp_hsi version for vfs */
903 ++ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
904 ++ REG_WR8(bp, BAR_XSTRORM_INTMEM +
905 ++ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
906 +
907 + /* clear vf errors*/
908 + bnx2x_vf_semi_clear_err(bp, abs_vfid);
909 +diff --git a/fs/io_uring.c b/fs/io_uring.c
910 +index ecffeddf90c68..f713b91537f41 100644
911 +--- a/fs/io_uring.c
912 ++++ b/fs/io_uring.c
913 +@@ -1760,6 +1760,18 @@ static inline void io_get_task_refs(int nr)
914 + io_task_refs_refill(tctx);
915 + }
916 +
917 ++static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
918 ++{
919 ++ struct io_uring_task *tctx = task->io_uring;
920 ++ unsigned int refs = tctx->cached_refs;
921 ++
922 ++ if (refs) {
923 ++ tctx->cached_refs = 0;
924 ++ percpu_counter_sub(&tctx->inflight, refs);
925 ++ put_task_struct_many(task, refs);
926 ++ }
927 ++}
928 ++
929 + static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
930 + long res, unsigned int cflags)
931 + {
932 +@@ -2200,6 +2212,10 @@ static void tctx_task_work(struct callback_head *cb)
933 + }
934 +
935 + ctx_flush_and_put(ctx, &locked);
936 ++
937 ++ /* relaxed read is enough as only the task itself sets ->in_idle */
938 ++ if (unlikely(atomic_read(&tctx->in_idle)))
939 ++ io_uring_drop_tctx_refs(current);
940 + }
941 +
942 + static void io_req_task_work_add(struct io_kiocb *req)
943 +@@ -9766,18 +9782,6 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
944 + return percpu_counter_sum(&tctx->inflight);
945 + }
946 +
947 +-static void io_uring_drop_tctx_refs(struct task_struct *task)
948 +-{
949 +- struct io_uring_task *tctx = task->io_uring;
950 +- unsigned int refs = tctx->cached_refs;
951 +-
952 +- if (refs) {
953 +- tctx->cached_refs = 0;
954 +- percpu_counter_sub(&tctx->inflight, refs);
955 +- put_task_struct_many(task, refs);
956 +- }
957 +-}
958 +-
959 + /*
960 + * Find any io_uring ctx that this task has registered or done IO on, and cancel
961 + * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
962 +@@ -9834,10 +9838,14 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
963 + schedule();
964 + finish_wait(&tctx->wait, &wait);
965 + } while (1);
966 +- atomic_dec(&tctx->in_idle);
967 +
968 + io_uring_clean_tctx(tctx);
969 + if (cancel_all) {
970 ++ /*
971 ++ * We shouldn't run task_works after cancel, so just leave
972 ++ * ->in_idle set for normal exit.
973 ++ */
974 ++ atomic_dec(&tctx->in_idle);
975 + /* for exec all current's requests should be gone, kill tctx */
976 + __io_uring_free(current);
977 + }
978 +diff --git a/fs/select.c b/fs/select.c
979 +index 945896d0ac9e7..5edffee1162c2 100644
980 +--- a/fs/select.c
981 ++++ b/fs/select.c
982 +@@ -458,9 +458,11 @@ get_max:
983 + return max;
984 + }
985 +
986 +-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
987 +-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
988 +-#define POLLEX_SET (EPOLLPRI)
989 ++#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
990 ++ EPOLLNVAL)
991 ++#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
992 ++ EPOLLNVAL)
993 ++#define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
994 +
995 + static inline void wait_key_set(poll_table *wait, unsigned long in,
996 + unsigned long out, unsigned long bit,
997 +@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
998 + break;
999 + if (!(bit & all_bits))
1000 + continue;
1001 ++ mask = EPOLLNVAL;
1002 + f = fdget(i);
1003 + if (f.file) {
1004 + wait_key_set(wait, in, out, bit,
1005 +@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
1006 + mask = vfs_poll(f.file, wait);
1007 +
1008 + fdput(f);
1009 +- if ((mask & POLLIN_SET) && (in & bit)) {
1010 +- res_in |= bit;
1011 +- retval++;
1012 +- wait->_qproc = NULL;
1013 +- }
1014 +- if ((mask & POLLOUT_SET) && (out & bit)) {
1015 +- res_out |= bit;
1016 +- retval++;
1017 +- wait->_qproc = NULL;
1018 +- }
1019 +- if ((mask & POLLEX_SET) && (ex & bit)) {
1020 +- res_ex |= bit;
1021 +- retval++;
1022 +- wait->_qproc = NULL;
1023 +- }
1024 +- /* got something, stop busy polling */
1025 +- if (retval) {
1026 +- can_busy_loop = false;
1027 +- busy_flag = 0;
1028 +-
1029 +- /*
1030 +- * only remember a returned
1031 +- * POLL_BUSY_LOOP if we asked for it
1032 +- */
1033 +- } else if (busy_flag & mask)
1034 +- can_busy_loop = true;
1035 +-
1036 + }
1037 ++ if ((mask & POLLIN_SET) && (in & bit)) {
1038 ++ res_in |= bit;
1039 ++ retval++;
1040 ++ wait->_qproc = NULL;
1041 ++ }
1042 ++ if ((mask & POLLOUT_SET) && (out & bit)) {
1043 ++ res_out |= bit;
1044 ++ retval++;
1045 ++ wait->_qproc = NULL;
1046 ++ }
1047 ++ if ((mask & POLLEX_SET) && (ex & bit)) {
1048 ++ res_ex |= bit;
1049 ++ retval++;
1050 ++ wait->_qproc = NULL;
1051 ++ }
1052 ++ /* got something, stop busy polling */
1053 ++ if (retval) {
1054 ++ can_busy_loop = false;
1055 ++ busy_flag = 0;
1056 ++
1057 ++ /*
1058 ++ * only remember a returned
1059 ++ * POLL_BUSY_LOOP if we asked for it
1060 ++ */
1061 ++ } else if (busy_flag & mask)
1062 ++ can_busy_loop = true;
1063 ++
1064 + }
1065 + if (res_in)
1066 + *rinp = res_in;
1067 +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
1068 +index 7ae10fab68b8f..4ca6d5b199e8e 100644
1069 +--- a/kernel/rcu/tree.c
1070 ++++ b/kernel/rcu/tree.c
1071 +@@ -1594,10 +1594,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
1072 + struct rcu_data *rdp)
1073 + {
1074 + rcu_lockdep_assert_cblist_protected(rdp);
1075 +- if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
1076 +- !raw_spin_trylock_rcu_node(rnp))
1077 ++ if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
1078 + return;
1079 +- WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
1080 ++ // The grace period cannot end while we hold the rcu_node lock.
1081 ++ if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
1082 ++ WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
1083 + raw_spin_unlock_rcu_node(rnp);
1084 + }
1085 +
1086 +diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1087 +index 87e41c3cac101..96cd7eae800b0 100644
1088 +--- a/mm/memcontrol.c
1089 ++++ b/mm/memcontrol.c
1090 +@@ -103,11 +103,6 @@ static bool do_memsw_account(void)
1091 + return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
1092 + }
1093 +
1094 +-/* memcg and lruvec stats flushing */
1095 +-static void flush_memcg_stats_dwork(struct work_struct *w);
1096 +-static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
1097 +-static DEFINE_SPINLOCK(stats_flush_lock);
1098 +-
1099 + #define THRESHOLDS_EVENTS_TARGET 128
1100 + #define SOFTLIMIT_EVENTS_TARGET 1024
1101 +
1102 +@@ -635,6 +630,64 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
1103 + return mz;
1104 + }
1105 +
1106 ++/*
1107 ++ * memcg and lruvec stats flushing
1108 ++ *
1109 ++ * Many codepaths leading to stats update or read are performance sensitive and
1110 ++ * adding stats flushing in such codepaths is not desirable. So, to optimize the
1111 ++ * flushing the kernel does:
1112 ++ *
1113 ++ * 1) Periodically and asynchronously flush the stats every 2 seconds to not let
1114 ++ * rstat update tree grow unbounded.
1115 ++ *
1116 ++ * 2) Flush the stats synchronously on reader side only when there are more than
1117 ++ * (MEMCG_CHARGE_BATCH * nr_cpus) update events. Though this optimization
1118 ++ * will let stats be out of sync by atmost (MEMCG_CHARGE_BATCH * nr_cpus) but
1119 ++ * only for 2 seconds due to (1).
1120 ++ */
1121 ++static void flush_memcg_stats_dwork(struct work_struct *w);
1122 ++static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
1123 ++static DEFINE_SPINLOCK(stats_flush_lock);
1124 ++static DEFINE_PER_CPU(unsigned int, stats_updates);
1125 ++static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
1126 ++
1127 ++static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
1128 ++{
1129 ++ unsigned int x;
1130 ++
1131 ++ cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
1132 ++
1133 ++ x = __this_cpu_add_return(stats_updates, abs(val));
1134 ++ if (x > MEMCG_CHARGE_BATCH) {
1135 ++ atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold);
1136 ++ __this_cpu_write(stats_updates, 0);
1137 ++ }
1138 ++}
1139 ++
1140 ++static void __mem_cgroup_flush_stats(void)
1141 ++{
1142 ++ unsigned long flag;
1143 ++
1144 ++ if (!spin_trylock_irqsave(&stats_flush_lock, flag))
1145 ++ return;
1146 ++
1147 ++ cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
1148 ++ atomic_set(&stats_flush_threshold, 0);
1149 ++ spin_unlock_irqrestore(&stats_flush_lock, flag);
1150 ++}
1151 ++
1152 ++void mem_cgroup_flush_stats(void)
1153 ++{
1154 ++ if (atomic_read(&stats_flush_threshold) > num_online_cpus())
1155 ++ __mem_cgroup_flush_stats();
1156 ++}
1157 ++
1158 ++static void flush_memcg_stats_dwork(struct work_struct *w)
1159 ++{
1160 ++ __mem_cgroup_flush_stats();
1161 ++ queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
1162 ++}
1163 ++
1164 + /**
1165 + * __mod_memcg_state - update cgroup memory statistics
1166 + * @memcg: the memory cgroup
1167 +@@ -647,7 +700,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
1168 + return;
1169 +
1170 + __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
1171 +- cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
1172 ++ memcg_rstat_updated(memcg, val);
1173 + }
1174 +
1175 + /* idx can be of type enum memcg_stat_item or node_stat_item. */
1176 +@@ -675,10 +728,12 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
1177 + memcg = pn->memcg;
1178 +
1179 + /* Update memcg */
1180 +- __mod_memcg_state(memcg, idx, val);
1181 ++ __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
1182 +
1183 + /* Update lruvec */
1184 + __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
1185 ++
1186 ++ memcg_rstat_updated(memcg, val);
1187 + }
1188 +
1189 + /**
1190 +@@ -780,7 +835,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
1191 + return;
1192 +
1193 + __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
1194 +- cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
1195 ++ memcg_rstat_updated(memcg, count);
1196 + }
1197 +
1198 + static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
1199 +@@ -1414,7 +1469,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
1200 + *
1201 + * Current memory state:
1202 + */
1203 +- cgroup_rstat_flush(memcg->css.cgroup);
1204 ++ mem_cgroup_flush_stats();
1205 +
1206 + for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
1207 + u64 size;
1208 +@@ -3507,8 +3562,7 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
1209 + unsigned long val;
1210 +
1211 + if (mem_cgroup_is_root(memcg)) {
1212 +- /* mem_cgroup_threshold() calls here from irqsafe context */
1213 +- cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
1214 ++ mem_cgroup_flush_stats();
1215 + val = memcg_page_state(memcg, NR_FILE_PAGES) +
1216 + memcg_page_state(memcg, NR_ANON_MAPPED);
1217 + if (swap)
1218 +@@ -3889,7 +3943,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
1219 + int nid;
1220 + struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
1221 +
1222 +- cgroup_rstat_flush(memcg->css.cgroup);
1223 ++ mem_cgroup_flush_stats();
1224 +
1225 + for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
1226 + seq_printf(m, "%s=%lu", stat->name,
1227 +@@ -3961,7 +4015,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
1228 +
1229 + BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
1230 +
1231 +- cgroup_rstat_flush(memcg->css.cgroup);
1232 ++ mem_cgroup_flush_stats();
1233 +
1234 + for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
1235 + unsigned long nr;
1236 +@@ -4464,7 +4518,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
1237 + struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
1238 + struct mem_cgroup *parent;
1239 +
1240 +- cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
1241 ++ mem_cgroup_flush_stats();
1242 +
1243 + *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
1244 + *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
1245 +@@ -5330,21 +5384,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
1246 + memcg_wb_domain_size_changed(memcg);
1247 + }
1248 +
1249 +-void mem_cgroup_flush_stats(void)
1250 +-{
1251 +- if (!spin_trylock(&stats_flush_lock))
1252 +- return;
1253 +-
1254 +- cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
1255 +- spin_unlock(&stats_flush_lock);
1256 +-}
1257 +-
1258 +-static void flush_memcg_stats_dwork(struct work_struct *w)
1259 +-{
1260 +- mem_cgroup_flush_stats();
1261 +- queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
1262 +-}
1263 +-
1264 + static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
1265 + {
1266 + struct mem_cgroup *memcg = mem_cgroup_from_css(css);
1267 +@@ -6362,7 +6401,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
1268 + int i;
1269 + struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
1270 +
1271 +- cgroup_rstat_flush(memcg->css.cgroup);
1272 ++ mem_cgroup_flush_stats();
1273 +
1274 + for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
1275 + int nid;