
From: "Mike Pagano (mpagano)" <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] linux-patches r2182 - genpatches-2.6/trunk/3.0
Date: Wed, 01 Aug 2012 23:36:16
Message-Id: <20120801233556.5D72C2004B@flycatcher.gentoo.org>
1 Author: mpagano
2 Date: 2012-08-01 23:35:55 +0000 (Wed, 01 Aug 2012)
3 New Revision: 2182
4
5 Added:
6 genpatches-2.6/trunk/3.0/1036_linux-3.0.37.patch
7 genpatches-2.6/trunk/3.0/1037_linux-3.0.38.patch
8 genpatches-2.6/trunk/3.0/1038_linux-3.0.39.patch
9 Modified:
10 genpatches-2.6/trunk/3.0/0000_README
11 Log:
12 Linux patches 3.0.37, 3.0.38 and 3.0.39
13
14 Modified: genpatches-2.6/trunk/3.0/0000_README
15 ===================================================================
16 --- genpatches-2.6/trunk/3.0/0000_README 2012-07-30 17:13:24 UTC (rev 2181)
17 +++ genpatches-2.6/trunk/3.0/0000_README 2012-08-01 23:35:55 UTC (rev 2182)
18 @@ -179,6 +179,18 @@
19 From: http://www.kernel.org
20 Desc: Linux 3.0.36
21
22 +Patch: 1036_linux-3.0.37.patch
23 +From: http://www.kernel.org
24 +Desc: Linux 3.0.37
25 +
26 +Patch: 1037_linux-3.0.38.patch
27 +From: http://www.kernel.org
28 +Desc: Linux 3.0.38
29 +
30 +Patch: 1038_linux-3.0.39.patch
31 +From: http://www.kernel.org
32 +Desc: Linux 3.0.39
33 +
34 Patch: 1800_fix-zcache-build.patch
35 From: http://bugs.gentoo.org/show_bug.cgi?id=376325
36 Desc: Fix zcache build error
37
38 Added: genpatches-2.6/trunk/3.0/1036_linux-3.0.37.patch
39 ===================================================================
40 --- genpatches-2.6/trunk/3.0/1036_linux-3.0.37.patch (rev 0)
41 +++ genpatches-2.6/trunk/3.0/1036_linux-3.0.37.patch 2012-08-01 23:35:55 UTC (rev 2182)
42 @@ -0,0 +1,1934 @@
43 +diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
44 +index 21fd05c..e1f856b 100644
45 +--- a/Documentation/stable_kernel_rules.txt
46 ++++ b/Documentation/stable_kernel_rules.txt
47 +@@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the
48 + marked CONFIG_BROKEN), an oops, a hang, data corruption, a real
49 + security issue, or some "oh, that's not good" issue. In short, something
50 + critical.
51 ++ - Serious issues as reported by a user of a distribution kernel may also
52 ++ be considered if they fix a notable performance or interactivity issue.
53 ++ As these fixes are not as obvious and have a higher risk of a subtle
54 ++ regression they should only be submitted by a distribution kernel
55 ++ maintainer and include an addendum linking to a bugzilla entry if it
56 ++ exists and additional information on the user-visible impact.
57 + - New device IDs and quirks are also accepted.
58 + - No "theoretical race condition" issues, unless an explanation of how the
59 + race can be exploited is also provided.
60 +diff --git a/Makefile b/Makefile
61 +index cc34921..009160e 100644
62 +--- a/Makefile
63 ++++ b/Makefile
64 +@@ -1,6 +1,6 @@
65 + VERSION = 3
66 + PATCHLEVEL = 0
67 +-SUBLEVEL = 36
68 ++SUBLEVEL = 37
69 + EXTRAVERSION =
70 + NAME = Sneaky Weasel
71 +
72 +diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
73 +index fea97f6..4469924 100644
74 +--- a/arch/arm/kernel/smp.c
75 ++++ b/arch/arm/kernel/smp.c
76 +@@ -445,9 +445,7 @@ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent);
77 + static void ipi_timer(void)
78 + {
79 + struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent);
80 +- irq_enter();
81 + evt->event_handler(evt);
82 +- irq_exit();
83 + }
84 +
85 + #ifdef CONFIG_LOCAL_TIMERS
86 +@@ -458,7 +456,9 @@ asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
87 +
88 + if (local_timer_ack()) {
89 + __inc_irq_stat(cpu, local_timer_irqs);
90 ++ irq_enter();
91 + ipi_timer();
92 ++ irq_exit();
93 + }
94 +
95 + set_irq_regs(old_regs);
96 +@@ -568,7 +568,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
97 +
98 + switch (ipinr) {
99 + case IPI_TIMER:
100 ++ irq_enter();
101 + ipi_timer();
102 ++ irq_exit();
103 + break;
104 +
105 + case IPI_RESCHEDULE:
106 +@@ -576,15 +578,21 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
107 + break;
108 +
109 + case IPI_CALL_FUNC:
110 ++ irq_enter();
111 + generic_smp_call_function_interrupt();
112 ++ irq_exit();
113 + break;
114 +
115 + case IPI_CALL_FUNC_SINGLE:
116 ++ irq_enter();
117 + generic_smp_call_function_single_interrupt();
118 ++ irq_exit();
119 + break;
120 +
121 + case IPI_CPU_STOP:
122 ++ irq_enter();
123 + ipi_cpu_stop(cpu);
124 ++ irq_exit();
125 + break;
126 +
127 + default:
128 +diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
129 +index 42541bb..ace1784 100644
130 +--- a/arch/powerpc/xmon/xmon.c
131 ++++ b/arch/powerpc/xmon/xmon.c
132 +@@ -975,7 +975,7 @@ static int cpu_cmd(void)
133 + /* print cpus waiting or in xmon */
134 + printf("cpus stopped:");
135 + count = 0;
136 +- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
137 ++ for_each_possible_cpu(cpu) {
138 + if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
139 + if (count == 0)
140 + printf(" %x", cpu);
141 +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
142 +index 71cc380..c5d941f 100644
143 +--- a/arch/x86/include/asm/cpufeature.h
144 ++++ b/arch/x86/include/asm/cpufeature.h
145 +@@ -173,7 +173,7 @@
146 + #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
147 + #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
148 + #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
149 +-#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
150 ++#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
151 +
152 + /* Virtualization flags: Linux defined, word 8 */
153 + #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
154 +diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
155 +index 4558f0d..479d03c 100644
156 +--- a/arch/x86/kernel/acpi/boot.c
157 ++++ b/arch/x86/kernel/acpi/boot.c
158 +@@ -416,12 +416,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
159 + return 0;
160 + }
161 +
162 +- if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
163 ++ if (intsrc->source_irq == 0) {
164 + if (acpi_skip_timer_override) {
165 +- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
166 ++ printk(PREFIX "BIOS IRQ0 override ignored.\n");
167 + return 0;
168 + }
169 +- if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
170 ++
171 ++ if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
172 ++ && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
173 + intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
174 + printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
175 + }
176 +@@ -1327,17 +1329,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
177 + }
178 +
179 + /*
180 +- * Force ignoring BIOS IRQ0 pin2 override
181 ++ * Force ignoring BIOS IRQ0 override
182 + */
183 + static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
184 + {
185 +- /*
186 +- * The ati_ixp4x0_rev() early PCI quirk should have set
187 +- * the acpi_skip_timer_override flag already:
188 +- */
189 + if (!acpi_skip_timer_override) {
190 +- WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
191 +- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
192 ++ pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
193 + d->ident);
194 + acpi_skip_timer_override = 1;
195 + }
196 +@@ -1431,7 +1428,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
197 + * is enabled. This input is incorrectly designated the
198 + * ISA IRQ 0 via an interrupt source override even though
199 + * it is wired to the output of the master 8259A and INTIN0
200 +- * is not connected at all. Force ignoring BIOS IRQ0 pin2
201 ++ * is not connected at all. Force ignoring BIOS IRQ0
202 + * override in that cases.
203 + */
204 + {
205 +@@ -1466,6 +1463,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
206 + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
207 + },
208 + },
209 ++ {
210 ++ .callback = dmi_ignore_irq0_timer_override,
211 ++ .ident = "FUJITSU SIEMENS",
212 ++ .matches = {
213 ++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
214 ++ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
215 ++ },
216 ++ },
217 + {}
218 + };
219 +
220 +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
221 +index c7f64e6..ea6106c 100644
222 +--- a/arch/x86/kernel/cpu/scattered.c
223 ++++ b/arch/x86/kernel/cpu/scattered.c
224 +@@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
225 + const struct cpuid_bit *cb;
226 +
227 + static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
228 +- { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
229 ++ { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
230 + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
231 + { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
232 + { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
233 +diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
234 +index d4a705f..89d6877 100644
235 +--- a/arch/x86/kernel/reboot.c
236 ++++ b/arch/x86/kernel/reboot.c
237 +@@ -452,6 +452,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
238 + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
239 + },
240 + },
241 ++ { /* Handle problems with rebooting on the Precision M6600. */
242 ++ .callback = set_pci_reboot,
243 ++ .ident = "Dell OptiPlex 990",
244 ++ .matches = {
245 ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
246 ++ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
247 ++ },
248 ++ },
249 + { }
250 + };
251 +
252 +diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
253 +index a43fa1a..1502c502 100644
254 +--- a/drivers/acpi/acpi_pad.c
255 ++++ b/drivers/acpi/acpi_pad.c
256 +@@ -36,6 +36,7 @@
257 + #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
258 + #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
259 + static DEFINE_MUTEX(isolated_cpus_lock);
260 ++static DEFINE_MUTEX(round_robin_lock);
261 +
262 + static unsigned long power_saving_mwait_eax;
263 +
264 +@@ -107,7 +108,7 @@ static void round_robin_cpu(unsigned int tsk_index)
265 + if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
266 + return;
267 +
268 +- mutex_lock(&isolated_cpus_lock);
269 ++ mutex_lock(&round_robin_lock);
270 + cpumask_clear(tmp);
271 + for_each_cpu(cpu, pad_busy_cpus)
272 + cpumask_or(tmp, tmp, topology_thread_cpumask(cpu));
273 +@@ -116,7 +117,7 @@ static void round_robin_cpu(unsigned int tsk_index)
274 + if (cpumask_empty(tmp))
275 + cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
276 + if (cpumask_empty(tmp)) {
277 +- mutex_unlock(&isolated_cpus_lock);
278 ++ mutex_unlock(&round_robin_lock);
279 + return;
280 + }
281 + for_each_cpu(cpu, tmp) {
282 +@@ -131,7 +132,7 @@ static void round_robin_cpu(unsigned int tsk_index)
283 + tsk_in_cpu[tsk_index] = preferred_cpu;
284 + cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
285 + cpu_weight[preferred_cpu]++;
286 +- mutex_unlock(&isolated_cpus_lock);
287 ++ mutex_unlock(&round_robin_lock);
288 +
289 + set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
290 + }
291 +diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
292 +index 77255f2..0364b05 100644
293 +--- a/drivers/acpi/sysfs.c
294 ++++ b/drivers/acpi/sysfs.c
295 +@@ -173,7 +173,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
296 + {
297 + int result = 0;
298 +
299 +- if (!strncmp(val, "enable", strlen("enable") - 1)) {
300 ++ if (!strncmp(val, "enable", strlen("enable"))) {
301 + result = acpi_debug_trace(trace_method_name, trace_debug_level,
302 + trace_debug_layer, 0);
303 + if (result)
304 +@@ -181,7 +181,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
305 + goto exit;
306 + }
307 +
308 +- if (!strncmp(val, "disable", strlen("disable") - 1)) {
309 ++ if (!strncmp(val, "disable", strlen("disable"))) {
310 + int name = 0;
311 + result = acpi_debug_trace((char *)&name, trace_debug_level,
312 + trace_debug_layer, 0);
313 +diff --git a/drivers/block/umem.c b/drivers/block/umem.c
314 +index 031ca72..afa8463 100644
315 +--- a/drivers/block/umem.c
316 ++++ b/drivers/block/umem.c
317 +@@ -513,6 +513,44 @@ static void process_page(unsigned long data)
318 + }
319 + }
320 +
321 ++struct mm_plug_cb {
322 ++ struct blk_plug_cb cb;
323 ++ struct cardinfo *card;
324 ++};
325 ++
326 ++static void mm_unplug(struct blk_plug_cb *cb)
327 ++{
328 ++ struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb);
329 ++
330 ++ spin_lock_irq(&mmcb->card->lock);
331 ++ activate(mmcb->card);
332 ++ spin_unlock_irq(&mmcb->card->lock);
333 ++ kfree(mmcb);
334 ++}
335 ++
336 ++static int mm_check_plugged(struct cardinfo *card)
337 ++{
338 ++ struct blk_plug *plug = current->plug;
339 ++ struct mm_plug_cb *mmcb;
340 ++
341 ++ if (!plug)
342 ++ return 0;
343 ++
344 ++ list_for_each_entry(mmcb, &plug->cb_list, cb.list) {
345 ++ if (mmcb->cb.callback == mm_unplug && mmcb->card == card)
346 ++ return 1;
347 ++ }
348 ++ /* Not currently on the callback list */
349 ++ mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC);
350 ++ if (!mmcb)
351 ++ return 0;
352 ++
353 ++ mmcb->card = card;
354 ++ mmcb->cb.callback = mm_unplug;
355 ++ list_add(&mmcb->cb.list, &plug->cb_list);
356 ++ return 1;
357 ++}
358 ++
359 + static int mm_make_request(struct request_queue *q, struct bio *bio)
360 + {
361 + struct cardinfo *card = q->queuedata;
362 +@@ -523,6 +561,8 @@ static int mm_make_request(struct request_queue *q, struct bio *bio)
363 + *card->biotail = bio;
364 + bio->bi_next = NULL;
365 + card->biotail = &bio->bi_next;
366 ++ if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card))
367 ++ activate(card);
368 + spin_unlock_irq(&card->lock);
369 +
370 + return 0;
371 +diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
372 +index 1bbb85b..a303b61 100644
373 +--- a/drivers/gpu/drm/drm_edid.c
374 ++++ b/drivers/gpu/drm/drm_edid.c
375 +@@ -584,7 +584,7 @@ static bool
376 + drm_monitor_supports_rb(struct edid *edid)
377 + {
378 + if (edid->revision >= 4) {
379 +- bool ret;
380 ++ bool ret = false;
381 + drm_for_each_detailed_block((u8 *)edid, is_rb, &ret);
382 + return ret;
383 + }
384 +diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
385 +index bc7dcaa..5ad0b51 100644
386 +--- a/drivers/gpu/drm/i915/i915_suspend.c
387 ++++ b/drivers/gpu/drm/i915/i915_suspend.c
388 +@@ -739,8 +739,11 @@ static void i915_restore_display(struct drm_device *dev)
389 + if (HAS_PCH_SPLIT(dev)) {
390 + I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->saveBLC_PWM_CTL);
391 + I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->saveBLC_PWM_CTL2);
392 +- I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL);
393 ++ /* NOTE: BLC_PWM_CPU_CTL must be written after BLC_PWM_CPU_CTL2;
394 ++ * otherwise we get blank eDP screen after S3 on some machines
395 ++ */
396 + I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2);
397 ++ I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL);
398 + I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS);
399 + I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS);
400 + I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR);
401 +diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
402 +index 39aee6d..ea71f78 100644
403 +--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
404 ++++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
405 +@@ -487,7 +487,7 @@ int nouveau_fbcon_init(struct drm_device *dev)
406 + nfbdev->helper.funcs = &nouveau_fbcon_helper_funcs;
407 +
408 + ret = drm_fb_helper_init(dev, &nfbdev->helper,
409 +- nv_two_heads(dev) ? 2 : 1, 4);
410 ++ dev->mode_config.num_crtc, 4);
411 + if (ret) {
412 + kfree(nfbdev);
413 + return ret;
414 +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
415 +index 4c07436..d99aa84 100644
416 +--- a/drivers/hwmon/applesmc.c
417 ++++ b/drivers/hwmon/applesmc.c
418 +@@ -215,7 +215,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
419 + int i;
420 +
421 + if (send_command(cmd) || send_argument(key)) {
422 +- pr_warn("%s: read arg fail\n", key);
423 ++ pr_warn("%.4s: read arg fail\n", key);
424 + return -EIO;
425 + }
426 +
427 +@@ -223,7 +223,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
428 +
429 + for (i = 0; i < len; i++) {
430 + if (__wait_status(0x05)) {
431 +- pr_warn("%s: read data fail\n", key);
432 ++ pr_warn("%.4s: read data fail\n", key);
433 + return -EIO;
434 + }
435 + buffer[i] = inb(APPLESMC_DATA_PORT);
436 +diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
437 +index 3cf2353..252defd 100644
438 +--- a/drivers/hwmon/coretemp.c
439 ++++ b/drivers/hwmon/coretemp.c
440 +@@ -709,7 +709,7 @@ static void __cpuinit get_core_online(unsigned int cpu)
441 + * sensors. We check this bit only, all the early CPUs
442 + * without thermal sensors will be filtered out.
443 + */
444 +- if (!cpu_has(c, X86_FEATURE_DTS))
445 ++ if (!cpu_has(c, X86_FEATURE_DTHERM))
446 + return;
447 +
448 + if (!pdev) {
449 +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
450 +index 0d6c42f..b65a7c5 100644
451 +--- a/drivers/md/raid10.c
452 ++++ b/drivers/md/raid10.c
453 +@@ -1858,6 +1858,12 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
454 + /* want to reconstruct this device */
455 + rb2 = r10_bio;
456 + sect = raid10_find_virt(conf, sector_nr, i);
457 ++ if (sect >= mddev->resync_max_sectors) {
458 ++ /* last stripe is not complete - don't
459 ++ * try to recover this sector.
460 ++ */
461 ++ continue;
462 ++ }
463 + /* Unless we are doing a full sync, we only need
464 + * to recover the block if it is set in the bitmap
465 + */
466 +diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
467 +index 1f6c68d..cff955a 100644
468 +--- a/drivers/md/raid5.c
469 ++++ b/drivers/md/raid5.c
470 +@@ -199,12 +199,14 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
471 + BUG_ON(!list_empty(&sh->lru));
472 + BUG_ON(atomic_read(&conf->active_stripes)==0);
473 + if (test_bit(STRIPE_HANDLE, &sh->state)) {
474 +- if (test_bit(STRIPE_DELAYED, &sh->state))
475 ++ if (test_bit(STRIPE_DELAYED, &sh->state) &&
476 ++ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
477 + list_add_tail(&sh->lru, &conf->delayed_list);
478 + else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
479 + sh->bm_seq - conf->seq_write > 0)
480 + list_add_tail(&sh->lru, &conf->bitmap_list);
481 + else {
482 ++ clear_bit(STRIPE_DELAYED, &sh->state);
483 + clear_bit(STRIPE_BIT_DELAY, &sh->state);
484 + list_add_tail(&sh->lru, &conf->handle_list);
485 + }
486 +@@ -3846,7 +3848,6 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
487 + raid_bio->bi_next = (void*)rdev;
488 + align_bi->bi_bdev = rdev->bdev;
489 + align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
490 +- align_bi->bi_sector += rdev->data_offset;
491 +
492 + if (!bio_fits_rdev(align_bi)) {
493 + /* too big in some way */
494 +@@ -3855,6 +3856,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
495 + return 0;
496 + }
497 +
498 ++ /* No reshape active, so we can trust rdev->data_offset */
499 ++ align_bi->bi_sector += rdev->data_offset;
500 ++
501 + spin_lock_irq(&conf->device_lock);
502 + wait_event_lock_irq(conf->wait_for_stripe,
503 + conf->quiesce == 0,
504 +diff --git a/drivers/media/dvb/siano/smsusb.c b/drivers/media/dvb/siano/smsusb.c
505 +index d755407..4e5719e 100644
506 +--- a/drivers/media/dvb/siano/smsusb.c
507 ++++ b/drivers/media/dvb/siano/smsusb.c
508 +@@ -543,6 +543,8 @@ static const struct usb_device_id smsusb_id_table[] __devinitconst = {
509 + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
510 + { USB_DEVICE(0x2040, 0xc0a0),
511 + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
512 ++ { USB_DEVICE(0x2040, 0xf5a0),
513 ++ .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
514 + { } /* Terminating entry */
515 + };
516 +
517 +diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
518 +index 87ebb4e..f5cdc56 100644
519 +--- a/drivers/mtd/nand/cafe_nand.c
520 ++++ b/drivers/mtd/nand/cafe_nand.c
521 +@@ -102,7 +102,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
522 + static int cafe_device_ready(struct mtd_info *mtd)
523 + {
524 + struct cafe_priv *cafe = mtd->priv;
525 +- int result = !!(cafe_readl(cafe, NAND_STATUS) | 0x40000000);
526 ++ int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000);
527 + uint32_t irqs = cafe_readl(cafe, NAND_IRQ);
528 +
529 + cafe_writel(cafe, irqs, NAND_IRQ);
530 +diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
531 +index a485f7f..2ce5db5 100644
532 +--- a/drivers/net/benet/be_main.c
533 ++++ b/drivers/net/benet/be_main.c
534 +@@ -763,6 +763,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb,
535 +
536 + copied = make_tx_wrbs(adapter, skb, wrb_cnt, dummy_wrb);
537 + if (copied) {
538 ++ int gso_segs = skb_shinfo(skb)->gso_segs;
539 ++
540 + /* record the sent skb in the sent_skb table */
541 + BUG_ON(tx_obj->sent_skb_list[start]);
542 + tx_obj->sent_skb_list[start] = skb;
543 +@@ -780,8 +782,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb,
544 +
545 + be_txq_notify(adapter, txq->id, wrb_cnt);
546 +
547 +- be_tx_stats_update(adapter, wrb_cnt, copied,
548 +- skb_shinfo(skb)->gso_segs, stopped);
549 ++ be_tx_stats_update(adapter, wrb_cnt, copied, gso_segs, stopped);
550 + } else {
551 + txq->head = start;
552 + dev_kfree_skb_any(skb);
553 +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
554 +index e6da842..504e201 100644
555 +--- a/drivers/net/bonding/bond_main.c
556 ++++ b/drivers/net/bonding/bond_main.c
557 +@@ -77,6 +77,7 @@
558 + #include <net/route.h>
559 + #include <net/net_namespace.h>
560 + #include <net/netns/generic.h>
561 ++#include <net/pkt_sched.h>
562 + #include "bonding.h"
563 + #include "bond_3ad.h"
564 + #include "bond_alb.h"
565 +@@ -388,8 +389,6 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
566 + return next;
567 + }
568 +
569 +-#define bond_queue_mapping(skb) (*(u16 *)((skb)->cb))
570 +-
571 + /**
572 + * bond_dev_queue_xmit - Prepare skb for xmit.
573 + *
574 +@@ -403,7 +402,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
575 + skb->dev = slave_dev;
576 + skb->priority = 1;
577 +
578 +- skb->queue_mapping = bond_queue_mapping(skb);
579 ++ BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
580 ++ sizeof(qdisc_skb_cb(skb)->bond_queue_mapping));
581 ++ skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping;
582 +
583 + if (unlikely(netpoll_tx_running(slave_dev)))
584 + bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
585 +@@ -4240,7 +4241,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
586 + /*
587 + * Save the original txq to restore before passing to the driver
588 + */
589 +- bond_queue_mapping(skb) = skb->queue_mapping;
590 ++ qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping;
591 +
592 + if (unlikely(txq >= dev->real_num_tx_queues)) {
593 + do {
594 +diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
595 +index 1f8a824..1bf8032 100644
596 +--- a/drivers/net/can/c_can/c_can.c
597 ++++ b/drivers/net/can/c_can/c_can.c
598 +@@ -592,8 +592,8 @@ static void c_can_chip_config(struct net_device *dev)
599 + priv->write_reg(priv, &priv->regs->control,
600 + CONTROL_ENABLE_AR);
601 +
602 +- if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY &
603 +- CAN_CTRLMODE_LOOPBACK)) {
604 ++ if ((priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) &&
605 ++ (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)) {
606 + /* loopback + silent mode : useful for hot self-test */
607 + priv->write_reg(priv, &priv->regs->control, CONTROL_EIE |
608 + CONTROL_SIE | CONTROL_IE | CONTROL_TEST);
609 +diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
610 +index 3fa19c1..098ff31 100644
611 +--- a/drivers/net/dummy.c
612 ++++ b/drivers/net/dummy.c
613 +@@ -37,6 +37,7 @@
614 + #include <linux/rtnetlink.h>
615 + #include <net/rtnetlink.h>
616 + #include <linux/u64_stats_sync.h>
617 ++#include <linux/sched.h>
618 +
619 + static int numdummies = 1;
620 +
621 +@@ -186,8 +187,10 @@ static int __init dummy_init_module(void)
622 + rtnl_lock();
623 + err = __rtnl_link_register(&dummy_link_ops);
624 +
625 +- for (i = 0; i < numdummies && !err; i++)
626 ++ for (i = 0; i < numdummies && !err; i++) {
627 + err = dummy_init_one();
628 ++ cond_resched();
629 ++ }
630 + if (err < 0)
631 + __rtnl_link_unregister(&dummy_link_ops);
632 + rtnl_unlock();
633 +diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
634 +index f2e31c8..9d4a2a3 100644
635 +--- a/drivers/net/sky2.c
636 ++++ b/drivers/net/sky2.c
637 +@@ -4206,10 +4206,12 @@ static int sky2_set_features(struct net_device *dev, u32 features)
638 + struct sky2_port *sky2 = netdev_priv(dev);
639 + u32 changed = dev->features ^ features;
640 +
641 +- if (changed & NETIF_F_RXCSUM) {
642 +- u32 on = features & NETIF_F_RXCSUM;
643 +- sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR),
644 +- on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM);
645 ++ if ((changed & NETIF_F_RXCSUM) &&
646 ++ !(sky2->hw->flags & SKY2_HW_NEW_LE)) {
647 ++ sky2_write32(sky2->hw,
648 ++ Q_ADDR(rxqaddr[sky2->port], Q_CSR),
649 ++ (features & NETIF_F_RXCSUM)
650 ++ ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM);
651 + }
652 +
653 + if (changed & NETIF_F_RXHASH)
654 +diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
655 +index 9cf4e47..db9b212 100644
656 +--- a/drivers/net/usb/ipheth.c
657 ++++ b/drivers/net/usb/ipheth.c
658 +@@ -59,6 +59,7 @@
659 + #define USB_PRODUCT_IPHONE_3G 0x1292
660 + #define USB_PRODUCT_IPHONE_3GS 0x1294
661 + #define USB_PRODUCT_IPHONE_4 0x1297
662 ++#define USB_PRODUCT_IPAD 0x129a
663 + #define USB_PRODUCT_IPHONE_4_VZW 0x129c
664 + #define USB_PRODUCT_IPHONE_4S 0x12a0
665 +
666 +@@ -101,6 +102,10 @@ static struct usb_device_id ipheth_table[] = {
667 + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
668 + IPHETH_USBINTF_PROTO) },
669 + { USB_DEVICE_AND_INTERFACE_INFO(
670 ++ USB_VENDOR_APPLE, USB_PRODUCT_IPAD,
671 ++ IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
672 ++ IPHETH_USBINTF_PROTO) },
673 ++ { USB_DEVICE_AND_INTERFACE_INFO(
674 + USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW,
675 + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
676 + IPHETH_USBINTF_PROTO) },
677 +diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
678 +index 7c2f06e..9130a5a 100644
679 +--- a/drivers/net/wireless/ath/ath9k/hw.c
680 ++++ b/drivers/net/wireless/ath/ath9k/hw.c
681 +@@ -530,7 +530,7 @@ static int __ath9k_hw_init(struct ath_hw *ah)
682 +
683 + if (ah->config.serialize_regmode == SER_REG_MODE_AUTO) {
684 + if (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI ||
685 +- ((AR_SREV_9160(ah) || AR_SREV_9280(ah)) &&
686 ++ ((AR_SREV_9160(ah) || AR_SREV_9280(ah) || AR_SREV_9287(ah)) &&
687 + !ah->is_pciexpress)) {
688 + ah->config.serialize_regmode =
689 + SER_REG_MODE_ON;
690 +@@ -682,13 +682,25 @@ static void ath9k_hw_init_qos(struct ath_hw *ah)
691 +
692 + u32 ar9003_get_pll_sqsum_dvc(struct ath_hw *ah)
693 + {
694 ++ struct ath_common *common = ath9k_hw_common(ah);
695 ++ int i = 0;
696 ++
697 + REG_CLR_BIT(ah, PLL3, PLL3_DO_MEAS_MASK);
698 + udelay(100);
699 + REG_SET_BIT(ah, PLL3, PLL3_DO_MEAS_MASK);
700 +
701 +- while ((REG_READ(ah, PLL4) & PLL4_MEAS_DONE) == 0)
702 ++ while ((REG_READ(ah, PLL4) & PLL4_MEAS_DONE) == 0) {
703 ++
704 + udelay(100);
705 +
706 ++ if (WARN_ON_ONCE(i >= 100)) {
707 ++ ath_err(common, "PLL4 meaurement not done\n");
708 ++ break;
709 ++ }
710 ++
711 ++ i++;
712 ++ }
713 ++
714 + return (REG_READ(ah, PLL3) & SQSUM_DVC_MASK) >> 3;
715 + }
716 + EXPORT_SYMBOL(ar9003_get_pll_sqsum_dvc);
717 +diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
718 +index a126a3e..633f962 100644
719 +--- a/drivers/net/wireless/ath/ath9k/main.c
720 ++++ b/drivers/net/wireless/ath/ath9k/main.c
721 +@@ -648,6 +648,15 @@ void ath_hw_pll_work(struct work_struct *work)
722 + hw_pll_work.work);
723 + u32 pll_sqsum;
724 +
725 ++ /*
726 ++ * ensure that the PLL WAR is executed only
727 ++ * after the STA is associated (or) if the
728 ++ * beaconing had started in interfaces that
729 ++ * uses beacons.
730 ++ */
731 ++ if (!(sc->sc_flags & SC_OP_BEACONS))
732 ++ return;
733 ++
734 + if (AR_SREV_9485(sc->sc_ah)) {
735 +
736 + ath9k_ps_wakeup(sc);
737 +diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.c b/drivers/net/wireless/mwifiex/11n_rxreorder.c
738 +index e5dfdc3..d2358cf 100644
739 +--- a/drivers/net/wireless/mwifiex/11n_rxreorder.c
740 ++++ b/drivers/net/wireless/mwifiex/11n_rxreorder.c
741 +@@ -267,7 +267,8 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta,
742 + else
743 + last_seq = priv->rx_seq[tid];
744 +
745 +- if (last_seq >= new_node->start_win)
746 ++ if (last_seq != MWIFIEX_DEF_11N_RX_SEQ_NUM &&
747 ++ last_seq >= new_node->start_win)
748 + new_node->start_win = last_seq + 1;
749 +
750 + new_node->win_size = win_size;
751 +@@ -612,5 +613,5 @@ void mwifiex_11n_cleanup_reorder_tbl(struct mwifiex_private *priv)
752 + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
753 +
754 + INIT_LIST_HEAD(&priv->rx_reorder_tbl_ptr);
755 +- memset(priv->rx_seq, 0, sizeof(priv->rx_seq));
756 ++ mwifiex_reset_11n_rx_seq_num(priv);
757 + }
758 +diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.h b/drivers/net/wireless/mwifiex/11n_rxreorder.h
759 +index f3ca8c8..7576c2a 100644
760 +--- a/drivers/net/wireless/mwifiex/11n_rxreorder.h
761 ++++ b/drivers/net/wireless/mwifiex/11n_rxreorder.h
762 +@@ -37,6 +37,13 @@
763 +
764 + #define ADDBA_RSP_STATUS_ACCEPT 0
765 +
766 ++#define MWIFIEX_DEF_11N_RX_SEQ_NUM 0xffff
767 ++
768 ++static inline void mwifiex_reset_11n_rx_seq_num(struct mwifiex_private *priv)
769 ++{
770 ++ memset(priv->rx_seq, 0xff, sizeof(priv->rx_seq));
771 ++}
772 ++
773 + int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *,
774 + u16 seqNum,
775 + u16 tid, u8 *ta,
776 +diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c
777 +index 91634da..2cdb41a 100644
778 +--- a/drivers/net/wireless/mwifiex/wmm.c
779 ++++ b/drivers/net/wireless/mwifiex/wmm.c
780 +@@ -406,6 +406,8 @@ mwifiex_wmm_init(struct mwifiex_adapter *adapter)
781 + priv->add_ba_param.tx_win_size = MWIFIEX_AMPDU_DEF_TXWINSIZE;
782 + priv->add_ba_param.rx_win_size = MWIFIEX_AMPDU_DEF_RXWINSIZE;
783 +
784 ++ mwifiex_reset_11n_rx_seq_num(priv);
785 ++
786 + atomic_set(&priv->wmm.tx_pkts_queued, 0);
787 + atomic_set(&priv->wmm.highest_queued_prio, HIGH_PRIO_TID);
788 + }
789 +diff --git a/drivers/net/wireless/rtl818x/rtl8187/leds.c b/drivers/net/wireless/rtl818x/rtl8187/leds.c
790 +index 2e0de2f..c2d5b49 100644
791 +--- a/drivers/net/wireless/rtl818x/rtl8187/leds.c
792 ++++ b/drivers/net/wireless/rtl818x/rtl8187/leds.c
793 +@@ -117,7 +117,7 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev,
794 + radio_on = true;
795 + } else if (radio_on) {
796 + radio_on = false;
797 +- cancel_delayed_work_sync(&priv->led_on);
798 ++ cancel_delayed_work(&priv->led_on);
799 + ieee80211_queue_delayed_work(hw, &priv->led_off, 0);
800 + }
801 + } else if (radio_on) {
802 +diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
803 +index 9046f7b..137406c 100644
804 +--- a/drivers/oprofile/oprofile_perf.c
805 ++++ b/drivers/oprofile/oprofile_perf.c
806 +@@ -25,7 +25,7 @@ static int oprofile_perf_enabled;
807 + static DEFINE_MUTEX(oprofile_perf_mutex);
808 +
809 + static struct op_counter_config *counter_config;
810 +-static struct perf_event **perf_events[nr_cpumask_bits];
811 ++static struct perf_event **perf_events[NR_CPUS];
812 + static int num_counters;
813 +
814 + /*
815 +diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
816 +index 46767c5..475a340 100644
817 +--- a/drivers/pci/pci-driver.c
818 ++++ b/drivers/pci/pci-driver.c
819 +@@ -726,6 +726,18 @@ static int pci_pm_suspend_noirq(struct device *dev)
820 +
821 + pci_pm_set_unknown_state(pci_dev);
822 +
823 ++ /*
824 ++ * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's
825 ++ * PCI COMMAND register isn't 0, the BIOS assumes that the controller
826 ++ * hasn't been quiesced and tries to turn it off. If the controller
827 ++ * is already in D3, this can hang or cause memory corruption.
828 ++ *
829 ++ * Since the value of the COMMAND register doesn't matter once the
830 ++ * device has been suspended, we can safely set it to 0 here.
831 ++ */
832 ++ if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI)
833 ++ pci_write_config_word(pci_dev, PCI_COMMAND, 0);
834 ++
835 + return 0;
836 + }
837 +
838 +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
839 +index bf401ae..d549bbc 100644
840 +--- a/drivers/pci/pci.c
841 ++++ b/drivers/pci/pci.c
842 +@@ -1682,11 +1682,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
843 + if (target_state == PCI_POWER_ERROR)
844 + return -EIO;
845 +
846 +- /* Some devices mustn't be in D3 during system sleep */
847 +- if (target_state == PCI_D3hot &&
848 +- (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP))
849 +- return 0;
850 +-
851 + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev));
852 +
853 + error = pci_set_power_state(dev, target_state);
854 +diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
855 +index 975af43..a6b07dd 100644
856 +--- a/drivers/pci/quirks.c
857 ++++ b/drivers/pci/quirks.c
858 +@@ -2856,32 +2856,6 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev)
859 + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
860 + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
861 +
862 +-/*
863 +- * The Intel 6 Series/C200 Series chipset's EHCI controllers on many
864 +- * ASUS motherboards will cause memory corruption or a system crash
865 +- * if they are in D3 while the system is put into S3 sleep.
866 +- */
867 +-static void __devinit asus_ehci_no_d3(struct pci_dev *dev)
868 +-{
869 +- const char *sys_info;
870 +- static const char good_Asus_board[] = "P8Z68-V";
871 +-
872 +- if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)
873 +- return;
874 +- if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK)
875 +- return;
876 +- sys_info = dmi_get_system_info(DMI_BOARD_NAME);
877 +- if (sys_info && memcmp(sys_info, good_Asus_board,
878 +- sizeof(good_Asus_board) - 1) == 0)
879 +- return;
880 +-
881 +- dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n");
882 +- dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP;
883 +- device_set_wakeup_capable(&dev->dev, false);
884 +-}
885 +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3);
886 +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3);
887 +-
888 + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
889 + struct pci_fixup *end)
890 + {
891 +diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
892 +index 39e41fb..5160354 100644
893 +--- a/drivers/rtc/rtc-mxc.c
894 ++++ b/drivers/rtc/rtc-mxc.c
895 +@@ -191,10 +191,11 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id)
896 + struct platform_device *pdev = dev_id;
897 + struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
898 + void __iomem *ioaddr = pdata->ioaddr;
899 ++ unsigned long flags;
900 + u32 status;
901 + u32 events = 0;
902 +
903 +- spin_lock_irq(&pdata->rtc->irq_lock);
904 ++ spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
905 + status = readw(ioaddr + RTC_RTCISR) & readw(ioaddr + RTC_RTCIENR);
906 + /* clear interrupt sources */
907 + writew(status, ioaddr + RTC_RTCISR);
908 +@@ -217,7 +218,7 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id)
909 + rtc_update_alarm(&pdev->dev, &pdata->g_rtc_alarm);
910 +
911 + rtc_update_irq(pdata->rtc, 1, events);
912 +- spin_unlock_irq(&pdata->rtc->irq_lock);
913 ++ spin_unlock_irqrestore(&pdata->rtc->irq_lock, flags);
914 +
915 + return IRQ_HANDLED;
916 + }
917 +diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
918 +index 7491e21..a40541c 100644
919 +--- a/drivers/target/tcm_fc/tfc_sess.c
920 ++++ b/drivers/target/tcm_fc/tfc_sess.c
921 +@@ -64,7 +64,8 @@ static struct ft_tport *ft_tport_create(struct fc_lport *lport)
922 + struct ft_tport *tport;
923 + int i;
924 +
925 +- tport = rcu_dereference(lport->prov[FC_TYPE_FCP]);
926 ++ tport = rcu_dereference_protected(lport->prov[FC_TYPE_FCP],
927 ++ lockdep_is_held(&ft_lport_lock));
928 + if (tport && tport->tpg)
929 + return tport;
930 +
931 +diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
932 +index 5a244cf..8a72e05 100644
933 +--- a/drivers/usb/class/cdc-wdm.c
934 ++++ b/drivers/usb/class/cdc-wdm.c
935 +@@ -457,6 +457,8 @@ retry:
936 + goto retry;
937 + }
938 + if (!desc->reslength) { /* zero length read */
939 ++ dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__);
940 ++ clear_bit(WDM_READ, &desc->flags);
941 + spin_unlock_irq(&desc->iuspin);
942 + goto retry;
943 + }
944 +diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
945 +index 0757b19..9ab094c 100644
946 +--- a/drivers/usb/host/pci-quirks.c
947 ++++ b/drivers/usb/host/pci-quirks.c
948 +@@ -755,6 +755,7 @@ EXPORT_SYMBOL_GPL(usb_is_intel_switchable_xhci);
949 + */
950 + void usb_enable_xhci_ports(struct pci_dev *xhci_pdev)
951 + {
952 ++#if defined(CONFIG_USB_XHCI_HCD) || defined(CONFIG_USB_XHCI_HCD_MODULE)
953 + u32 ports_available;
954 +
955 + ports_available = 0xffffffff;
956 +@@ -782,6 +783,18 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev)
957 + &ports_available);
958 + dev_dbg(&xhci_pdev->dev, "USB 2.0 ports that are now switched over "
959 + "to xHCI: 0x%x\n", ports_available);
960 ++#else
961 ++ /* Don't switchover the ports if the user hasn't compiled the xHCI
962 ++ * driver. Otherwise they will see "dead" USB ports that don't power
963 ++ * the devices.
964 ++ */
965 ++ dev_warn(&xhci_pdev->dev,
966 ++ "CONFIG_USB_XHCI_HCD is turned off, "
967 ++ "defaulting to EHCI.\n");
968 ++ dev_warn(&xhci_pdev->dev,
969 ++ "USB 3.0 devices will work at USB 2.0 speeds.\n");
970 ++#endif /* CONFIG_USB_XHCI_HCD || CONFIG_USB_XHCI_HCD_MODULE */
971 ++
972 + }
973 + EXPORT_SYMBOL_GPL(usb_enable_xhci_ports);
974 +
975 +diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
976 +index aa0c43f..35e6b5f 100644
977 +--- a/drivers/usb/serial/cp210x.c
978 ++++ b/drivers/usb/serial/cp210x.c
979 +@@ -93,6 +93,7 @@ static const struct usb_device_id id_table[] = {
980 + { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */
981 + { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */
982 + { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */
983 ++ { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */
984 + { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */
985 + { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */
986 + { USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */
987 +@@ -134,7 +135,13 @@ static const struct usb_device_id id_table[] = {
988 + { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
989 + { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
990 + { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
991 ++ { USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */
992 ++ { USB_DEVICE(0x166A, 0x0301) }, /* Clipsal 5800PC C-Bus Wireless PC Interface */
993 + { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
994 ++ { USB_DEVICE(0x166A, 0x0304) }, /* Clipsal 5000CT2 C-Bus Black and White Touchscreen */
995 ++ { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */
996 ++ { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */
997 ++ { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */
998 + { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
999 + { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */
1000 + { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */
1001 +@@ -146,7 +153,11 @@ static const struct usb_device_id id_table[] = {
1002 + { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
1003 + { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
1004 + { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
1005 ++ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */
1006 ++ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */
1007 + { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */
1008 ++ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */
1009 ++ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */
1010 + { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
1011 + { } /* Terminating Entry */
1012 + };
1013 +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
1014 +index ce02457..d232623 100644
1015 +--- a/drivers/usb/serial/option.c
1016 ++++ b/drivers/usb/serial/option.c
1017 +@@ -236,6 +236,7 @@ static void option_instat_callback(struct urb *urb);
1018 + #define NOVATELWIRELESS_PRODUCT_G1 0xA001
1019 + #define NOVATELWIRELESS_PRODUCT_G1_M 0xA002
1020 + #define NOVATELWIRELESS_PRODUCT_G2 0xA010
1021 ++#define NOVATELWIRELESS_PRODUCT_MC551 0xB001
1022 +
1023 + /* AMOI PRODUCTS */
1024 + #define AMOI_VENDOR_ID 0x1614
1025 +@@ -496,6 +497,19 @@ static void option_instat_callback(struct urb *urb);
1026 +
1027 + /* MediaTek products */
1028 + #define MEDIATEK_VENDOR_ID 0x0e8d
1029 ++#define MEDIATEK_PRODUCT_DC_1COM 0x00a0
1030 ++#define MEDIATEK_PRODUCT_DC_4COM 0x00a5
1031 ++#define MEDIATEK_PRODUCT_DC_5COM 0x00a4
1032 ++#define MEDIATEK_PRODUCT_7208_1COM 0x7101
1033 ++#define MEDIATEK_PRODUCT_7208_2COM 0x7102
1034 ++#define MEDIATEK_PRODUCT_FP_1COM 0x0003
1035 ++#define MEDIATEK_PRODUCT_FP_2COM 0x0023
1036 ++#define MEDIATEK_PRODUCT_FPDC_1COM 0x0043
1037 ++#define MEDIATEK_PRODUCT_FPDC_2COM 0x0033
1038 ++
1039 ++/* Cellient products */
1040 ++#define CELLIENT_VENDOR_ID 0x2692
1041 ++#define CELLIENT_PRODUCT_MEN200 0x9005
1042 +
1043 + /* some devices interfaces need special handling due to a number of reasons */
1044 + enum option_blacklist_reason {
1045 +@@ -549,6 +563,10 @@ static const struct option_blacklist_info net_intf1_blacklist = {
1046 + .reserved = BIT(1),
1047 + };
1048 +
1049 ++static const struct option_blacklist_info net_intf2_blacklist = {
1050 ++ .reserved = BIT(2),
1051 ++};
1052 ++
1053 + static const struct option_blacklist_info net_intf3_blacklist = {
1054 + .reserved = BIT(3),
1055 + };
1056 +@@ -734,6 +752,8 @@ static const struct usb_device_id option_ids[] = {
1057 + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1) },
1058 + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1_M) },
1059 + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) },
1060 ++ /* Novatel Ovation MC551 a.k.a. Verizon USB551L */
1061 ++ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
1062 +
1063 + { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
1064 + { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
1065 +@@ -1090,6 +1110,8 @@ static const struct usb_device_id option_ids[] = {
1066 + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1298, 0xff, 0xff, 0xff) },
1067 + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1299, 0xff, 0xff, 0xff) },
1068 + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1300, 0xff, 0xff, 0xff) },
1069 ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1402, 0xff, 0xff, 0xff),
1070 ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist },
1071 + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff,
1072 + 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_k3765_z_blacklist },
1073 + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) },
1074 +@@ -1231,6 +1253,18 @@ static const struct usb_device_id option_ids[] = {
1075 + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x02, 0x01) },
1076 + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x00, 0x00) },
1077 + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x02, 0x01) }, /* MediaTek MT6276M modem & app port */
1078 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_1COM, 0x0a, 0x00, 0x00) },
1079 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x02, 0x01) },
1080 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x00, 0x00) },
1081 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x02, 0x01) },
1082 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x00, 0x00) },
1083 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_1COM, 0x02, 0x00, 0x00) },
1084 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_2COM, 0x02, 0x02, 0x01) },
1085 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_1COM, 0x0a, 0x00, 0x00) },
1086 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_2COM, 0x0a, 0x00, 0x00) },
1087 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_1COM, 0x0a, 0x00, 0x00) },
1088 ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_2COM, 0x0a, 0x00, 0x00) },
1089 ++ { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
1090 + { } /* Terminating entry */
1091 + };
1092 + MODULE_DEVICE_TABLE(usb, option_ids);
1093 +diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
1094 +index ea966b3..61047fe 100644
1095 +--- a/drivers/vhost/vhost.c
1096 ++++ b/drivers/vhost/vhost.c
1097 +@@ -217,6 +217,8 @@ static int vhost_worker(void *data)
1098 + if (work) {
1099 + __set_current_state(TASK_RUNNING);
1100 + work->fn(work);
1101 ++ if (need_resched())
1102 ++ schedule();
1103 + } else
1104 + schedule();
1105 +
1106 +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
1107 +index 7fa128d..faf7d0b 100644
1108 +--- a/fs/btrfs/tree-log.c
1109 ++++ b/fs/btrfs/tree-log.c
1110 +@@ -691,6 +691,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
1111 + kfree(name);
1112 +
1113 + iput(inode);
1114 ++
1115 ++ btrfs_run_delayed_items(trans, root);
1116 + return ret;
1117 + }
1118 +
1119 +@@ -896,6 +898,7 @@ again:
1120 + ret = btrfs_unlink_inode(trans, root, dir,
1121 + inode, victim_name,
1122 + victim_name_len);
1123 ++ btrfs_run_delayed_items(trans, root);
1124 + }
1125 + kfree(victim_name);
1126 + ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
1127 +@@ -1476,6 +1479,9 @@ again:
1128 + ret = btrfs_unlink_inode(trans, root, dir, inode,
1129 + name, name_len);
1130 + BUG_ON(ret);
1131 ++
1132 ++ btrfs_run_delayed_items(trans, root);
1133 ++
1134 + kfree(name);
1135 + iput(inode);
1136 +
1137 +diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
1138 +index 69f994a..0dbe58a 100644
1139 +--- a/fs/ecryptfs/kthread.c
1140 ++++ b/fs/ecryptfs/kthread.c
1141 +@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
1142 + (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
1143 + if (!IS_ERR(*lower_file))
1144 + goto out;
1145 +- if (flags & O_RDONLY) {
1146 ++ if ((flags & O_ACCMODE) == O_RDONLY) {
1147 + rc = PTR_ERR((*lower_file));
1148 + goto out;
1149 + }
1150 +diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
1151 +index 0dc5a3d..de42310 100644
1152 +--- a/fs/ecryptfs/miscdev.c
1153 ++++ b/fs/ecryptfs/miscdev.c
1154 +@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
1155 + mutex_lock(&ecryptfs_daemon_hash_mux);
1156 + /* TODO: Just use file->private_data? */
1157 + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
1158 +- BUG_ON(rc || !daemon);
1159 ++ if (rc || !daemon) {
1160 ++ mutex_unlock(&ecryptfs_daemon_hash_mux);
1161 ++ return -EINVAL;
1162 ++ }
1163 + mutex_lock(&daemon->mux);
1164 + mutex_unlock(&ecryptfs_daemon_hash_mux);
1165 + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
1166 +@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
1167 + goto out_unlock_daemon;
1168 + }
1169 + daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
1170 ++ file->private_data = daemon;
1171 + atomic_inc(&ecryptfs_num_miscdev_opens);
1172 + out_unlock_daemon:
1173 + mutex_unlock(&daemon->mux);
1174 +@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
1175 +
1176 + mutex_lock(&ecryptfs_daemon_hash_mux);
1177 + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
1178 +- BUG_ON(rc || !daemon);
1179 ++ if (rc || !daemon)
1180 ++ daemon = file->private_data;
1181 + mutex_lock(&daemon->mux);
1182 +- BUG_ON(daemon->pid != task_pid(current));
1183 + BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
1184 + daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
1185 + atomic_dec(&ecryptfs_num_miscdev_opens);
1186 +@@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
1187 + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
1188 + u16 msg_flags, struct ecryptfs_daemon *daemon)
1189 + {
1190 +- int rc = 0;
1191 ++ struct ecryptfs_message *msg;
1192 +
1193 +- mutex_lock(&msg_ctx->mux);
1194 +- msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
1195 +- GFP_KERNEL);
1196 +- if (!msg_ctx->msg) {
1197 +- rc = -ENOMEM;
1198 ++ msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
1199 ++ if (!msg) {
1200 + printk(KERN_ERR "%s: Out of memory whilst attempting "
1201 + "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
1202 +- (sizeof(*msg_ctx->msg) + data_size));
1203 +- goto out_unlock;
1204 ++ (sizeof(*msg) + data_size));
1205 ++ return -ENOMEM;
1206 + }
1207 ++
1208 ++ mutex_lock(&msg_ctx->mux);
1209 ++ msg_ctx->msg = msg;
1210 + msg_ctx->msg->index = msg_ctx->index;
1211 + msg_ctx->msg->data_len = data_size;
1212 + msg_ctx->type = msg_type;
1213 + memcpy(msg_ctx->msg->data, data, data_size);
1214 + msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
1215 +- mutex_lock(&daemon->mux);
1216 + list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
1217 ++ mutex_unlock(&msg_ctx->mux);
1218 ++
1219 ++ mutex_lock(&daemon->mux);
1220 + daemon->num_queued_msg_ctx++;
1221 + wake_up_interruptible(&daemon->wait);
1222 + mutex_unlock(&daemon->mux);
1223 +-out_unlock:
1224 +- mutex_unlock(&msg_ctx->mux);
1225 +- return rc;
1226 ++
1227 ++ return 0;
1228 + }
1229 +
1230 + /**
1231 +@@ -246,8 +251,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
1232 + mutex_lock(&ecryptfs_daemon_hash_mux);
1233 + /* TODO: Just use file->private_data? */
1234 + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
1235 +- BUG_ON(rc || !daemon);
1236 ++ if (rc || !daemon) {
1237 ++ mutex_unlock(&ecryptfs_daemon_hash_mux);
1238 ++ return -EINVAL;
1239 ++ }
1240 + mutex_lock(&daemon->mux);
1241 ++ if (task_pid(current) != daemon->pid) {
1242 ++ mutex_unlock(&daemon->mux);
1243 ++ mutex_unlock(&ecryptfs_daemon_hash_mux);
1244 ++ return -EPERM;
1245 ++ }
1246 + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
1247 + rc = 0;
1248 + mutex_unlock(&ecryptfs_daemon_hash_mux);
1249 +@@ -284,9 +297,6 @@ check_list:
1250 + * message from the queue; try again */
1251 + goto check_list;
1252 + }
1253 +- BUG_ON(euid != daemon->euid);
1254 +- BUG_ON(current_user_ns() != daemon->user_ns);
1255 +- BUG_ON(task_pid(current) != daemon->pid);
1256 + msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
1257 + struct ecryptfs_msg_ctx, daemon_out_list);
1258 + BUG_ON(!msg_ctx);
1259 +diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
1260 +index 08a07a2..57ceaf3 100644
1261 +--- a/fs/nilfs2/gcinode.c
1262 ++++ b/fs/nilfs2/gcinode.c
1263 +@@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
1264 + while (!list_empty(head)) {
1265 + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
1266 + list_del_init(&ii->i_dirty);
1267 ++ truncate_inode_pages(&ii->vfs_inode.i_data, 0);
1268 ++ nilfs_btnode_cache_clear(&ii->i_btnode_cache);
1269 + iput(&ii->vfs_inode);
1270 + }
1271 + }
1272 +diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
1273 +index bb24ab6..6f24e67 100644
1274 +--- a/fs/nilfs2/segment.c
1275 ++++ b/fs/nilfs2/segment.c
1276 +@@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
1277 + if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
1278 + continue;
1279 + list_del_init(&ii->i_dirty);
1280 ++ truncate_inode_pages(&ii->vfs_inode.i_data, 0);
1281 ++ nilfs_btnode_cache_clear(&ii->i_btnode_cache);
1282 + iput(&ii->vfs_inode);
1283 + }
1284 + }
1285 +diff --git a/fs/open.c b/fs/open.c
1286 +index b52cf01..7e18c4d 100644
1287 +--- a/fs/open.c
1288 ++++ b/fs/open.c
1289 +@@ -396,10 +396,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
1290 + {
1291 + struct file *file;
1292 + struct inode *inode;
1293 +- int error;
1294 ++ int error, fput_needed;
1295 +
1296 + error = -EBADF;
1297 +- file = fget(fd);
1298 ++ file = fget_raw_light(fd, &fput_needed);
1299 + if (!file)
1300 + goto out;
1301 +
1302 +@@ -413,7 +413,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
1303 + if (!error)
1304 + set_fs_pwd(current->fs, &file->f_path);
1305 + out_putf:
1306 +- fput(file);
1307 ++ fput_light(file, fput_needed);
1308 + out:
1309 + return error;
1310 + }
1311 +diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
1312 +index fbb0b47..d5378d0 100644
1313 +--- a/fs/ramfs/file-nommu.c
1314 ++++ b/fs/ramfs/file-nommu.c
1315 +@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
1316 +
1317 + /* prevent the page from being discarded on memory pressure */
1318 + SetPageDirty(page);
1319 ++ SetPageUptodate(page);
1320 +
1321 + unlock_page(page);
1322 + put_page(page);
1323 +diff --git a/fs/udf/super.c b/fs/udf/super.c
1324 +index 7f0e18a..a8e867a 100644
1325 +--- a/fs/udf/super.c
1326 ++++ b/fs/udf/super.c
1327 +@@ -56,6 +56,7 @@
1328 + #include <linux/seq_file.h>
1329 + #include <linux/bitmap.h>
1330 + #include <linux/crc-itu-t.h>
1331 ++#include <linux/log2.h>
1332 + #include <asm/byteorder.h>
1333 +
1334 + #include "udf_sb.h"
1335 +@@ -1244,16 +1245,65 @@ out_bh:
1336 + return ret;
1337 + }
1338 +
1339 ++static int udf_load_sparable_map(struct super_block *sb,
1340 ++ struct udf_part_map *map,
1341 ++ struct sparablePartitionMap *spm)
1342 ++{
1343 ++ uint32_t loc;
1344 ++ uint16_t ident;
1345 ++ struct sparingTable *st;
1346 ++ struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing;
1347 ++ int i;
1348 ++ struct buffer_head *bh;
1349 ++
1350 ++ map->s_partition_type = UDF_SPARABLE_MAP15;
1351 ++ sdata->s_packet_len = le16_to_cpu(spm->packetLength);
1352 ++ if (!is_power_of_2(sdata->s_packet_len)) {
1353 ++ udf_error(sb, __func__, "error loading logical volume descriptor: "
1354 ++ "Invalid packet length %u\n",
1355 ++ (unsigned)sdata->s_packet_len);
1356 ++ return -EIO;
1357 ++ }
1358 ++ if (spm->numSparingTables > 4) {
1359 ++ udf_error(sb, __func__, "error loading logical volume descriptor: "
1360 ++ "Too many sparing tables (%d)\n",
1361 ++ (int)spm->numSparingTables);
1362 ++ return -EIO;
1363 ++ }
1364 ++
1365 ++ for (i = 0; i < spm->numSparingTables; i++) {
1366 ++ loc = le32_to_cpu(spm->locSparingTable[i]);
1367 ++ bh = udf_read_tagged(sb, loc, loc, &ident);
1368 ++ if (!bh)
1369 ++ continue;
1370 ++
1371 ++ st = (struct sparingTable *)bh->b_data;
1372 ++ if (ident != 0 ||
1373 ++ strncmp(st->sparingIdent.ident, UDF_ID_SPARING,
1374 ++ strlen(UDF_ID_SPARING)) ||
1375 ++ sizeof(*st) + le16_to_cpu(st->reallocationTableLen) >
1376 ++ sb->s_blocksize) {
1377 ++ brelse(bh);
1378 ++ continue;
1379 ++ }
1380 ++
1381 ++ sdata->s_spar_map[i] = bh;
1382 ++ }
1383 ++ map->s_partition_func = udf_get_pblock_spar15;
1384 ++ return 0;
1385 ++}
1386 ++
1387 + static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1388 + struct kernel_lb_addr *fileset)
1389 + {
1390 + struct logicalVolDesc *lvd;
1391 +- int i, j, offset;
1392 ++ int i, offset;
1393 + uint8_t type;
1394 + struct udf_sb_info *sbi = UDF_SB(sb);
1395 + struct genericPartitionMap *gpm;
1396 + uint16_t ident;
1397 + struct buffer_head *bh;
1398 ++ unsigned int table_len;
1399 + int ret = 0;
1400 +
1401 + bh = udf_read_tagged(sb, block, block, &ident);
1402 +@@ -1261,15 +1311,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1403 + return 1;
1404 + BUG_ON(ident != TAG_IDENT_LVD);
1405 + lvd = (struct logicalVolDesc *)bh->b_data;
1406 +-
1407 +- i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
1408 +- if (i != 0) {
1409 +- ret = i;
1410 ++ table_len = le32_to_cpu(lvd->mapTableLength);
1411 ++ if (sizeof(*lvd) + table_len > sb->s_blocksize) {
1412 ++ udf_error(sb, __func__, "error loading logical volume descriptor: "
1413 ++ "Partition table too long (%u > %lu)\n", table_len,
1414 ++ sb->s_blocksize - sizeof(*lvd));
1415 + goto out_bh;
1416 + }
1417 +
1418 ++ ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
1419 ++ if (ret)
1420 ++ goto out_bh;
1421 ++
1422 + for (i = 0, offset = 0;
1423 +- i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength);
1424 ++ i < sbi->s_partitions && offset < table_len;
1425 + i++, offset += gpm->partitionMapLength) {
1426 + struct udf_part_map *map = &sbi->s_partmaps[i];
1427 + gpm = (struct genericPartitionMap *)
1428 +@@ -1304,38 +1359,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1429 + } else if (!strncmp(upm2->partIdent.ident,
1430 + UDF_ID_SPARABLE,
1431 + strlen(UDF_ID_SPARABLE))) {
1432 +- uint32_t loc;
1433 +- struct sparingTable *st;
1434 +- struct sparablePartitionMap *spm =
1435 +- (struct sparablePartitionMap *)gpm;
1436 +-
1437 +- map->s_partition_type = UDF_SPARABLE_MAP15;
1438 +- map->s_type_specific.s_sparing.s_packet_len =
1439 +- le16_to_cpu(spm->packetLength);
1440 +- for (j = 0; j < spm->numSparingTables; j++) {
1441 +- struct buffer_head *bh2;
1442 +-
1443 +- loc = le32_to_cpu(
1444 +- spm->locSparingTable[j]);
1445 +- bh2 = udf_read_tagged(sb, loc, loc,
1446 +- &ident);
1447 +- map->s_type_specific.s_sparing.
1448 +- s_spar_map[j] = bh2;
1449 +-
1450 +- if (bh2 == NULL)
1451 +- continue;
1452 +-
1453 +- st = (struct sparingTable *)bh2->b_data;
1454 +- if (ident != 0 || strncmp(
1455 +- st->sparingIdent.ident,
1456 +- UDF_ID_SPARING,
1457 +- strlen(UDF_ID_SPARING))) {
1458 +- brelse(bh2);
1459 +- map->s_type_specific.s_sparing.
1460 +- s_spar_map[j] = NULL;
1461 +- }
1462 +- }
1463 +- map->s_partition_func = udf_get_pblock_spar15;
1464 ++ if (udf_load_sparable_map(sb, map,
1465 ++ (struct sparablePartitionMap *)gpm) < 0)
1466 ++ goto out_bh;
1467 + } else if (!strncmp(upm2->partIdent.ident,
1468 + UDF_ID_METADATA,
1469 + strlen(UDF_ID_METADATA))) {
1470 +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
1471 +index 9f7c3eb..aa2d80b 100644
1472 +--- a/include/linux/mmzone.h
1473 ++++ b/include/linux/mmzone.h
1474 +@@ -633,7 +633,7 @@ typedef struct pglist_data {
1475 + range, including holes */
1476 + int node_id;
1477 + wait_queue_head_t kswapd_wait;
1478 +- struct task_struct *kswapd;
1479 ++ struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
1480 + int kswapd_max_order;
1481 + enum zone_type classzone_idx;
1482 + } pg_data_t;
1483 +diff --git a/include/linux/pci.h b/include/linux/pci.h
1484 +index ff5970b..c446b5c 100644
1485 +--- a/include/linux/pci.h
1486 ++++ b/include/linux/pci.h
1487 +@@ -174,8 +174,6 @@ enum pci_dev_flags {
1488 + PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
1489 + /* Device configuration is irrevocably lost if disabled into D3 */
1490 + PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
1491 +- /* Device causes system crash if in D3 during S3 sleep */
1492 +- PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8,
1493 + };
1494 +
1495 + enum pci_irq_reroute_variant {
1496 +diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
1497 +index abd4436..4af554b 100644
1498 +--- a/include/net/cipso_ipv4.h
1499 ++++ b/include/net/cipso_ipv4.h
1500 +@@ -42,6 +42,7 @@
1501 + #include <net/netlabel.h>
1502 + #include <net/request_sock.h>
1503 + #include <asm/atomic.h>
1504 ++#include <asm/unaligned.h>
1505 +
1506 + /* known doi values */
1507 + #define CIPSO_V4_DOI_UNKNOWN 0x00000000
1508 +@@ -285,7 +286,33 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
1509 + static inline int cipso_v4_validate(const struct sk_buff *skb,
1510 + unsigned char **option)
1511 + {
1512 +- return -ENOSYS;
1513 ++ unsigned char *opt = *option;
1514 ++ unsigned char err_offset = 0;
1515 ++ u8 opt_len = opt[1];
1516 ++ u8 opt_iter;
1517 ++
1518 ++ if (opt_len < 8) {
1519 ++ err_offset = 1;
1520 ++ goto out;
1521 ++ }
1522 ++
1523 ++ if (get_unaligned_be32(&opt[2]) == 0) {
1524 ++ err_offset = 2;
1525 ++ goto out;
1526 ++ }
1527 ++
1528 ++ for (opt_iter = 6; opt_iter < opt_len;) {
1529 ++ if (opt[opt_iter + 1] > (opt_len - opt_iter)) {
1530 ++ err_offset = opt_iter + 1;
1531 ++ goto out;
1532 ++ }
1533 ++ opt_iter += opt[opt_iter + 1];
1534 ++ }
1535 ++
1536 ++out:
1537 ++ *option = opt + err_offset;
1538 ++ return err_offset;
1539 ++
1540 + }
1541 + #endif /* CONFIG_NETLABEL */
1542 +
1543 +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
1544 +index f1fbe2d..af2e047 100644
1545 +--- a/include/net/sch_generic.h
1546 ++++ b/include/net/sch_generic.h
1547 +@@ -219,13 +219,16 @@ struct tcf_proto {
1548 +
1549 + struct qdisc_skb_cb {
1550 + unsigned int pkt_len;
1551 +- unsigned char data[24];
1552 ++ u16 bond_queue_mapping;
1553 ++ u16 _pad;
1554 ++ unsigned char data[20];
1555 + };
1556 +
1557 + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
1558 + {
1559 + struct qdisc_skb_cb *qcb;
1560 +- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz);
1561 ++
1562 ++ BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz);
1563 + BUILD_BUG_ON(sizeof(qcb->data) < sz);
1564 + }
1565 +
1566 +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
1567 +index 0731e81a..672a749 100644
1568 +--- a/kernel/trace/trace.c
1569 ++++ b/kernel/trace/trace.c
1570 +@@ -2432,10 +2432,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1571 + if (cpumask_test_cpu(cpu, tracing_cpumask) &&
1572 + !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
1573 + atomic_inc(&global_trace.data[cpu]->disabled);
1574 ++ ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
1575 + }
1576 + if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
1577 + cpumask_test_cpu(cpu, tracing_cpumask_new)) {
1578 + atomic_dec(&global_trace.data[cpu]->disabled);
1579 ++ ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
1580 + }
1581 + }
1582 + arch_spin_unlock(&ftrace_max_lock);
1583 +diff --git a/mm/compaction.c b/mm/compaction.c
1584 +index c4bc5ac..adc5336 100644
1585 +--- a/mm/compaction.c
1586 ++++ b/mm/compaction.c
1587 +@@ -596,8 +596,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
1588 + if (err) {
1589 + putback_lru_pages(&cc->migratepages);
1590 + cc->nr_migratepages = 0;
1591 ++ if (err == -ENOMEM) {
1592 ++ ret = COMPACT_PARTIAL;
1593 ++ goto out;
1594 ++ }
1595 + }
1596 +-
1597 + }
1598 +
1599 + out:
1600 +diff --git a/mm/madvise.c b/mm/madvise.c
1601 +index 2221491..deabe5f6 100644
1602 +--- a/mm/madvise.c
1603 ++++ b/mm/madvise.c
1604 +@@ -13,6 +13,7 @@
1605 + #include <linux/hugetlb.h>
1606 + #include <linux/sched.h>
1607 + #include <linux/ksm.h>
1608 ++#include <linux/file.h>
1609 +
1610 + /*
1611 + * Any behaviour which results in changes to the vma->vm_flags needs to
1612 +@@ -197,14 +198,16 @@ static long madvise_remove(struct vm_area_struct *vma,
1613 + struct address_space *mapping;
1614 + loff_t offset, endoff;
1615 + int error;
1616 ++ struct file *f;
1617 +
1618 + *prev = NULL; /* tell sys_madvise we drop mmap_sem */
1619 +
1620 + if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
1621 + return -EINVAL;
1622 +
1623 +- if (!vma->vm_file || !vma->vm_file->f_mapping
1624 +- || !vma->vm_file->f_mapping->host) {
1625 ++ f = vma->vm_file;
1626 ++
1627 ++ if (!f || !f->f_mapping || !f->f_mapping->host) {
1628 + return -EINVAL;
1629 + }
1630 +
1631 +@@ -218,9 +221,16 @@ static long madvise_remove(struct vm_area_struct *vma,
1632 + endoff = (loff_t)(end - vma->vm_start - 1)
1633 + + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
1634 +
1635 +- /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
1636 ++ /*
1637 ++ * vmtruncate_range may need to take i_mutex and i_alloc_sem.
1638 ++ * We need to explicitly grab a reference because the vma (and
1639 ++ * hence the vma's reference to the file) can go away as soon as
1640 ++ * we drop mmap_sem.
1641 ++ */
1642 ++ get_file(f);
1643 + up_read(&current->mm->mmap_sem);
1644 + error = vmtruncate_range(mapping->host, offset, endoff);
1645 ++ fput(f);
1646 + down_read(&current->mm->mmap_sem);
1647 + return error;
1648 + }
1649 +diff --git a/mm/vmscan.c b/mm/vmscan.c
1650 +index 769935d..1b0ed36 100644
1651 +--- a/mm/vmscan.c
1652 ++++ b/mm/vmscan.c
1653 +@@ -2952,14 +2952,17 @@ int kswapd_run(int nid)
1654 + }
1655 +
1656 + /*
1657 +- * Called by memory hotplug when all memory in a node is offlined.
1658 ++ * Called by memory hotplug when all memory in a node is offlined. Caller must
1659 ++ * hold lock_memory_hotplug().
1660 + */
1661 + void kswapd_stop(int nid)
1662 + {
1663 + struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
1664 +
1665 +- if (kswapd)
1666 ++ if (kswapd) {
1667 + kthread_stop(kswapd);
1668 ++ NODE_DATA(nid)->kswapd = NULL;
1669 ++ }
1670 + }
1671 +
1672 + static int __init kswapd_init(void)
1673 +diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
1674 +index 4490873..eae6a4e 100644
1675 +--- a/net/bridge/br_if.c
1676 ++++ b/net/bridge/br_if.c
1677 +@@ -241,6 +241,7 @@ int br_add_bridge(struct net *net, const char *name)
1678 + return -ENOMEM;
1679 +
1680 + dev_net_set(dev, net);
1681 ++ dev->rtnl_link_ops = &br_link_ops;
1682 +
1683 + res = register_netdev(dev);
1684 + if (res)
1685 +diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
1686 +index 2c16055..71861a9 100644
1687 +--- a/net/bridge/br_netlink.c
1688 ++++ b/net/bridge/br_netlink.c
1689 +@@ -203,7 +203,7 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
1690 + return 0;
1691 + }
1692 +
1693 +-static struct rtnl_link_ops br_link_ops __read_mostly = {
1694 ++struct rtnl_link_ops br_link_ops __read_mostly = {
1695 + .kind = "bridge",
1696 + .priv_size = sizeof(struct net_bridge),
1697 + .setup = br_dev_setup,
1698 +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
1699 +index 1ca1b1c..7c1f3a0 100644
1700 +--- a/net/bridge/br_private.h
1701 ++++ b/net/bridge/br_private.h
1702 +@@ -529,6 +529,7 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
1703 + #endif
1704 +
1705 + /* br_netlink.c */
1706 ++extern struct rtnl_link_ops br_link_ops;
1707 + extern int br_netlink_init(void);
1708 + extern void br_netlink_fini(void);
1709 + extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
1710 +diff --git a/net/core/ethtool.c b/net/core/ethtool.c
1711 +index 4fb7704..891b19f 100644
1712 +--- a/net/core/ethtool.c
1713 ++++ b/net/core/ethtool.c
1714 +@@ -1964,6 +1964,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1715 + case ETHTOOL_GRXCSUM:
1716 + case ETHTOOL_GTXCSUM:
1717 + case ETHTOOL_GSG:
1718 ++ case ETHTOOL_GSSET_INFO:
1719 + case ETHTOOL_GSTRINGS:
1720 + case ETHTOOL_GTSO:
1721 + case ETHTOOL_GPERMADDR:
1722 +diff --git a/net/core/netpoll.c b/net/core/netpoll.c
1723 +index 05db410..207a178 100644
1724 +--- a/net/core/netpoll.c
1725 ++++ b/net/core/netpoll.c
1726 +@@ -357,22 +357,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev);
1727 +
1728 + void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
1729 + {
1730 +- int total_len, eth_len, ip_len, udp_len;
1731 ++ int total_len, ip_len, udp_len;
1732 + struct sk_buff *skb;
1733 + struct udphdr *udph;
1734 + struct iphdr *iph;
1735 + struct ethhdr *eth;
1736 +
1737 + udp_len = len + sizeof(*udph);
1738 +- ip_len = eth_len = udp_len + sizeof(*iph);
1739 +- total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
1740 ++ ip_len = udp_len + sizeof(*iph);
1741 ++ total_len = ip_len + LL_RESERVED_SPACE(np->dev);
1742 +
1743 +- skb = find_skb(np, total_len, total_len - len);
1744 ++ skb = find_skb(np, total_len + np->dev->needed_tailroom,
1745 ++ total_len - len);
1746 + if (!skb)
1747 + return;
1748 +
1749 + skb_copy_to_linear_data(skb, msg, len);
1750 +- skb->len += len;
1751 ++ skb_put(skb, len);
1752 +
1753 + skb_push(skb, sizeof(*udph));
1754 + skb_reset_transport_header(skb);
1755 +diff --git a/net/core/sock.c b/net/core/sock.c
1756 +index aebb419..b4bb59a 100644
1757 +--- a/net/core/sock.c
1758 ++++ b/net/core/sock.c
1759 +@@ -1501,6 +1501,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1760 + gfp_t gfp_mask;
1761 + long timeo;
1762 + int err;
1763 ++ int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1764 ++
1765 ++ err = -EMSGSIZE;
1766 ++ if (npages > MAX_SKB_FRAGS)
1767 ++ goto failure;
1768 +
1769 + gfp_mask = sk->sk_allocation;
1770 + if (gfp_mask & __GFP_WAIT)
1771 +@@ -1519,14 +1524,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1772 + if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1773 + skb = alloc_skb(header_len, gfp_mask);
1774 + if (skb) {
1775 +- int npages;
1776 + int i;
1777 +
1778 + /* No pages, we're done... */
1779 + if (!data_len)
1780 + break;
1781 +
1782 +- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1783 + skb->truesize += data_len;
1784 + skb_shinfo(skb)->nr_frags = npages;
1785 + for (i = 0; i < npages; i++) {
1786 +diff --git a/net/ipv6/route.c b/net/ipv6/route.c
1787 +index 8e600f8..7c5b4cb 100644
1788 +--- a/net/ipv6/route.c
1789 ++++ b/net/ipv6/route.c
1790 +@@ -2846,10 +2846,6 @@ static int __net_init ip6_route_net_init(struct net *net)
1791 + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
1792 + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
1793 +
1794 +-#ifdef CONFIG_PROC_FS
1795 +- proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
1796 +- proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
1797 +-#endif
1798 + net->ipv6.ip6_rt_gc_expire = 30*HZ;
1799 +
1800 + ret = 0;
1801 +@@ -2870,10 +2866,6 @@ out_ip6_dst_ops:
1802 +
1803 + static void __net_exit ip6_route_net_exit(struct net *net)
1804 + {
1805 +-#ifdef CONFIG_PROC_FS
1806 +- proc_net_remove(net, "ipv6_route");
1807 +- proc_net_remove(net, "rt6_stats");
1808 +-#endif
1809 + kfree(net->ipv6.ip6_null_entry);
1810 + #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1811 + kfree(net->ipv6.ip6_prohibit_entry);
1812 +@@ -2882,11 +2874,33 @@ static void __net_exit ip6_route_net_exit(struct net *net)
1813 + dst_entries_destroy(&net->ipv6.ip6_dst_ops);
1814 + }
1815 +
1816 ++static int __net_init ip6_route_net_init_late(struct net *net)
1817 ++{
1818 ++#ifdef CONFIG_PROC_FS
1819 ++ proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
1820 ++ proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
1821 ++#endif
1822 ++ return 0;
1823 ++}
1824 ++
1825 ++static void __net_exit ip6_route_net_exit_late(struct net *net)
1826 ++{
1827 ++#ifdef CONFIG_PROC_FS
1828 ++ proc_net_remove(net, "ipv6_route");
1829 ++ proc_net_remove(net, "rt6_stats");
1830 ++#endif
1831 ++}
1832 ++
1833 + static struct pernet_operations ip6_route_net_ops = {
1834 + .init = ip6_route_net_init,
1835 + .exit = ip6_route_net_exit,
1836 + };
1837 +
1838 ++static struct pernet_operations ip6_route_net_late_ops = {
1839 ++ .init = ip6_route_net_init_late,
1840 ++ .exit = ip6_route_net_exit_late,
1841 ++};
1842 ++
1843 + static struct notifier_block ip6_route_dev_notifier = {
1844 + .notifier_call = ip6_route_dev_notify,
1845 + .priority = 0,
1846 +@@ -2936,19 +2950,25 @@ int __init ip6_route_init(void)
1847 + if (ret)
1848 + goto xfrm6_init;
1849 +
1850 ++ ret = register_pernet_subsys(&ip6_route_net_late_ops);
1851 ++ if (ret)
1852 ++ goto fib6_rules_init;
1853 ++
1854 + ret = -ENOBUFS;
1855 + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
1856 + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
1857 + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
1858 +- goto fib6_rules_init;
1859 ++ goto out_register_late_subsys;
1860 +
1861 + ret = register_netdevice_notifier(&ip6_route_dev_notifier);
1862 + if (ret)
1863 +- goto fib6_rules_init;
1864 ++ goto out_register_late_subsys;
1865 +
1866 + out:
1867 + return ret;
1868 +
1869 ++out_register_late_subsys:
1870 ++ unregister_pernet_subsys(&ip6_route_net_late_ops);
1871 + fib6_rules_init:
1872 + fib6_rules_cleanup();
1873 + xfrm6_init:
1874 +@@ -2967,6 +2987,7 @@ out_kmem_cache:
1875 + void ip6_route_cleanup(void)
1876 + {
1877 + unregister_netdevice_notifier(&ip6_route_dev_notifier);
1878 ++ unregister_pernet_subsys(&ip6_route_net_late_ops);
1879 + fib6_rules_cleanup();
1880 + xfrm6_fini();
1881 + fib6_gc_cleanup();
1882 +diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
1883 +index d2726a7..3c55f63 100644
1884 +--- a/net/l2tp/l2tp_eth.c
1885 ++++ b/net/l2tp/l2tp_eth.c
1886 +@@ -167,6 +167,7 @@ static void l2tp_eth_delete(struct l2tp_session *session)
1887 + if (dev) {
1888 + unregister_netdev(dev);
1889 + spriv->dev = NULL;
1890 ++ module_put(THIS_MODULE);
1891 + }
1892 + }
1893 + }
1894 +@@ -254,6 +255,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
1895 + if (rc < 0)
1896 + goto out_del_dev;
1897 +
1898 ++ __module_get(THIS_MODULE);
1899 + /* Must be done after register_netdev() */
1900 + strlcpy(session->ifname, dev->name, IFNAMSIZ);
1901 +
1902 +diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
1903 +index 4100065..667f559 100644
1904 +--- a/net/mac80211/rx.c
1905 ++++ b/net/mac80211/rx.c
1906 +@@ -2291,7 +2291,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
1907 + * frames that we didn't handle, including returning unknown
1908 + * ones. For all other modes we will return them to the sender,
1909 + * setting the 0x80 bit in the action category, as required by
1910 +- * 802.11-2007 7.3.1.11.
1911 ++ * 802.11-2012 9.24.4.
1912 + * Newer versions of hostapd shall also use the management frame
1913 + * registration mechanisms, but older ones still use cooked
1914 + * monitor interfaces so push all frames there.
1915 +@@ -2301,6 +2301,9 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
1916 + sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
1917 + return RX_DROP_MONITOR;
1918 +
1919 ++ if (is_multicast_ether_addr(mgmt->da))
1920 ++ return RX_DROP_MONITOR;
1921 ++
1922 + /* do not return rejected action frames */
1923 + if (mgmt->u.action.category & 0x80)
1924 + return RX_DROP_UNUSABLE;
1925 +diff --git a/net/wireless/reg.c b/net/wireless/reg.c
1926 +index 7457697..90b73d1 100644
1927 +--- a/net/wireless/reg.c
1928 ++++ b/net/wireless/reg.c
1929 +@@ -1358,7 +1358,7 @@ static void reg_set_request_processed(void)
1930 + spin_unlock(&reg_requests_lock);
1931 +
1932 + if (last_request->initiator == NL80211_REGDOM_SET_BY_USER)
1933 +- cancel_delayed_work_sync(&reg_timeout);
1934 ++ cancel_delayed_work(&reg_timeout);
1935 +
1936 + if (need_more_processing)
1937 + schedule_work(&reg_work);
1938 +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
1939 +index 51412e1..baa7a49 100644
1940 +--- a/sound/pci/hda/patch_realtek.c
1941 ++++ b/sound/pci/hda/patch_realtek.c
1942 +@@ -20132,6 +20132,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
1943 + { .id = 0x10ec0272, .name = "ALC272", .patch = patch_alc662 },
1944 + { .id = 0x10ec0275, .name = "ALC275", .patch = patch_alc269 },
1945 + { .id = 0x10ec0276, .name = "ALC276", .patch = patch_alc269 },
1946 ++ { .id = 0x10ec0280, .name = "ALC280", .patch = patch_alc269 },
1947 + { .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660",
1948 + .patch = patch_alc861 },
1949 + { .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd },
1950 +diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
1951 +index 789453d..0b08bb7 100644
1952 +--- a/sound/soc/codecs/tlv320aic3x.c
1953 ++++ b/sound/soc/codecs/tlv320aic3x.c
1954 +@@ -949,9 +949,7 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream,
1955 + }
1956 +
1957 + found:
1958 +- data = snd_soc_read(codec, AIC3X_PLL_PROGA_REG);
1959 +- snd_soc_write(codec, AIC3X_PLL_PROGA_REG,
1960 +- data | (pll_p << PLLP_SHIFT));
1961 ++ snd_soc_update_bits(codec, AIC3X_PLL_PROGA_REG, PLLP_MASK, pll_p);
1962 + snd_soc_write(codec, AIC3X_OVRF_STATUS_AND_PLLR_REG,
1963 + pll_r << PLLR_SHIFT);
1964 + snd_soc_write(codec, AIC3X_PLL_PROGB_REG, pll_j << PLLJ_SHIFT);
1965 +diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h
1966 +index 06a1978..16d9999 100644
1967 +--- a/sound/soc/codecs/tlv320aic3x.h
1968 ++++ b/sound/soc/codecs/tlv320aic3x.h
1969 +@@ -166,6 +166,7 @@
1970 +
1971 + /* PLL registers bitfields */
1972 + #define PLLP_SHIFT 0
1973 ++#define PLLP_MASK 7
1974 + #define PLLQ_SHIFT 3
1975 + #define PLLR_SHIFT 0
1976 + #define PLLJ_SHIFT 2
1977
1978 Added: genpatches-2.6/trunk/3.0/1037_linux-3.0.38.patch
1979 ===================================================================
1980 --- genpatches-2.6/trunk/3.0/1037_linux-3.0.38.patch (rev 0)
1981 +++ genpatches-2.6/trunk/3.0/1037_linux-3.0.38.patch 2012-08-01 23:35:55 UTC (rev 2182)
1982 @@ -0,0 +1,901 @@
1983 +diff --git a/Makefile b/Makefile
1984 +index 009160e..5fdfaa8 100644
1985 +--- a/Makefile
1986 ++++ b/Makefile
1987 +@@ -1,6 +1,6 @@
1988 + VERSION = 3
1989 + PATCHLEVEL = 0
1990 +-SUBLEVEL = 37
1991 ++SUBLEVEL = 38
1992 + EXTRAVERSION =
1993 + NAME = Sneaky Weasel
1994 +
1995 +diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c
1996 +index e8f2be2..df14954 100644
1997 +--- a/arch/arm/plat-samsung/adc.c
1998 ++++ b/arch/arm/plat-samsung/adc.c
1999 +@@ -143,11 +143,13 @@ int s3c_adc_start(struct s3c_adc_client *client,
2000 + return -EINVAL;
2001 + }
2002 +
2003 +- if (client->is_ts && adc->ts_pend)
2004 +- return -EAGAIN;
2005 +-
2006 + spin_lock_irqsave(&adc->lock, flags);
2007 +
2008 ++ if (client->is_ts && adc->ts_pend) {
2009 ++ spin_unlock_irqrestore(&adc->lock, flags);
2010 ++ return -EAGAIN;
2011 ++ }
2012 ++
2013 + client->channel = channel;
2014 + client->nr_samples = nr_samples;
2015 +
2016 +diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
2017 +index 5f52477..b358c87 100644
2018 +--- a/drivers/hwmon/it87.c
2019 ++++ b/drivers/hwmon/it87.c
2020 +@@ -2057,7 +2057,7 @@ static void __devinit it87_init_device(struct platform_device *pdev)
2021 +
2022 + /* Start monitoring */
2023 + it87_write_value(data, IT87_REG_CONFIG,
2024 +- (it87_read_value(data, IT87_REG_CONFIG) & 0x36)
2025 ++ (it87_read_value(data, IT87_REG_CONFIG) & 0x3e)
2026 + | (update_vbat ? 0x41 : 0x01));
2027 + }
2028 +
2029 +diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
2030 +index 56abf3d..92c7be1 100644
2031 +--- a/drivers/input/joystick/xpad.c
2032 ++++ b/drivers/input/joystick/xpad.c
2033 +@@ -142,6 +142,7 @@ static const struct xpad_device {
2034 + { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX },
2035 + { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
2036 + { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX },
2037 ++ { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
2038 + { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX },
2039 + { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX },
2040 + { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX },
2041 +diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
2042 +index f732877..d5cda35 100644
2043 +--- a/drivers/media/dvb/dvb-core/dvbdev.c
2044 ++++ b/drivers/media/dvb/dvb-core/dvbdev.c
2045 +@@ -243,6 +243,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
2046 + if (minor == MAX_DVB_MINORS) {
2047 + kfree(dvbdevfops);
2048 + kfree(dvbdev);
2049 ++ up_write(&minor_rwsem);
2050 + mutex_unlock(&dvbdev_register_lock);
2051 + return -EINVAL;
2052 + }
2053 +diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
2054 +index 357e8c5..63c8048 100644
2055 +--- a/drivers/mtd/nand/nandsim.c
2056 ++++ b/drivers/mtd/nand/nandsim.c
2057 +@@ -28,7 +28,7 @@
2058 + #include <linux/module.h>
2059 + #include <linux/moduleparam.h>
2060 + #include <linux/vmalloc.h>
2061 +-#include <asm/div64.h>
2062 ++#include <linux/math64.h>
2063 + #include <linux/slab.h>
2064 + #include <linux/errno.h>
2065 + #include <linux/string.h>
2066 +@@ -547,12 +547,6 @@ static char *get_partition_name(int i)
2067 + return kstrdup(buf, GFP_KERNEL);
2068 + }
2069 +
2070 +-static uint64_t divide(uint64_t n, uint32_t d)
2071 +-{
2072 +- do_div(n, d);
2073 +- return n;
2074 +-}
2075 +-
2076 + /*
2077 + * Initialize the nandsim structure.
2078 + *
2079 +@@ -581,7 +575,7 @@ static int init_nandsim(struct mtd_info *mtd)
2080 + ns->geom.oobsz = mtd->oobsize;
2081 + ns->geom.secsz = mtd->erasesize;
2082 + ns->geom.pgszoob = ns->geom.pgsz + ns->geom.oobsz;
2083 +- ns->geom.pgnum = divide(ns->geom.totsz, ns->geom.pgsz);
2084 ++ ns->geom.pgnum = div_u64(ns->geom.totsz, ns->geom.pgsz);
2085 + ns->geom.totszoob = ns->geom.totsz + (uint64_t)ns->geom.pgnum * ns->geom.oobsz;
2086 + ns->geom.secshift = ffs(ns->geom.secsz) - 1;
2087 + ns->geom.pgshift = chip->page_shift;
2088 +@@ -924,7 +918,7 @@ static int setup_wear_reporting(struct mtd_info *mtd)
2089 +
2090 + if (!rptwear)
2091 + return 0;
2092 +- wear_eb_count = divide(mtd->size, mtd->erasesize);
2093 ++ wear_eb_count = div_u64(mtd->size, mtd->erasesize);
2094 + mem = wear_eb_count * sizeof(unsigned long);
2095 + if (mem / sizeof(unsigned long) != wear_eb_count) {
2096 + NS_ERR("Too many erase blocks for wear reporting\n");
2097 +diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
2098 +index 8295f21..5278e84 100644
2099 +--- a/drivers/net/e1000e/82571.c
2100 ++++ b/drivers/net/e1000e/82571.c
2101 +@@ -1573,6 +1573,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
2102 + ctrl = er32(CTRL);
2103 + status = er32(STATUS);
2104 + rxcw = er32(RXCW);
2105 ++ /* SYNCH bit and IV bit are sticky */
2106 ++ udelay(10);
2107 ++ rxcw = er32(RXCW);
2108 +
2109 + if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) {
2110 +
2111 +diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
2112 +index 54f0b13..99fa416 100644
2113 +--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
2114 ++++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
2115 +@@ -426,8 +426,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue)
2116 + case QID_RX:
2117 + if (!rt2x00queue_full(queue))
2118 + rt2x00queue_for_each_entry(queue,
2119 +- Q_INDEX_DONE,
2120 + Q_INDEX,
2121 ++ Q_INDEX_DONE,
2122 + NULL,
2123 + rt2x00usb_kick_rx_entry);
2124 + break;
2125 +diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
2126 +index 5ffe7c3..e66bbba 100644
2127 +--- a/drivers/platform/x86/intel_ips.c
2128 ++++ b/drivers/platform/x86/intel_ips.c
2129 +@@ -72,6 +72,7 @@
2130 + #include <linux/string.h>
2131 + #include <linux/tick.h>
2132 + #include <linux/timer.h>
2133 ++#include <linux/dmi.h>
2134 + #include <drm/i915_drm.h>
2135 + #include <asm/msr.h>
2136 + #include <asm/processor.h>
2137 +@@ -1505,6 +1506,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
2138 +
2139 + MODULE_DEVICE_TABLE(pci, ips_id_table);
2140 +
2141 ++static int ips_blacklist_callback(const struct dmi_system_id *id)
2142 ++{
2143 ++ pr_info("Blacklisted intel_ips for %s\n", id->ident);
2144 ++ return 1;
2145 ++}
2146 ++
2147 ++static const struct dmi_system_id ips_blacklist[] = {
2148 ++ {
2149 ++ .callback = ips_blacklist_callback,
2150 ++ .ident = "HP ProBook",
2151 ++ .matches = {
2152 ++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
2153 ++ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
2154 ++ },
2155 ++ },
2156 ++ { } /* terminating entry */
2157 ++};
2158 ++
2159 + static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
2160 + {
2161 + u64 platform_info;
2162 +@@ -1514,6 +1533,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
2163 + u16 htshi, trc, trc_required_mask;
2164 + u8 tse;
2165 +
2166 ++ if (dmi_check_system(ips_blacklist))
2167 ++ return -ENODEV;
2168 ++
2169 + ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
2170 + if (!ips)
2171 + return -ENOMEM;
2172 +diff --git a/fs/buffer.c b/fs/buffer.c
2173 +index 330cbce..d421626 100644
2174 +--- a/fs/buffer.c
2175 ++++ b/fs/buffer.c
2176 +@@ -1084,6 +1084,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
2177 + static struct buffer_head *
2178 + __getblk_slow(struct block_device *bdev, sector_t block, int size)
2179 + {
2180 ++ int ret;
2181 ++ struct buffer_head *bh;
2182 ++
2183 + /* Size must be multiple of hard sectorsize */
2184 + if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
2185 + (size < 512 || size > PAGE_SIZE))) {
2186 +@@ -1096,20 +1099,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
2187 + return NULL;
2188 + }
2189 +
2190 +- for (;;) {
2191 +- struct buffer_head * bh;
2192 +- int ret;
2193 ++retry:
2194 ++ bh = __find_get_block(bdev, block, size);
2195 ++ if (bh)
2196 ++ return bh;
2197 +
2198 ++ ret = grow_buffers(bdev, block, size);
2199 ++ if (ret == 0) {
2200 ++ free_more_memory();
2201 ++ goto retry;
2202 ++ } else if (ret > 0) {
2203 + bh = __find_get_block(bdev, block, size);
2204 + if (bh)
2205 + return bh;
2206 +-
2207 +- ret = grow_buffers(bdev, block, size);
2208 +- if (ret < 0)
2209 +- return NULL;
2210 +- if (ret == 0)
2211 +- free_more_memory();
2212 + }
2213 ++ return NULL;
2214 + }
2215 +
2216 + /*
2217 +diff --git a/fs/fifo.c b/fs/fifo.c
2218 +index b1a524d..cf6f434 100644
2219 +--- a/fs/fifo.c
2220 ++++ b/fs/fifo.c
2221 +@@ -14,7 +14,7 @@
2222 + #include <linux/sched.h>
2223 + #include <linux/pipe_fs_i.h>
2224 +
2225 +-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
2226 ++static int wait_for_partner(struct inode* inode, unsigned int *cnt)
2227 + {
2228 + int cur = *cnt;
2229 +
2230 +@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
2231 + if (signal_pending(current))
2232 + break;
2233 + }
2234 ++ return cur == *cnt ? -ERESTARTSYS : 0;
2235 + }
2236 +
2237 + static void wake_up_partner(struct inode* inode)
2238 +@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
2239 + * seen a writer */
2240 + filp->f_version = pipe->w_counter;
2241 + } else {
2242 +- wait_for_partner(inode, &pipe->w_counter);
2243 +- if(signal_pending(current))
2244 ++ if (wait_for_partner(inode, &pipe->w_counter))
2245 + goto err_rd;
2246 + }
2247 + }
2248 +@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
2249 + wake_up_partner(inode);
2250 +
2251 + if (!pipe->readers) {
2252 +- wait_for_partner(inode, &pipe->r_counter);
2253 +- if (signal_pending(current))
2254 ++ if (wait_for_partner(inode, &pipe->r_counter))
2255 + goto err_wr;
2256 + }
2257 + break;
2258 +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
2259 +index fd0dc30..cc07d27 100644
2260 +--- a/include/linux/hrtimer.h
2261 ++++ b/include/linux/hrtimer.h
2262 +@@ -165,6 +165,7 @@ enum hrtimer_base_type {
2263 + * @lock: lock protecting the base and associated clock bases
2264 + * and timers
2265 + * @active_bases: Bitfield to mark bases with active timers
2266 ++ * @clock_was_set: Indicates that clock was set from irq context.
2267 + * @expires_next: absolute time of the next event which was scheduled
2268 + * via clock_set_next_event()
2269 + * @hres_active: State of high resolution mode
2270 +@@ -177,7 +178,8 @@ enum hrtimer_base_type {
2271 + */
2272 + struct hrtimer_cpu_base {
2273 + raw_spinlock_t lock;
2274 +- unsigned long active_bases;
2275 ++ unsigned int active_bases;
2276 ++ unsigned int clock_was_set;
2277 + #ifdef CONFIG_HIGH_RES_TIMERS
2278 + ktime_t expires_next;
2279 + int hres_active;
2280 +@@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void);
2281 + # define MONOTONIC_RES_NSEC HIGH_RES_NSEC
2282 + # define KTIME_MONOTONIC_RES KTIME_HIGH_RES
2283 +
2284 ++extern void clock_was_set_delayed(void);
2285 ++
2286 + #else
2287 +
2288 + # define MONOTONIC_RES_NSEC LOW_RES_NSEC
2289 +@@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
2290 + {
2291 + return 0;
2292 + }
2293 ++
2294 ++static inline void clock_was_set_delayed(void) { }
2295 ++
2296 + #endif
2297 +
2298 + extern void clock_was_set(void);
2299 +@@ -320,6 +327,7 @@ extern ktime_t ktime_get(void);
2300 + extern ktime_t ktime_get_real(void);
2301 + extern ktime_t ktime_get_boottime(void);
2302 + extern ktime_t ktime_get_monotonic_offset(void);
2303 ++extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot);
2304 +
2305 + DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
2306 +
2307 +diff --git a/include/linux/timex.h b/include/linux/timex.h
2308 +index aa60fe7..08e90fb 100644
2309 +--- a/include/linux/timex.h
2310 ++++ b/include/linux/timex.h
2311 +@@ -266,7 +266,7 @@ static inline int ntp_synced(void)
2312 + /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
2313 + extern u64 tick_length;
2314 +
2315 +-extern void second_overflow(void);
2316 ++extern int second_overflow(unsigned long secs);
2317 + extern void update_ntp_one_tick(void);
2318 + extern int do_adjtimex(struct timex *);
2319 + extern void hardpps(const struct timespec *, const struct timespec *);
2320 +diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
2321 +index 2043c08..957869f 100644
2322 +--- a/kernel/hrtimer.c
2323 ++++ b/kernel/hrtimer.c
2324 +@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
2325 + return 0;
2326 + }
2327 +
2328 ++static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
2329 ++{
2330 ++ ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
2331 ++ ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
2332 ++
2333 ++ return ktime_get_update_offsets(offs_real, offs_boot);
2334 ++}
2335 ++
2336 + /*
2337 + * Retrigger next event is called after clock was set
2338 + *
2339 +@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
2340 + static void retrigger_next_event(void *arg)
2341 + {
2342 + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
2343 +- struct timespec realtime_offset, xtim, wtm, sleep;
2344 +
2345 + if (!hrtimer_hres_active())
2346 + return;
2347 +
2348 +- /* Optimized out for !HIGH_RES */
2349 +- get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
2350 +- set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
2351 +-
2352 +- /* Adjust CLOCK_REALTIME offset */
2353 + raw_spin_lock(&base->lock);
2354 +- base->clock_base[HRTIMER_BASE_REALTIME].offset =
2355 +- timespec_to_ktime(realtime_offset);
2356 +- base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
2357 +- timespec_to_ktime(sleep);
2358 +-
2359 ++ hrtimer_update_base(base);
2360 + hrtimer_force_reprogram(base, 0);
2361 + raw_spin_unlock(&base->lock);
2362 + }
2363 +@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void)
2364 + base->clock_base[i].resolution = KTIME_HIGH_RES;
2365 +
2366 + tick_setup_sched_timer();
2367 +-
2368 + /* "Retrigger" the interrupt to get things going */
2369 + retrigger_next_event(NULL);
2370 + local_irq_restore(flags);
2371 + return 1;
2372 + }
2373 +
2374 ++/*
2375 ++ * Called from timekeeping code to reprogramm the hrtimer interrupt
2376 ++ * device. If called from the timer interrupt context we defer it to
2377 ++ * softirq context.
2378 ++ */
2379 ++void clock_was_set_delayed(void)
2380 ++{
2381 ++ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
2382 ++
2383 ++ cpu_base->clock_was_set = 1;
2384 ++ __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
2385 ++}
2386 ++
2387 + #else
2388 +
2389 + static inline int hrtimer_hres_active(void) { return 0; }
2390 +@@ -1250,11 +1260,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
2391 + cpu_base->nr_events++;
2392 + dev->next_event.tv64 = KTIME_MAX;
2393 +
2394 +- entry_time = now = ktime_get();
2395 ++ raw_spin_lock(&cpu_base->lock);
2396 ++ entry_time = now = hrtimer_update_base(cpu_base);
2397 + retry:
2398 + expires_next.tv64 = KTIME_MAX;
2399 +-
2400 +- raw_spin_lock(&cpu_base->lock);
2401 + /*
2402 + * We set expires_next to KTIME_MAX here with cpu_base->lock
2403 + * held to prevent that a timer is enqueued in our queue via
2404 +@@ -1330,8 +1339,12 @@ retry:
2405 + * We need to prevent that we loop forever in the hrtimer
2406 + * interrupt routine. We give it 3 attempts to avoid
2407 + * overreacting on some spurious event.
2408 ++ *
2409 ++ * Acquire base lock for updating the offsets and retrieving
2410 ++ * the current time.
2411 + */
2412 +- now = ktime_get();
2413 ++ raw_spin_lock(&cpu_base->lock);
2414 ++ now = hrtimer_update_base(cpu_base);
2415 + cpu_base->nr_retries++;
2416 + if (++retries < 3)
2417 + goto retry;
2418 +@@ -1343,6 +1356,7 @@ retry:
2419 + */
2420 + cpu_base->nr_hangs++;
2421 + cpu_base->hang_detected = 1;
2422 ++ raw_spin_unlock(&cpu_base->lock);
2423 + delta = ktime_sub(now, entry_time);
2424 + if (delta.tv64 > cpu_base->max_hang_time.tv64)
2425 + cpu_base->max_hang_time = delta;
2426 +@@ -1395,6 +1409,13 @@ void hrtimer_peek_ahead_timers(void)
2427 +
2428 + static void run_hrtimer_softirq(struct softirq_action *h)
2429 + {
2430 ++ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
2431 ++
2432 ++ if (cpu_base->clock_was_set) {
2433 ++ cpu_base->clock_was_set = 0;
2434 ++ clock_was_set();
2435 ++ }
2436 ++
2437 + hrtimer_peek_ahead_timers();
2438 + }
2439 +
2440 +diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
2441 +index 4b85a7a..f1eb182 100644
2442 +--- a/kernel/time/ntp.c
2443 ++++ b/kernel/time/ntp.c
2444 +@@ -31,8 +31,6 @@ unsigned long tick_nsec;
2445 + u64 tick_length;
2446 + static u64 tick_length_base;
2447 +
2448 +-static struct hrtimer leap_timer;
2449 +-
2450 + #define MAX_TICKADJ 500LL /* usecs */
2451 + #define MAX_TICKADJ_SCALED \
2452 + (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
2453 +@@ -350,60 +348,60 @@ void ntp_clear(void)
2454 + }
2455 +
2456 + /*
2457 +- * Leap second processing. If in leap-insert state at the end of the
2458 +- * day, the system clock is set back one second; if in leap-delete
2459 +- * state, the system clock is set ahead one second.
2460 ++ * this routine handles the overflow of the microsecond field
2461 ++ *
2462 ++ * The tricky bits of code to handle the accurate clock support
2463 ++ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
2464 ++ * They were originally developed for SUN and DEC kernels.
2465 ++ * All the kudos should go to Dave for this stuff.
2466 ++ *
2467 ++ * Also handles leap second processing, and returns leap offset
2468 + */
2469 +-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
2470 ++int second_overflow(unsigned long secs)
2471 + {
2472 +- enum hrtimer_restart res = HRTIMER_NORESTART;
2473 +-
2474 +- write_seqlock(&xtime_lock);
2475 ++ int leap = 0;
2476 ++ s64 delta;
2477 +
2478 ++ /*
2479 ++ * Leap second processing. If in leap-insert state at the end of the
2480 ++ * day, the system clock is set back one second; if in leap-delete
2481 ++ * state, the system clock is set ahead one second.
2482 ++ */
2483 + switch (time_state) {
2484 + case TIME_OK:
2485 ++ if (time_status & STA_INS)
2486 ++ time_state = TIME_INS;
2487 ++ else if (time_status & STA_DEL)
2488 ++ time_state = TIME_DEL;
2489 + break;
2490 + case TIME_INS:
2491 +- timekeeping_leap_insert(-1);
2492 +- time_state = TIME_OOP;
2493 +- printk(KERN_NOTICE
2494 +- "Clock: inserting leap second 23:59:60 UTC\n");
2495 +- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
2496 +- res = HRTIMER_RESTART;
2497 ++ if (secs % 86400 == 0) {
2498 ++ leap = -1;
2499 ++ time_state = TIME_OOP;
2500 ++ time_tai++;
2501 ++ printk(KERN_NOTICE
2502 ++ "Clock: inserting leap second 23:59:60 UTC\n");
2503 ++ }
2504 + break;
2505 + case TIME_DEL:
2506 +- timekeeping_leap_insert(1);
2507 +- time_tai--;
2508 +- time_state = TIME_WAIT;
2509 +- printk(KERN_NOTICE
2510 +- "Clock: deleting leap second 23:59:59 UTC\n");
2511 ++ if ((secs + 1) % 86400 == 0) {
2512 ++ leap = 1;
2513 ++ time_tai--;
2514 ++ time_state = TIME_WAIT;
2515 ++ printk(KERN_NOTICE
2516 ++ "Clock: deleting leap second 23:59:59 UTC\n");
2517 ++ }
2518 + break;
2519 + case TIME_OOP:
2520 +- time_tai++;
2521 + time_state = TIME_WAIT;
2522 +- /* fall through */
2523 ++ break;
2524 ++
2525 + case TIME_WAIT:
2526 + if (!(time_status & (STA_INS | STA_DEL)))
2527 + time_state = TIME_OK;
2528 + break;
2529 + }
2530 +
2531 +- write_sequnlock(&xtime_lock);
2532 +-
2533 +- return res;
2534 +-}
2535 +-
2536 +-/*
2537 +- * this routine handles the overflow of the microsecond field
2538 +- *
2539 +- * The tricky bits of code to handle the accurate clock support
2540 +- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
2541 +- * They were originally developed for SUN and DEC kernels.
2542 +- * All the kudos should go to Dave for this stuff.
2543 +- */
2544 +-void second_overflow(void)
2545 +-{
2546 +- s64 delta;
2547 +
2548 + /* Bump the maxerror field */
2549 + time_maxerror += MAXFREQ / NSEC_PER_USEC;
2550 +@@ -423,23 +421,25 @@ void second_overflow(void)
2551 + pps_dec_valid();
2552 +
2553 + if (!time_adjust)
2554 +- return;
2555 ++ goto out;
2556 +
2557 + if (time_adjust > MAX_TICKADJ) {
2558 + time_adjust -= MAX_TICKADJ;
2559 + tick_length += MAX_TICKADJ_SCALED;
2560 +- return;
2561 ++ goto out;
2562 + }
2563 +
2564 + if (time_adjust < -MAX_TICKADJ) {
2565 + time_adjust += MAX_TICKADJ;
2566 + tick_length -= MAX_TICKADJ_SCALED;
2567 +- return;
2568 ++ goto out;
2569 + }
2570 +
2571 + tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
2572 + << NTP_SCALE_SHIFT;
2573 + time_adjust = 0;
2574 ++out:
2575 ++ return leap;
2576 + }
2577 +
2578 + #ifdef CONFIG_GENERIC_CMOS_UPDATE
2579 +@@ -501,27 +501,6 @@ static void notify_cmos_timer(void)
2580 + static inline void notify_cmos_timer(void) { }
2581 + #endif
2582 +
2583 +-/*
2584 +- * Start the leap seconds timer:
2585 +- */
2586 +-static inline void ntp_start_leap_timer(struct timespec *ts)
2587 +-{
2588 +- long now = ts->tv_sec;
2589 +-
2590 +- if (time_status & STA_INS) {
2591 +- time_state = TIME_INS;
2592 +- now += 86400 - now % 86400;
2593 +- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
2594 +-
2595 +- return;
2596 +- }
2597 +-
2598 +- if (time_status & STA_DEL) {
2599 +- time_state = TIME_DEL;
2600 +- now += 86400 - (now + 1) % 86400;
2601 +- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
2602 +- }
2603 +-}
2604 +
2605 + /*
2606 + * Propagate a new txc->status value into the NTP state:
2607 +@@ -546,22 +525,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
2608 + time_status &= STA_RONLY;
2609 + time_status |= txc->status & ~STA_RONLY;
2610 +
2611 +- switch (time_state) {
2612 +- case TIME_OK:
2613 +- ntp_start_leap_timer(ts);
2614 +- break;
2615 +- case TIME_INS:
2616 +- case TIME_DEL:
2617 +- time_state = TIME_OK;
2618 +- ntp_start_leap_timer(ts);
2619 +- case TIME_WAIT:
2620 +- if (!(time_status & (STA_INS | STA_DEL)))
2621 +- time_state = TIME_OK;
2622 +- break;
2623 +- case TIME_OOP:
2624 +- hrtimer_restart(&leap_timer);
2625 +- break;
2626 +- }
2627 + }
2628 + /*
2629 + * Called with the xtime lock held, so we can access and modify
2630 +@@ -643,9 +606,6 @@ int do_adjtimex(struct timex *txc)
2631 + (txc->tick < 900000/USER_HZ ||
2632 + txc->tick > 1100000/USER_HZ))
2633 + return -EINVAL;
2634 +-
2635 +- if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
2636 +- hrtimer_cancel(&leap_timer);
2637 + }
2638 +
2639 + if (txc->modes & ADJ_SETOFFSET) {
2640 +@@ -967,6 +927,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
2641 + void __init ntp_init(void)
2642 + {
2643 + ntp_clear();
2644 +- hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
2645 +- leap_timer.function = ntp_leap_second;
2646 + }
2647 +diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
2648 +index 5f45831..678ae31 100644
2649 +--- a/kernel/time/timekeeping.c
2650 ++++ b/kernel/time/timekeeping.c
2651 +@@ -161,23 +161,43 @@ static struct timespec xtime __attribute__ ((aligned (16)));
2652 + static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
2653 + static struct timespec total_sleep_time;
2654 +
2655 ++/* Offset clock monotonic -> clock realtime */
2656 ++static ktime_t offs_real;
2657 ++
2658 ++/* Offset clock monotonic -> clock boottime */
2659 ++static ktime_t offs_boot;
2660 ++
2661 + /*
2662 + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
2663 + */
2664 + static struct timespec raw_time;
2665 +
2666 +-/* flag for if timekeeping is suspended */
2667 +-int __read_mostly timekeeping_suspended;
2668 ++/* must hold write on xtime_lock */
2669 ++static void update_rt_offset(void)
2670 ++{
2671 ++ struct timespec tmp, *wtm = &wall_to_monotonic;
2672 +
2673 +-/* must hold xtime_lock */
2674 +-void timekeeping_leap_insert(int leapsecond)
2675 ++ set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
2676 ++ offs_real = timespec_to_ktime(tmp);
2677 ++}
2678 ++
2679 ++/* must hold write on xtime_lock */
2680 ++static void timekeeping_update(bool clearntp)
2681 + {
2682 +- xtime.tv_sec += leapsecond;
2683 +- wall_to_monotonic.tv_sec -= leapsecond;
2684 +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
2685 +- timekeeper.mult);
2686 ++ if (clearntp) {
2687 ++ timekeeper.ntp_error = 0;
2688 ++ ntp_clear();
2689 ++ }
2690 ++ update_rt_offset();
2691 ++ update_vsyscall(&xtime, &wall_to_monotonic,
2692 ++ timekeeper.clock, timekeeper.mult);
2693 + }
2694 +
2695 ++
2696 ++
2697 ++/* flag for if timekeeping is suspended */
2698 ++int __read_mostly timekeeping_suspended;
2699 ++
2700 + /**
2701 + * timekeeping_forward_now - update clock to the current time
2702 + *
2703 +@@ -375,11 +395,7 @@ int do_settimeofday(const struct timespec *tv)
2704 +
2705 + xtime = *tv;
2706 +
2707 +- timekeeper.ntp_error = 0;
2708 +- ntp_clear();
2709 +-
2710 +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
2711 +- timekeeper.mult);
2712 ++ timekeeping_update(true);
2713 +
2714 + write_sequnlock_irqrestore(&xtime_lock, flags);
2715 +
2716 +@@ -412,11 +428,7 @@ int timekeeping_inject_offset(struct timespec *ts)
2717 + xtime = timespec_add(xtime, *ts);
2718 + wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
2719 +
2720 +- timekeeper.ntp_error = 0;
2721 +- ntp_clear();
2722 +-
2723 +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
2724 +- timekeeper.mult);
2725 ++ timekeeping_update(true);
2726 +
2727 + write_sequnlock_irqrestore(&xtime_lock, flags);
2728 +
2729 +@@ -591,6 +603,7 @@ void __init timekeeping_init(void)
2730 + }
2731 + set_normalized_timespec(&wall_to_monotonic,
2732 + -boot.tv_sec, -boot.tv_nsec);
2733 ++ update_rt_offset();
2734 + total_sleep_time.tv_sec = 0;
2735 + total_sleep_time.tv_nsec = 0;
2736 + write_sequnlock_irqrestore(&xtime_lock, flags);
2737 +@@ -599,6 +612,12 @@ void __init timekeeping_init(void)
2738 + /* time in seconds when suspend began */
2739 + static struct timespec timekeeping_suspend_time;
2740 +
2741 ++static void update_sleep_time(struct timespec t)
2742 ++{
2743 ++ total_sleep_time = t;
2744 ++ offs_boot = timespec_to_ktime(t);
2745 ++}
2746 ++
2747 + /**
2748 + * __timekeeping_inject_sleeptime - Internal function to add sleep interval
2749 + * @delta: pointer to a timespec delta value
2750 +@@ -610,7 +629,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
2751 + {
2752 + xtime = timespec_add(xtime, *delta);
2753 + wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
2754 +- total_sleep_time = timespec_add(total_sleep_time, *delta);
2755 ++ update_sleep_time(timespec_add(total_sleep_time, *delta));
2756 + }
2757 +
2758 +
2759 +@@ -639,10 +658,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
2760 +
2761 + __timekeeping_inject_sleeptime(delta);
2762 +
2763 +- timekeeper.ntp_error = 0;
2764 +- ntp_clear();
2765 +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
2766 +- timekeeper.mult);
2767 ++ timekeeping_update(true);
2768 +
2769 + write_sequnlock_irqrestore(&xtime_lock, flags);
2770 +
2771 +@@ -677,6 +693,7 @@ static void timekeeping_resume(void)
2772 + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
2773 + timekeeper.ntp_error = 0;
2774 + timekeeping_suspended = 0;
2775 ++ timekeeping_update(false);
2776 + write_sequnlock_irqrestore(&xtime_lock, flags);
2777 +
2778 + touch_softlockup_watchdog();
2779 +@@ -828,9 +845,14 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
2780 +
2781 + timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
2782 + while (timekeeper.xtime_nsec >= nsecps) {
2783 ++ int leap;
2784 + timekeeper.xtime_nsec -= nsecps;
2785 + xtime.tv_sec++;
2786 +- second_overflow();
2787 ++ leap = second_overflow(xtime.tv_sec);
2788 ++ xtime.tv_sec += leap;
2789 ++ wall_to_monotonic.tv_sec -= leap;
2790 ++ if (leap)
2791 ++ clock_was_set_delayed();
2792 + }
2793 +
2794 + /* Accumulate raw time */
2795 +@@ -936,14 +958,17 @@ static void update_wall_time(void)
2796 + * xtime.tv_nsec isn't larger then NSEC_PER_SEC
2797 + */
2798 + if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
2799 ++ int leap;
2800 + xtime.tv_nsec -= NSEC_PER_SEC;
2801 + xtime.tv_sec++;
2802 +- second_overflow();
2803 ++ leap = second_overflow(xtime.tv_sec);
2804 ++ xtime.tv_sec += leap;
2805 ++ wall_to_monotonic.tv_sec -= leap;
2806 ++ if (leap)
2807 ++ clock_was_set_delayed();
2808 + }
2809 +
2810 +- /* check to see if there is a new clocksource to use */
2811 +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
2812 +- timekeeper.mult);
2813 ++ timekeeping_update(false);
2814 + }
2815 +
2816 + /**
2817 +@@ -1102,6 +1127,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
2818 + } while (read_seqretry(&xtime_lock, seq));
2819 + }
2820 +
2821 ++#ifdef CONFIG_HIGH_RES_TIMERS
2822 ++/**
2823 ++ * ktime_get_update_offsets - hrtimer helper
2824 ++ * @real: pointer to storage for monotonic -> realtime offset
2825 ++ * @_boot: pointer to storage for monotonic -> boottime offset
2826 ++ *
2827 ++ * Returns current monotonic time and updates the offsets
2828 ++ * Called from hrtimer_interupt() or retrigger_next_event()
2829 ++ */
2830 ++ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot)
2831 ++{
2832 ++ ktime_t now;
2833 ++ unsigned int seq;
2834 ++ u64 secs, nsecs;
2835 ++
2836 ++ do {
2837 ++ seq = read_seqbegin(&xtime_lock);
2838 ++
2839 ++ secs = xtime.tv_sec;
2840 ++ nsecs = xtime.tv_nsec;
2841 ++ nsecs += timekeeping_get_ns();
2842 ++ /* If arch requires, add in gettimeoffset() */
2843 ++ nsecs += arch_gettimeoffset();
2844 ++
2845 ++ *real = offs_real;
2846 ++ *boot = offs_boot;
2847 ++ } while (read_seqretry(&xtime_lock, seq));
2848 ++
2849 ++ now = ktime_add_ns(ktime_set(secs, 0), nsecs);
2850 ++ now = ktime_sub(now, *real);
2851 ++ return now;
2852 ++}
2853 ++#endif
2854 ++
2855 + /**
2856 + * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
2857 + */
2858 +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
2859 +index 7410a8c..6e33b79c 100644
2860 +--- a/net/ipv4/tcp_input.c
2861 ++++ b/net/ipv4/tcp_input.c
2862 +@@ -5761,6 +5761,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
2863 + goto discard;
2864 +
2865 + if (th->syn) {
2866 ++ if (th->fin)
2867 ++ goto discard;
2868 + if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
2869 + return 1;
2870 +
2871 +diff --git a/net/wireless/util.c b/net/wireless/util.c
2872 +index 30f68dc..bbcb58e 100644
2873 +--- a/net/wireless/util.c
2874 ++++ b/net/wireless/util.c
2875 +@@ -807,7 +807,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
2876 + ntype == NL80211_IFTYPE_P2P_CLIENT))
2877 + return -EBUSY;
2878 +
2879 +- if (ntype != otype) {
2880 ++ if (ntype != otype && netif_running(dev)) {
2881 + err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr,
2882 + ntype);
2883 + if (err)
2884
2885 Added: genpatches-2.6/trunk/3.0/1038_linux-3.0.39.patch
2886 ===================================================================
2887 --- genpatches-2.6/trunk/3.0/1038_linux-3.0.39.patch (rev 0)
2888 +++ genpatches-2.6/trunk/3.0/1038_linux-3.0.39.patch 2012-08-01 23:35:55 UTC (rev 2182)
2889 @@ -0,0 +1,2440 @@
2890 +diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
2891 +index 12cecc8..4a37c47 100644
2892 +--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
2893 ++++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
2894 +@@ -379,10 +379,10 @@ EVENT_PROCESS:
2895 +
2896 + # To closer match vmstat scanning statistics, only count isolate_both
2897 + # and isolate_inactive as scanning. isolate_active is rotation
2898 +- # isolate_inactive == 0
2899 +- # isolate_active == 1
2900 +- # isolate_both == 2
2901 +- if ($isolate_mode != 1) {
2902 ++ # isolate_inactive == 1
2903 ++ # isolate_active == 2
2904 ++ # isolate_both == 3
2905 ++ if ($isolate_mode != 2) {
2906 + $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
2907 + }
2908 + $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty;
2909 +diff --git a/Makefile b/Makefile
2910 +index 5fdfaa8..3ec1722 100644
2911 +--- a/Makefile
2912 ++++ b/Makefile
2913 +@@ -1,6 +1,6 @@
2914 + VERSION = 3
2915 + PATCHLEVEL = 0
2916 +-SUBLEVEL = 38
2917 ++SUBLEVEL = 39
2918 + EXTRAVERSION =
2919 + NAME = Sneaky Weasel
2920 +
2921 +diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
2922 +index 97f8bf6..adda036 100644
2923 +--- a/arch/mips/include/asm/thread_info.h
2924 ++++ b/arch/mips/include/asm/thread_info.h
2925 +@@ -60,6 +60,8 @@ struct thread_info {
2926 + register struct thread_info *__current_thread_info __asm__("$28");
2927 + #define current_thread_info() __current_thread_info
2928 +
2929 ++#endif /* !__ASSEMBLY__ */
2930 ++
2931 + /* thread information allocation */
2932 + #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
2933 + #define THREAD_SIZE_ORDER (1)
2934 +@@ -97,8 +99,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
2935 +
2936 + #define free_thread_info(info) kfree(info)
2937 +
2938 +-#endif /* !__ASSEMBLY__ */
2939 +-
2940 + #define PREEMPT_ACTIVE 0x10000000
2941 +
2942 + /*
2943 +diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
2944 +index a81176f..be281c6 100644
2945 +--- a/arch/mips/kernel/vmlinux.lds.S
2946 ++++ b/arch/mips/kernel/vmlinux.lds.S
2947 +@@ -1,5 +1,6 @@
2948 + #include <asm/asm-offsets.h>
2949 + #include <asm/page.h>
2950 ++#include <asm/thread_info.h>
2951 + #include <asm-generic/vmlinux.lds.h>
2952 +
2953 + #undef mips
2954 +@@ -73,7 +74,7 @@ SECTIONS
2955 + .data : { /* Data */
2956 + . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */
2957 +
2958 +- INIT_TASK_DATA(PAGE_SIZE)
2959 ++ INIT_TASK_DATA(THREAD_SIZE)
2960 + NOSAVE_DATA
2961 + CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
2962 + READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
2963 +diff --git a/drivers/base/memory.c b/drivers/base/memory.c
2964 +index 45d7c8f..5fb6aae 100644
2965 +--- a/drivers/base/memory.c
2966 ++++ b/drivers/base/memory.c
2967 +@@ -224,13 +224,48 @@ int memory_isolate_notify(unsigned long val, void *v)
2968 + }
2969 +
2970 + /*
2971 ++ * The probe routines leave the pages reserved, just as the bootmem code does.
2972 ++ * Make sure they're still that way.
2973 ++ */
2974 ++static bool pages_correctly_reserved(unsigned long start_pfn,
2975 ++ unsigned long nr_pages)
2976 ++{
2977 ++ int i, j;
2978 ++ struct page *page;
2979 ++ unsigned long pfn = start_pfn;
2980 ++
2981 ++ /*
2982 ++ * memmap between sections is not contiguous except with
2983 ++ * SPARSEMEM_VMEMMAP. We lookup the page once per section
2984 ++ * and assume memmap is contiguous within each section
2985 ++ */
2986 ++ for (i = 0; i < sections_per_block; i++, pfn += PAGES_PER_SECTION) {
2987 ++ if (WARN_ON_ONCE(!pfn_valid(pfn)))
2988 ++ return false;
2989 ++ page = pfn_to_page(pfn);
2990 ++
2991 ++ for (j = 0; j < PAGES_PER_SECTION; j++) {
2992 ++ if (PageReserved(page + j))
2993 ++ continue;
2994 ++
2995 ++ printk(KERN_WARNING "section number %ld page number %d "
2996 ++ "not reserved, was it already online?\n",
2997 ++ pfn_to_section_nr(pfn), j);
2998 ++
2999 ++ return false;
3000 ++ }
3001 ++ }
3002 ++
3003 ++ return true;
3004 ++}
3005 ++
3006 ++/*
3007 + * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
3008 + * OK to have direct references to sparsemem variables in here.
3009 + */
3010 + static int
3011 + memory_block_action(unsigned long phys_index, unsigned long action)
3012 + {
3013 +- int i;
3014 + unsigned long start_pfn, start_paddr;
3015 + unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
3016 + struct page *first_page;
3017 +@@ -238,26 +273,13 @@ memory_block_action(unsigned long phys_index, unsigned long action)
3018 +
3019 + first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
3020 +
3021 +- /*
3022 +- * The probe routines leave the pages reserved, just
3023 +- * as the bootmem code does. Make sure they're still
3024 +- * that way.
3025 +- */
3026 +- if (action == MEM_ONLINE) {
3027 +- for (i = 0; i < nr_pages; i++) {
3028 +- if (PageReserved(first_page+i))
3029 +- continue;
3030 +-
3031 +- printk(KERN_WARNING "section number %ld page number %d "
3032 +- "not reserved, was it already online?\n",
3033 +- phys_index, i);
3034 +- return -EBUSY;
3035 +- }
3036 +- }
3037 +-
3038 + switch (action) {
3039 + case MEM_ONLINE:
3040 + start_pfn = page_to_pfn(first_page);
3041 ++
3042 ++ if (!pages_correctly_reserved(start_pfn, nr_pages))
3043 ++ return -EBUSY;
3044 ++
3045 + ret = online_pages(start_pfn, nr_pages);
3046 + break;
3047 + case MEM_OFFLINE:
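As an illustrative aside (not part of the patch itself): memory_block_action(MEM_ONLINE) is normally reached by writing "online" to a memory block's sysfs state file, which is when the new pages_correctly_reserved() walk runs. A minimal user-space sketch, with block number 32 chosen purely for illustration:

#include <fcntl.h>
#include <unistd.h>

int online_memory_block(void)
{
	/* Writing "online" here ends up in memory_block_action(MEM_ONLINE). */
	int fd = open("/sys/devices/system/memory/memory32/state", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, "online", 6);
	close(fd);
	return n == 6 ? 0 : -1;
}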
3048 +diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
3049 +index 9bfd057..42ef54f 100644
3050 +--- a/drivers/md/dm-raid1.c
3051 ++++ b/drivers/md/dm-raid1.c
3052 +@@ -1210,7 +1210,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
3053 + * We need to dec pending if this was a write.
3054 + */
3055 + if (rw == WRITE) {
3056 +- if (!(bio->bi_rw & REQ_FLUSH))
3057 ++ if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
3058 + dm_rh_dec(ms->rh, map_context->ll);
3059 + return error;
3060 + }
3061 +diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
3062 +index 7771ed2..69732e0 100644
3063 +--- a/drivers/md/dm-region-hash.c
3064 ++++ b/drivers/md/dm-region-hash.c
3065 +@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
3066 + return;
3067 + }
3068 +
3069 ++ if (bio->bi_rw & REQ_DISCARD)
3070 ++ return;
3071 ++
3072 + /* We must inform the log that the sync count has changed. */
3073 + log->type->set_region_sync(log, region, 0);
3074 +
3075 +@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
3076 + struct bio *bio;
3077 +
3078 + for (bio = bios->head; bio; bio = bio->bi_next) {
3079 +- if (bio->bi_rw & REQ_FLUSH)
3080 ++ if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
3081 + continue;
3082 + rh_inc(rh, dm_rh_bio_to_region(rh, bio));
3083 + }
3084 +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
3085 +index 1ac8db5d..57106a9 100644
3086 +--- a/fs/btrfs/disk-io.c
3087 ++++ b/fs/btrfs/disk-io.c
3088 +@@ -801,7 +801,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
3089 +
3090 + #ifdef CONFIG_MIGRATION
3091 + static int btree_migratepage(struct address_space *mapping,
3092 +- struct page *newpage, struct page *page)
3093 ++ struct page *newpage, struct page *page,
3094 ++ enum migrate_mode mode)
3095 + {
3096 + /*
3097 + * we can't safely write a btree page from here,
3098 +@@ -816,7 +817,7 @@ static int btree_migratepage(struct address_space *mapping,
3099 + if (page_has_private(page) &&
3100 + !try_to_release_page(page, GFP_KERNEL))
3101 + return -EAGAIN;
3102 +- return migrate_page(mapping, newpage, page);
3103 ++ return migrate_page(mapping, newpage, page, mode);
3104 + }
3105 + #endif
3106 +
3107 +diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
3108 +index 6751e74..c71032b 100644
3109 +--- a/fs/cifs/readdir.c
3110 ++++ b/fs/cifs/readdir.c
3111 +@@ -85,9 +85,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
3112 +
3113 + dentry = d_lookup(parent, name);
3114 + if (dentry) {
3115 +- /* FIXME: check for inode number changes? */
3116 +- if (dentry->d_inode != NULL)
3117 ++ inode = dentry->d_inode;
3118 ++ /* update inode in place if i_ino didn't change */
3119 ++ if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
3120 ++ cifs_fattr_to_inode(inode, fattr);
3121 + return dentry;
3122 ++ }
3123 + d_drop(dentry);
3124 + dput(dentry);
3125 + }
3126 +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
3127 +index 8b0c875..6327a06 100644
3128 +--- a/fs/hugetlbfs/inode.c
3129 ++++ b/fs/hugetlbfs/inode.c
3130 +@@ -568,7 +568,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
3131 + }
3132 +
3133 + static int hugetlbfs_migrate_page(struct address_space *mapping,
3134 +- struct page *newpage, struct page *page)
3135 ++ struct page *newpage, struct page *page,
3136 ++ enum migrate_mode mode)
3137 + {
3138 + int rc;
3139 +
3140 +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
3141 +index 2a55347..4f10d81 100644
3142 +--- a/fs/nfs/internal.h
3143 ++++ b/fs/nfs/internal.h
3144 +@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
3145 +
3146 + #ifdef CONFIG_MIGRATION
3147 + extern int nfs_migrate_page(struct address_space *,
3148 +- struct page *, struct page *);
3149 ++ struct page *, struct page *, enum migrate_mode);
3150 + #else
3151 + #define nfs_migrate_page NULL
3152 + #endif
3153 +diff --git a/fs/nfs/write.c b/fs/nfs/write.c
3154 +index f2f80c0..58bb999 100644
3155 +--- a/fs/nfs/write.c
3156 ++++ b/fs/nfs/write.c
3157 +@@ -1662,7 +1662,7 @@ out_error:
3158 +
3159 + #ifdef CONFIG_MIGRATION
3160 + int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
3161 +- struct page *page)
3162 ++ struct page *page, enum migrate_mode mode)
3163 + {
3164 + /*
3165 + * If PagePrivate is set, then the page is currently associated with
3166 +@@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
3167 +
3168 + nfs_fscache_release_page(page, GFP_KERNEL);
3169 +
3170 +- return migrate_page(mapping, newpage, page);
3171 ++ return migrate_page(mapping, newpage, page, mode);
3172 + }
3173 + #endif
3174 +
3175 +diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
3176 +index c606f01..1250016 100644
3177 +--- a/fs/ubifs/sb.c
3178 ++++ b/fs/ubifs/sb.c
3179 +@@ -715,8 +715,12 @@ static int fixup_free_space(struct ubifs_info *c)
3180 + lnum = ubifs_next_log_lnum(c, lnum);
3181 + }
3182 +
3183 +- /* Fixup the current log head */
3184 +- err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
3185 ++ /*
3186 ++ * Fixup the log head, which contains only a CS node at the
3187 ++ * beginning.
3188 ++ */
3189 ++ err = fixup_leb(c, c->lhead_lnum,
3190 ++ ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
3191 + if (err)
3192 + goto out;
3193 +
3194 +diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
3195 +index e9eaec5..7a7e5fd 100644
3196 +--- a/include/linux/cpuset.h
3197 ++++ b/include/linux/cpuset.h
3198 +@@ -89,42 +89,33 @@ extern void rebuild_sched_domains(void);
3199 + extern void cpuset_print_task_mems_allowed(struct task_struct *p);
3200 +
3201 + /*
3202 +- * reading current mems_allowed and mempolicy in the fastpath must protected
3203 +- * by get_mems_allowed()
3204 ++ * get_mems_allowed is required when making decisions involving mems_allowed
3205 ++ * such as during page allocation. mems_allowed can be updated in parallel
3206 ++ * and depending on the new value an operation can fail potentially causing
3207 ++ * process failure. A retry loop with get_mems_allowed and put_mems_allowed
3208 ++ * prevents these artificial failures.
3209 + */
3210 +-static inline void get_mems_allowed(void)
3211 ++static inline unsigned int get_mems_allowed(void)
3212 + {
3213 +- current->mems_allowed_change_disable++;
3214 +-
3215 +- /*
3216 +- * ensure that reading mems_allowed and mempolicy happens after the
3217 +- * update of ->mems_allowed_change_disable.
3218 +- *
3219 +- * the write-side task finds ->mems_allowed_change_disable is not 0,
3220 +- * and knows the read-side task is reading mems_allowed or mempolicy,
3221 +- * so it will clear old bits lazily.
3222 +- */
3223 +- smp_mb();
3224 ++ return read_seqcount_begin(&current->mems_allowed_seq);
3225 + }
3226 +
3227 +-static inline void put_mems_allowed(void)
3228 ++/*
3229 ++ * If this returns false, the operation that took place after get_mems_allowed
3230 ++ * may have failed. It is up to the caller to retry the operation if
3231 ++ * appropriate.
3232 ++ */
3233 ++static inline bool put_mems_allowed(unsigned int seq)
3234 + {
3235 +- /*
3236 +- * ensure that reading mems_allowed and mempolicy before reducing
3237 +- * mems_allowed_change_disable.
3238 +- *
3239 +- * the write-side task will know that the read-side task is still
3240 +- * reading mems_allowed or mempolicy, don't clears old bits in the
3241 +- * nodemask.
3242 +- */
3243 +- smp_mb();
3244 +- --ACCESS_ONCE(current->mems_allowed_change_disable);
3245 ++ return !read_seqcount_retry(&current->mems_allowed_seq, seq);
3246 + }
3247 +
3248 + static inline void set_mems_allowed(nodemask_t nodemask)
3249 + {
3250 + task_lock(current);
3251 ++ write_seqcount_begin(&current->mems_allowed_seq);
3252 + current->mems_allowed = nodemask;
3253 ++ write_seqcount_end(&current->mems_allowed_seq);
3254 + task_unlock(current);
3255 + }
3256 +
3257 +@@ -234,12 +225,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
3258 + {
3259 + }
3260 +
3261 +-static inline void get_mems_allowed(void)
3262 ++static inline unsigned int get_mems_allowed(void)
3263 + {
3264 ++ return 0;
3265 + }
3266 +
3267 +-static inline void put_mems_allowed(void)
3268 ++static inline bool put_mems_allowed(unsigned int seq)
3269 + {
3270 ++ return true;
3271 + }
3272 +
3273 + #endif /* !CONFIG_CPUSETS */
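For context, a minimal sketch of the caller-side pattern the new seqcount-based API expects (the mm/filemap.c and mm/page_alloc.c hunks later in this patch follow the same shape); alloc_pages() is used here only as a stand-in for any allocation that consults current->mems_allowed:

#include <linux/cpuset.h>
#include <linux/gfp.h>

static struct page *example_alloc_with_cpuset_retry(gfp_t gfp, unsigned int order)
{
	unsigned int cpuset_mems_cookie;
	struct page *page;

	do {
		cpuset_mems_cookie = get_mems_allowed();  /* read_seqcount_begin() */
		page = alloc_pages(gfp, order);           /* may observe a stale nodemask */
		/* Retry only if the nodemask changed mid-allocation and the
		 * allocation failed; success or a stable mask ends the loop. */
	} while (!put_mems_allowed(cpuset_mems_cookie) && !page);

	return page;
}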
3274 +diff --git a/include/linux/fs.h b/include/linux/fs.h
3275 +index 96b1035..212ea7b 100644
3276 +--- a/include/linux/fs.h
3277 ++++ b/include/linux/fs.h
3278 +@@ -523,6 +523,7 @@ enum positive_aop_returns {
3279 + struct page;
3280 + struct address_space;
3281 + struct writeback_control;
3282 ++enum migrate_mode;
3283 +
3284 + struct iov_iter {
3285 + const struct iovec *iov;
3286 +@@ -607,9 +608,12 @@ struct address_space_operations {
3287 + loff_t offset, unsigned long nr_segs);
3288 + int (*get_xip_mem)(struct address_space *, pgoff_t, int,
3289 + void **, unsigned long *);
3290 +- /* migrate the contents of a page to the specified target */
3291 ++ /*
3292 ++ * migrate the contents of a page to the specified target. If sync
3293 ++ * is false, it must not block.
3294 ++ */
3295 + int (*migratepage) (struct address_space *,
3296 +- struct page *, struct page *);
3297 ++ struct page *, struct page *, enum migrate_mode);
3298 + int (*launder_page) (struct page *);
3299 + int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
3300 + unsigned long);
3301 +@@ -2478,7 +2482,8 @@ extern int generic_check_addressable(unsigned, u64);
3302 +
3303 + #ifdef CONFIG_MIGRATION
3304 + extern int buffer_migrate_page(struct address_space *,
3305 +- struct page *, struct page *);
3306 ++ struct page *, struct page *,
3307 ++ enum migrate_mode);
3308 + #else
3309 + #define buffer_migrate_page NULL
3310 + #endif
3311 +diff --git a/include/linux/init_task.h b/include/linux/init_task.h
3312 +index 580f70c..5e41a8e 100644
3313 +--- a/include/linux/init_task.h
3314 ++++ b/include/linux/init_task.h
3315 +@@ -30,6 +30,13 @@ extern struct fs_struct init_fs;
3316 + #define INIT_THREADGROUP_FORK_LOCK(sig)
3317 + #endif
3318 +
3319 ++#ifdef CONFIG_CPUSETS
3320 ++#define INIT_CPUSET_SEQ \
3321 ++ .mems_allowed_seq = SEQCNT_ZERO,
3322 ++#else
3323 ++#define INIT_CPUSET_SEQ
3324 ++#endif
3325 ++
3326 + #define INIT_SIGNALS(sig) { \
3327 + .nr_threads = 1, \
3328 + .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
3329 +@@ -193,6 +200,7 @@ extern struct cred init_cred;
3330 + INIT_FTRACE_GRAPH \
3331 + INIT_TRACE_RECURSION \
3332 + INIT_TASK_RCU_PREEMPT(tsk) \
3333 ++ INIT_CPUSET_SEQ \
3334 + }
3335 +
3336 +
3337 +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
3338 +index 313a00e..4a8da84 100644
3339 +--- a/include/linux/memcontrol.h
3340 ++++ b/include/linux/memcontrol.h
3341 +@@ -35,7 +35,8 @@ enum mem_cgroup_page_stat_item {
3342 + extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
3343 + struct list_head *dst,
3344 + unsigned long *scanned, int order,
3345 +- int mode, struct zone *z,
3346 ++ isolate_mode_t mode,
3347 ++ struct zone *z,
3348 + struct mem_cgroup *mem_cont,
3349 + int active, int file);
3350 +
3351 +diff --git a/include/linux/migrate.h b/include/linux/migrate.h
3352 +index e39aeec..eaf8674 100644
3353 +--- a/include/linux/migrate.h
3354 ++++ b/include/linux/migrate.h
3355 +@@ -6,18 +6,31 @@
3356 +
3357 + typedef struct page *new_page_t(struct page *, unsigned long private, int **);
3358 +
3359 ++/*
3360 ++ * MIGRATE_ASYNC means never block
3361 ++ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
3362 ++ * on most operations but not ->writepage as the potential stall time
3363 ++ * is too significant
3364 ++ * MIGRATE_SYNC will block when migrating pages
3365 ++ */
3366 ++enum migrate_mode {
3367 ++ MIGRATE_ASYNC,
3368 ++ MIGRATE_SYNC_LIGHT,
3369 ++ MIGRATE_SYNC,
3370 ++};
3371 ++
3372 + #ifdef CONFIG_MIGRATION
3373 + #define PAGE_MIGRATION 1
3374 +
3375 + extern void putback_lru_pages(struct list_head *l);
3376 + extern int migrate_page(struct address_space *,
3377 +- struct page *, struct page *);
3378 ++ struct page *, struct page *, enum migrate_mode);
3379 + extern int migrate_pages(struct list_head *l, new_page_t x,
3380 + unsigned long private, bool offlining,
3381 +- bool sync);
3382 ++ enum migrate_mode mode);
3383 + extern int migrate_huge_pages(struct list_head *l, new_page_t x,
3384 + unsigned long private, bool offlining,
3385 +- bool sync);
3386 ++ enum migrate_mode mode);
3387 +
3388 + extern int fail_migrate_page(struct address_space *,
3389 + struct page *, struct page *);
3390 +@@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
3391 + static inline void putback_lru_pages(struct list_head *l) {}
3392 + static inline int migrate_pages(struct list_head *l, new_page_t x,
3393 + unsigned long private, bool offlining,
3394 +- bool sync) { return -ENOSYS; }
3395 ++ enum migrate_mode mode) { return -ENOSYS; }
3396 + static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
3397 + unsigned long private, bool offlining,
3398 +- bool sync) { return -ENOSYS; }
3399 ++ enum migrate_mode mode) { return -ENOSYS; }
3400 +
3401 + static inline int migrate_prep(void) { return -ENOSYS; }
3402 + static inline int migrate_prep_local(void) { return -ENOSYS; }
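A hypothetical ->migratepage implementation (not taken from any filesystem in this patch) sketching how the new mode is meant to be honoured: nothing that can sleep under MIGRATE_ASYNC, and heavyweight work such as writeback reserved for full MIGRATE_SYNC, in line with the fallback_migrate_page() change further down:

#include <linux/fs.h>
#include <linux/migrate.h>
#include <linux/mm.h>

static int example_migratepage(struct address_space *mapping,
			       struct page *newpage, struct page *page,
			       enum migrate_mode mode)
{
	/* Writing the page back could stall; only a full synchronous
	 * migration is allowed to pay that cost. */
	if (PageDirty(page) && mode != MIGRATE_SYNC)
		return -EBUSY;

	/* Dropping private data must not block either; -EAGAIN asks the
	 * migration retry loop to come back to this page later. */
	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
		return -EAGAIN;

	return migrate_page(mapping, newpage, page, mode);
}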
3403 +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
3404 +index aa2d80b..b32f3f9 100644
3405 +--- a/include/linux/mmzone.h
3406 ++++ b/include/linux/mmzone.h
3407 +@@ -158,6 +158,20 @@ static inline int is_unevictable_lru(enum lru_list l)
3408 + return (l == LRU_UNEVICTABLE);
3409 + }
3410 +
3411 ++/* Isolate inactive pages */
3412 ++#define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1)
3413 ++/* Isolate active pages */
3414 ++#define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2)
3415 ++/* Isolate clean file */
3416 ++#define ISOLATE_CLEAN ((__force isolate_mode_t)0x4)
3417 ++/* Isolate unmapped file */
3418 ++#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8)
3419 ++/* Isolate for asynchronous migration */
3420 ++#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10)
3421 ++
3422 ++/* LRU Isolation modes. */
3423 ++typedef unsigned __bitwise__ isolate_mode_t;
3424 ++
3425 + enum zone_watermarks {
3426 + WMARK_MIN,
3427 + WMARK_LOW,
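A minimal sketch of how these flags compose, mirroring the mm/compaction.c hunk later in this patch; the exclusive ISOLATE_INACTIVE/ISOLATE_ACTIVE/ISOLATE_BOTH integers removed from swap.h below become ordinary bit flags:

#include <linux/mmzone.h>

static isolate_mode_t example_isolate_mode(bool sync_compaction)
{
	/* Both LRU lists, i.e. what ISOLATE_BOTH used to mean. */
	isolate_mode_t mode = ISOLATE_INACTIVE | ISOLATE_ACTIVE;

	if (!sync_compaction)
		mode |= ISOLATE_ASYNC_MIGRATE; /* skip pages that would stall async migration */

	return mode;
}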
3428 +diff --git a/include/linux/sched.h b/include/linux/sched.h
3429 +index 4ef452b..443ec43 100644
3430 +--- a/include/linux/sched.h
3431 ++++ b/include/linux/sched.h
3432 +@@ -1484,7 +1484,7 @@ struct task_struct {
3433 + #endif
3434 + #ifdef CONFIG_CPUSETS
3435 + nodemask_t mems_allowed; /* Protected by alloc_lock */
3436 +- int mems_allowed_change_disable;
3437 ++ seqcount_t mems_allowed_seq; /* Sequence number to catch updates */
3438 + int cpuset_mem_spread_rotor;
3439 + int cpuset_slab_spread_rotor;
3440 + #endif
3441 +diff --git a/include/linux/swap.h b/include/linux/swap.h
3442 +index a273468..e73799d 100644
3443 +--- a/include/linux/swap.h
3444 ++++ b/include/linux/swap.h
3445 +@@ -243,11 +243,6 @@ static inline void lru_cache_add_file(struct page *page)
3446 + __lru_cache_add(page, LRU_INACTIVE_FILE);
3447 + }
3448 +
3449 +-/* LRU Isolation modes. */
3450 +-#define ISOLATE_INACTIVE 0 /* Isolate inactive pages. */
3451 +-#define ISOLATE_ACTIVE 1 /* Isolate active pages. */
3452 +-#define ISOLATE_BOTH 2 /* Isolate both active and inactive pages. */
3453 +-
3454 + /* linux/mm/vmscan.c */
3455 + extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
3456 + gfp_t gfp_mask, nodemask_t *mask);
3457 +@@ -259,7 +254,7 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
3458 + unsigned int swappiness,
3459 + struct zone *zone,
3460 + unsigned long *nr_scanned);
3461 +-extern int __isolate_lru_page(struct page *page, int mode, int file);
3462 ++extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file);
3463 + extern unsigned long shrink_all_memory(unsigned long nr_pages);
3464 + extern int vm_swappiness;
3465 + extern int remove_mapping(struct address_space *mapping, struct page *page);
3466 +diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
3467 +index b2c33bd..edc4b3d 100644
3468 +--- a/include/trace/events/vmscan.h
3469 ++++ b/include/trace/events/vmscan.h
3470 +@@ -179,6 +179,83 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re
3471 + TP_ARGS(nr_reclaimed)
3472 + );
3473 +
3474 ++TRACE_EVENT(mm_shrink_slab_start,
3475 ++ TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
3476 ++ long nr_objects_to_shrink, unsigned long pgs_scanned,
3477 ++ unsigned long lru_pgs, unsigned long cache_items,
3478 ++ unsigned long long delta, unsigned long total_scan),
3479 ++
3480 ++ TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs,
3481 ++ cache_items, delta, total_scan),
3482 ++
3483 ++ TP_STRUCT__entry(
3484 ++ __field(struct shrinker *, shr)
3485 ++ __field(void *, shrink)
3486 ++ __field(long, nr_objects_to_shrink)
3487 ++ __field(gfp_t, gfp_flags)
3488 ++ __field(unsigned long, pgs_scanned)
3489 ++ __field(unsigned long, lru_pgs)
3490 ++ __field(unsigned long, cache_items)
3491 ++ __field(unsigned long long, delta)
3492 ++ __field(unsigned long, total_scan)
3493 ++ ),
3494 ++
3495 ++ TP_fast_assign(
3496 ++ __entry->shr = shr;
3497 ++ __entry->shrink = shr->shrink;
3498 ++ __entry->nr_objects_to_shrink = nr_objects_to_shrink;
3499 ++ __entry->gfp_flags = sc->gfp_mask;
3500 ++ __entry->pgs_scanned = pgs_scanned;
3501 ++ __entry->lru_pgs = lru_pgs;
3502 ++ __entry->cache_items = cache_items;
3503 ++ __entry->delta = delta;
3504 ++ __entry->total_scan = total_scan;
3505 ++ ),
3506 ++
3507 ++ TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
3508 ++ __entry->shrink,
3509 ++ __entry->shr,
3510 ++ __entry->nr_objects_to_shrink,
3511 ++ show_gfp_flags(__entry->gfp_flags),
3512 ++ __entry->pgs_scanned,
3513 ++ __entry->lru_pgs,
3514 ++ __entry->cache_items,
3515 ++ __entry->delta,
3516 ++ __entry->total_scan)
3517 ++);
3518 ++
3519 ++TRACE_EVENT(mm_shrink_slab_end,
3520 ++ TP_PROTO(struct shrinker *shr, int shrinker_retval,
3521 ++ long unused_scan_cnt, long new_scan_cnt),
3522 ++
3523 ++ TP_ARGS(shr, shrinker_retval, unused_scan_cnt, new_scan_cnt),
3524 ++
3525 ++ TP_STRUCT__entry(
3526 ++ __field(struct shrinker *, shr)
3527 ++ __field(void *, shrink)
3528 ++ __field(long, unused_scan)
3529 ++ __field(long, new_scan)
3530 ++ __field(int, retval)
3531 ++ __field(long, total_scan)
3532 ++ ),
3533 ++
3534 ++ TP_fast_assign(
3535 ++ __entry->shr = shr;
3536 ++ __entry->shrink = shr->shrink;
3537 ++ __entry->unused_scan = unused_scan_cnt;
3538 ++ __entry->new_scan = new_scan_cnt;
3539 ++ __entry->retval = shrinker_retval;
3540 ++ __entry->total_scan = new_scan_cnt - unused_scan_cnt;
3541 ++ ),
3542 ++
3543 ++ TP_printk("%pF %p: unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
3544 ++ __entry->shrink,
3545 ++ __entry->shr,
3546 ++ __entry->unused_scan,
3547 ++ __entry->new_scan,
3548 ++ __entry->total_scan,
3549 ++ __entry->retval)
3550 ++);
3551 +
3552 + DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
3553 +
3554 +@@ -189,7 +266,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
3555 + unsigned long nr_lumpy_taken,
3556 + unsigned long nr_lumpy_dirty,
3557 + unsigned long nr_lumpy_failed,
3558 +- int isolate_mode),
3559 ++ isolate_mode_t isolate_mode),
3560 +
3561 + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode),
3562 +
3563 +@@ -201,7 +278,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
3564 + __field(unsigned long, nr_lumpy_taken)
3565 + __field(unsigned long, nr_lumpy_dirty)
3566 + __field(unsigned long, nr_lumpy_failed)
3567 +- __field(int, isolate_mode)
3568 ++ __field(isolate_mode_t, isolate_mode)
3569 + ),
3570 +
3571 + TP_fast_assign(
3572 +@@ -235,7 +312,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
3573 + unsigned long nr_lumpy_taken,
3574 + unsigned long nr_lumpy_dirty,
3575 + unsigned long nr_lumpy_failed,
3576 +- int isolate_mode),
3577 ++ isolate_mode_t isolate_mode),
3578 +
3579 + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
3580 +
3581 +@@ -250,7 +327,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
3582 + unsigned long nr_lumpy_taken,
3583 + unsigned long nr_lumpy_dirty,
3584 + unsigned long nr_lumpy_failed,
3585 +- int isolate_mode),
3586 ++ isolate_mode_t isolate_mode),
3587 +
3588 + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
3589 +
3590 +diff --git a/kernel/cpuset.c b/kernel/cpuset.c
3591 +index 9c9b754..b2e84bd 100644
3592 +--- a/kernel/cpuset.c
3593 ++++ b/kernel/cpuset.c
3594 +@@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task)
3595 + struct cpuset, css);
3596 + }
3597 +
3598 ++#ifdef CONFIG_NUMA
3599 ++static inline bool task_has_mempolicy(struct task_struct *task)
3600 ++{
3601 ++ return task->mempolicy;
3602 ++}
3603 ++#else
3604 ++static inline bool task_has_mempolicy(struct task_struct *task)
3605 ++{
3606 ++ return false;
3607 ++}
3608 ++#endif
3609 ++
3610 ++
3611 + /* bits in struct cpuset flags field */
3612 + typedef enum {
3613 + CS_CPU_EXCLUSIVE,
3614 +@@ -949,7 +962,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
3615 + static void cpuset_change_task_nodemask(struct task_struct *tsk,
3616 + nodemask_t *newmems)
3617 + {
3618 +-repeat:
3619 ++ bool need_loop;
3620 ++
3621 + /*
3622 + * Allow tasks that have access to memory reserves because they have
3623 + * been OOM killed to get memory anywhere.
3624 +@@ -960,46 +974,27 @@ repeat:
3625 + return;
3626 +
3627 + task_lock(tsk);
3628 +- nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
3629 +- mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
3630 +-
3631 +-
3632 + /*
3633 +- * ensure checking ->mems_allowed_change_disable after setting all new
3634 +- * allowed nodes.
3635 +- *
3636 +- * the read-side task can see an nodemask with new allowed nodes and
3637 +- * old allowed nodes. and if it allocates page when cpuset clears newly
3638 +- * disallowed ones continuous, it can see the new allowed bits.
3639 +- *
3640 +- * And if setting all new allowed nodes is after the checking, setting
3641 +- * all new allowed nodes and clearing newly disallowed ones will be done
3642 +- * continuous, and the read-side task may find no node to alloc page.
3643 ++ * Determine if a loop is necessary if another thread is doing
3644 ++ * get_mems_allowed(). If at least one node remains unchanged and
3645 ++ * tsk does not have a mempolicy, then an empty nodemask will not be
3646 ++ * possible when mems_allowed is larger than a word.
3647 + */
3648 +- smp_mb();
3649 ++ need_loop = task_has_mempolicy(tsk) ||
3650 ++ !nodes_intersects(*newmems, tsk->mems_allowed);
3651 +
3652 +- /*
3653 +- * Allocation of memory is very fast, we needn't sleep when waiting
3654 +- * for the read-side.
3655 +- */
3656 +- while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
3657 +- task_unlock(tsk);
3658 +- if (!task_curr(tsk))
3659 +- yield();
3660 +- goto repeat;
3661 +- }
3662 ++ if (need_loop)
3663 ++ write_seqcount_begin(&tsk->mems_allowed_seq);
3664 +
3665 +- /*
3666 +- * ensure checking ->mems_allowed_change_disable before clearing all new
3667 +- * disallowed nodes.
3668 +- *
3669 +- * if clearing newly disallowed bits before the checking, the read-side
3670 +- * task may find no node to alloc page.
3671 +- */
3672 +- smp_mb();
3673 ++ nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
3674 ++ mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
3675 +
3676 + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
3677 + tsk->mems_allowed = *newmems;
3678 ++
3679 ++ if (need_loop)
3680 ++ write_seqcount_end(&tsk->mems_allowed_seq);
3681 ++
3682 + task_unlock(tsk);
3683 + }
3684 +
3685 +diff --git a/kernel/fork.c b/kernel/fork.c
3686 +index 4712e3e..3d42aa3 100644
3687 +--- a/kernel/fork.c
3688 ++++ b/kernel/fork.c
3689 +@@ -985,6 +985,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
3690 + #ifdef CONFIG_CGROUPS
3691 + init_rwsem(&sig->threadgroup_fork_lock);
3692 + #endif
3693 ++#ifdef CONFIG_CPUSETS
3694 ++ seqcount_init(&tsk->mems_allowed_seq);
3695 ++#endif
3696 +
3697 + sig->oom_adj = current->signal->oom_adj;
3698 + sig->oom_score_adj = current->signal->oom_score_adj;
3699 +diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
3700 +index f1eb182..61fc450 100644
3701 +--- a/kernel/time/ntp.c
3702 ++++ b/kernel/time/ntp.c
3703 +@@ -375,7 +375,9 @@ int second_overflow(unsigned long secs)
3704 + time_state = TIME_DEL;
3705 + break;
3706 + case TIME_INS:
3707 +- if (secs % 86400 == 0) {
3708 ++ if (!(time_status & STA_INS))
3709 ++ time_state = TIME_OK;
3710 ++ else if (secs % 86400 == 0) {
3711 + leap = -1;
3712 + time_state = TIME_OOP;
3713 + time_tai++;
3714 +@@ -384,7 +386,9 @@ int second_overflow(unsigned long secs)
3715 + }
3716 + break;
3717 + case TIME_DEL:
3718 +- if ((secs + 1) % 86400 == 0) {
3719 ++ if (!(time_status & STA_DEL))
3720 ++ time_state = TIME_OK;
3721 ++ else if ((secs + 1) % 86400 == 0) {
3722 + leap = 1;
3723 + time_tai--;
3724 + time_state = TIME_WAIT;
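A user-space sketch (not part of the patch) of how a leap second is armed via adjtimex(2); with the checks added above, clearing STA_INS or STA_DEL again before the day boundary drops the state machine back to TIME_OK instead of leaving a stale pending leap second:

#include <sys/timex.h>

int arm_leap_second_insertion(void)
{
	struct timex tx = { 0 };

	tx.modes = ADJ_STATUS;
	tx.status = STA_INS;	/* request +1 s at the next UTC day boundary */
	return adjtimex(&tx);
}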
3725 +diff --git a/mm/compaction.c b/mm/compaction.c
3726 +index adc5336..8ea7308 100644
3727 +--- a/mm/compaction.c
3728 ++++ b/mm/compaction.c
3729 +@@ -35,10 +35,6 @@ struct compact_control {
3730 + unsigned long migrate_pfn; /* isolate_migratepages search base */
3731 + bool sync; /* Synchronous migration */
3732 +
3733 +- /* Account for isolated anon and file pages */
3734 +- unsigned long nr_anon;
3735 +- unsigned long nr_file;
3736 +-
3737 + unsigned int order; /* order a direct compactor needs */
3738 + int migratetype; /* MOVABLE, RECLAIMABLE etc */
3739 + struct zone *zone;
3740 +@@ -223,17 +219,13 @@ static void isolate_freepages(struct zone *zone,
3741 + static void acct_isolated(struct zone *zone, struct compact_control *cc)
3742 + {
3743 + struct page *page;
3744 +- unsigned int count[NR_LRU_LISTS] = { 0, };
3745 ++ unsigned int count[2] = { 0, };
3746 +
3747 +- list_for_each_entry(page, &cc->migratepages, lru) {
3748 +- int lru = page_lru_base_type(page);
3749 +- count[lru]++;
3750 +- }
3751 ++ list_for_each_entry(page, &cc->migratepages, lru)
3752 ++ count[!!page_is_file_cache(page)]++;
3753 +
3754 +- cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
3755 +- cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
3756 +- __mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
3757 +- __mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
3758 ++ __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
3759 ++ __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
3760 + }
3761 +
3762 + /* Similar to reclaim, but different enough that they don't share logic */
3763 +@@ -269,6 +261,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
3764 + unsigned long last_pageblock_nr = 0, pageblock_nr;
3765 + unsigned long nr_scanned = 0, nr_isolated = 0;
3766 + struct list_head *migratelist = &cc->migratepages;
3767 ++ isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE;
3768 +
3769 + /* Do not scan outside zone boundaries */
3770 + low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
3771 +@@ -378,8 +371,11 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
3772 + continue;
3773 + }
3774 +
3775 ++ if (!cc->sync)
3776 ++ mode |= ISOLATE_ASYNC_MIGRATE;
3777 ++
3778 + /* Try isolate the page */
3779 +- if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
3780 ++ if (__isolate_lru_page(page, mode, 0) != 0)
3781 + continue;
3782 +
3783 + VM_BUG_ON(PageTransCompound(page));
3784 +@@ -581,7 +577,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
3785 + nr_migrate = cc->nr_migratepages;
3786 + err = migrate_pages(&cc->migratepages, compaction_alloc,
3787 + (unsigned long)cc, false,
3788 +- cc->sync);
3789 ++ cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
3790 + update_nr_listpages(cc);
3791 + nr_remaining = cc->nr_migratepages;
3792 +
3793 +diff --git a/mm/filemap.c b/mm/filemap.c
3794 +index b7d8603..10481eb 100644
3795 +--- a/mm/filemap.c
3796 ++++ b/mm/filemap.c
3797 +@@ -516,10 +516,13 @@ struct page *__page_cache_alloc(gfp_t gfp)
3798 + struct page *page;
3799 +
3800 + if (cpuset_do_page_mem_spread()) {
3801 +- get_mems_allowed();
3802 +- n = cpuset_mem_spread_node();
3803 +- page = alloc_pages_exact_node(n, gfp, 0);
3804 +- put_mems_allowed();
3805 ++ unsigned int cpuset_mems_cookie;
3806 ++ do {
3807 ++ cpuset_mems_cookie = get_mems_allowed();
3808 ++ n = cpuset_mem_spread_node();
3809 ++ page = alloc_pages_exact_node(n, gfp, 0);
3810 ++ } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
3811 ++
3812 + return page;
3813 + }
3814 + return alloc_pages(gfp, 0);
3815 +diff --git a/mm/hugetlb.c b/mm/hugetlb.c
3816 +index 05f8fd4..ae60a53 100644
3817 +--- a/mm/hugetlb.c
3818 ++++ b/mm/hugetlb.c
3819 +@@ -460,8 +460,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
3820 + struct zonelist *zonelist;
3821 + struct zone *zone;
3822 + struct zoneref *z;
3823 ++ unsigned int cpuset_mems_cookie;
3824 +
3825 +- get_mems_allowed();
3826 ++retry_cpuset:
3827 ++ cpuset_mems_cookie = get_mems_allowed();
3828 + zonelist = huge_zonelist(vma, address,
3829 + htlb_alloc_mask, &mpol, &nodemask);
3830 + /*
3831 +@@ -488,10 +490,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
3832 + }
3833 + }
3834 + }
3835 +-err:
3836 ++
3837 + mpol_cond_put(mpol);
3838 +- put_mems_allowed();
3839 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
3840 ++ goto retry_cpuset;
3841 + return page;
3842 ++
3843 ++err:
3844 ++ mpol_cond_put(mpol);
3845 ++ return NULL;
3846 + }
3847 +
3848 + static void update_and_free_page(struct hstate *h, struct page *page)
3849 +diff --git a/mm/memcontrol.c b/mm/memcontrol.c
3850 +index ffb99b4..57cdf5a 100644
3851 +--- a/mm/memcontrol.c
3852 ++++ b/mm/memcontrol.c
3853 +@@ -1251,7 +1251,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
3854 + unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
3855 + struct list_head *dst,
3856 + unsigned long *scanned, int order,
3857 +- int mode, struct zone *z,
3858 ++ isolate_mode_t mode,
3859 ++ struct zone *z,
3860 + struct mem_cgroup *mem_cont,
3861 + int active, int file)
3862 + {
3863 +diff --git a/mm/memory-failure.c b/mm/memory-failure.c
3864 +index 740c4f5..6496748 100644
3865 +--- a/mm/memory-failure.c
3866 ++++ b/mm/memory-failure.c
3867 +@@ -1464,7 +1464,7 @@ int soft_offline_page(struct page *page, int flags)
3868 + page_is_file_cache(page));
3869 + list_add(&page->lru, &pagelist);
3870 + ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
3871 +- 0, true);
3872 ++ 0, MIGRATE_SYNC);
3873 + if (ret) {
3874 + putback_lru_pages(&pagelist);
3875 + pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
3876 +diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
3877 +index c46887b..ae5a3f2 100644
3878 +--- a/mm/memory_hotplug.c
3879 ++++ b/mm/memory_hotplug.c
3880 +@@ -747,7 +747,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
3881 + }
3882 + /* this function returns # of failed pages */
3883 + ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
3884 +- true, true);
3885 ++ true, MIGRATE_SYNC);
3886 + if (ret)
3887 + putback_lru_pages(&source);
3888 + }
3889 +diff --git a/mm/mempolicy.c b/mm/mempolicy.c
3890 +index 3dac2d1..cff919f 100644
3891 +--- a/mm/mempolicy.c
3892 ++++ b/mm/mempolicy.c
3893 +@@ -926,7 +926,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
3894 +
3895 + if (!list_empty(&pagelist)) {
3896 + err = migrate_pages(&pagelist, new_node_page, dest,
3897 +- false, true);
3898 ++ false, MIGRATE_SYNC);
3899 + if (err)
3900 + putback_lru_pages(&pagelist);
3901 + }
3902 +@@ -1810,18 +1810,24 @@ struct page *
3903 + alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
3904 + unsigned long addr, int node)
3905 + {
3906 +- struct mempolicy *pol = get_vma_policy(current, vma, addr);
3907 ++ struct mempolicy *pol;
3908 + struct zonelist *zl;
3909 + struct page *page;
3910 ++ unsigned int cpuset_mems_cookie;
3911 ++
3912 ++retry_cpuset:
3913 ++ pol = get_vma_policy(current, vma, addr);
3914 ++ cpuset_mems_cookie = get_mems_allowed();
3915 +
3916 +- get_mems_allowed();
3917 + if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
3918 + unsigned nid;
3919 +
3920 + nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
3921 + mpol_cond_put(pol);
3922 + page = alloc_page_interleave(gfp, order, nid);
3923 +- put_mems_allowed();
3924 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
3925 ++ goto retry_cpuset;
3926 ++
3927 + return page;
3928 + }
3929 + zl = policy_zonelist(gfp, pol, node);
3930 +@@ -1832,7 +1838,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
3931 + struct page *page = __alloc_pages_nodemask(gfp, order,
3932 + zl, policy_nodemask(gfp, pol));
3933 + __mpol_put(pol);
3934 +- put_mems_allowed();
3935 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
3936 ++ goto retry_cpuset;
3937 + return page;
3938 + }
3939 + /*
3940 +@@ -1840,7 +1847,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
3941 + */
3942 + page = __alloc_pages_nodemask(gfp, order, zl,
3943 + policy_nodemask(gfp, pol));
3944 +- put_mems_allowed();
3945 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
3946 ++ goto retry_cpuset;
3947 + return page;
3948 + }
3949 +
3950 +@@ -1867,11 +1875,14 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
3951 + {
3952 + struct mempolicy *pol = current->mempolicy;
3953 + struct page *page;
3954 ++ unsigned int cpuset_mems_cookie;
3955 +
3956 + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
3957 + pol = &default_policy;
3958 +
3959 +- get_mems_allowed();
3960 ++retry_cpuset:
3961 ++ cpuset_mems_cookie = get_mems_allowed();
3962 ++
3963 + /*
3964 + * No reference counting needed for current->mempolicy
3965 + * nor system default_policy
3966 +@@ -1882,7 +1893,10 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
3967 + page = __alloc_pages_nodemask(gfp, order,
3968 + policy_zonelist(gfp, pol, numa_node_id()),
3969 + policy_nodemask(gfp, pol));
3970 +- put_mems_allowed();
3971 ++
3972 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
3973 ++ goto retry_cpuset;
3974 ++
3975 + return page;
3976 + }
3977 + EXPORT_SYMBOL(alloc_pages_current);
3978 +diff --git a/mm/migrate.c b/mm/migrate.c
3979 +index 14d0a6a..480714b 100644
3980 +--- a/mm/migrate.c
3981 ++++ b/mm/migrate.c
3982 +@@ -220,6 +220,56 @@ out:
3983 + pte_unmap_unlock(ptep, ptl);
3984 + }
3985 +
3986 ++#ifdef CONFIG_BLOCK
3987 ++/* Returns true if all buffers are successfully locked */
3988 ++static bool buffer_migrate_lock_buffers(struct buffer_head *head,
3989 ++ enum migrate_mode mode)
3990 ++{
3991 ++ struct buffer_head *bh = head;
3992 ++
3993 ++ /* Simple case, sync compaction */
3994 ++ if (mode != MIGRATE_ASYNC) {
3995 ++ do {
3996 ++ get_bh(bh);
3997 ++ lock_buffer(bh);
3998 ++ bh = bh->b_this_page;
3999 ++
4000 ++ } while (bh != head);
4001 ++
4002 ++ return true;
4003 ++ }
4004 ++
4005 ++ /* async case, we cannot block on lock_buffer so use trylock_buffer */
4006 ++ do {
4007 ++ get_bh(bh);
4008 ++ if (!trylock_buffer(bh)) {
4009 ++ /*
4010 ++ * We failed to lock the buffer and cannot stall in
4011 ++ * async migration. Release the taken locks
4012 ++ */
4013 ++ struct buffer_head *failed_bh = bh;
4014 ++ put_bh(failed_bh);
4015 ++ bh = head;
4016 ++ while (bh != failed_bh) {
4017 ++ unlock_buffer(bh);
4018 ++ put_bh(bh);
4019 ++ bh = bh->b_this_page;
4020 ++ }
4021 ++ return false;
4022 ++ }
4023 ++
4024 ++ bh = bh->b_this_page;
4025 ++ } while (bh != head);
4026 ++ return true;
4027 ++}
4028 ++#else
4029 ++static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
4030 ++ enum migrate_mode mode)
4031 ++{
4032 ++ return true;
4033 ++}
4034 ++#endif /* CONFIG_BLOCK */
4035 ++
4036 + /*
4037 + * Replace the page in the mapping.
4038 + *
4039 +@@ -229,7 +279,8 @@ out:
4040 + * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
4041 + */
4042 + static int migrate_page_move_mapping(struct address_space *mapping,
4043 +- struct page *newpage, struct page *page)
4044 ++ struct page *newpage, struct page *page,
4045 ++ struct buffer_head *head, enum migrate_mode mode)
4046 + {
4047 + int expected_count;
4048 + void **pslot;
4049 +@@ -259,6 +310,20 @@ static int migrate_page_move_mapping(struct address_space *mapping,
4050 + }
4051 +
4052 + /*
4053 ++ * In the async migration case of moving a page with buffers, lock the
4054 ++ * buffers using trylock before the mapping is moved. If the mapping
4055 ++ * was moved, we later failed to lock the buffers and could not move
4056 ++ * the mapping back due to an elevated page count, we would have to
4057 ++ * block waiting on other references to be dropped.
4058 ++ */
4059 ++ if (mode == MIGRATE_ASYNC && head &&
4060 ++ !buffer_migrate_lock_buffers(head, mode)) {
4061 ++ page_unfreeze_refs(page, expected_count);
4062 ++ spin_unlock_irq(&mapping->tree_lock);
4063 ++ return -EAGAIN;
4064 ++ }
4065 ++
4066 ++ /*
4067 + * Now we know that no one else is looking at the page.
4068 + */
4069 + get_page(newpage); /* add cache reference */
4070 +@@ -415,13 +480,14 @@ EXPORT_SYMBOL(fail_migrate_page);
4071 + * Pages are locked upon entry and exit.
4072 + */
4073 + int migrate_page(struct address_space *mapping,
4074 +- struct page *newpage, struct page *page)
4075 ++ struct page *newpage, struct page *page,
4076 ++ enum migrate_mode mode)
4077 + {
4078 + int rc;
4079 +
4080 + BUG_ON(PageWriteback(page)); /* Writeback must be complete */
4081 +
4082 +- rc = migrate_page_move_mapping(mapping, newpage, page);
4083 ++ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
4084 +
4085 + if (rc)
4086 + return rc;
4087 +@@ -438,28 +504,28 @@ EXPORT_SYMBOL(migrate_page);
4088 + * exist.
4089 + */
4090 + int buffer_migrate_page(struct address_space *mapping,
4091 +- struct page *newpage, struct page *page)
4092 ++ struct page *newpage, struct page *page, enum migrate_mode mode)
4093 + {
4094 + struct buffer_head *bh, *head;
4095 + int rc;
4096 +
4097 + if (!page_has_buffers(page))
4098 +- return migrate_page(mapping, newpage, page);
4099 ++ return migrate_page(mapping, newpage, page, mode);
4100 +
4101 + head = page_buffers(page);
4102 +
4103 +- rc = migrate_page_move_mapping(mapping, newpage, page);
4104 ++ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
4105 +
4106 + if (rc)
4107 + return rc;
4108 +
4109 +- bh = head;
4110 +- do {
4111 +- get_bh(bh);
4112 +- lock_buffer(bh);
4113 +- bh = bh->b_this_page;
4114 +-
4115 +- } while (bh != head);
4116 ++ /*
4117 ++ * In the async case, migrate_page_move_mapping locked the buffers
4118 ++ * with an IRQ-safe spinlock held. In the sync case, the buffers
4119 ++ * need to be locked now
4120 ++ */
4121 ++ if (mode != MIGRATE_ASYNC)
4122 ++ BUG_ON(!buffer_migrate_lock_buffers(head, mode));
4123 +
4124 + ClearPagePrivate(page);
4125 + set_page_private(newpage, page_private(page));
4126 +@@ -536,10 +602,14 @@ static int writeout(struct address_space *mapping, struct page *page)
4127 + * Default handling if a filesystem does not provide a migration function.
4128 + */
4129 + static int fallback_migrate_page(struct address_space *mapping,
4130 +- struct page *newpage, struct page *page)
4131 ++ struct page *newpage, struct page *page, enum migrate_mode mode)
4132 + {
4133 +- if (PageDirty(page))
4134 ++ if (PageDirty(page)) {
4135 ++ /* Only writeback pages in full synchronous migration */
4136 ++ if (mode != MIGRATE_SYNC)
4137 ++ return -EBUSY;
4138 + return writeout(mapping, page);
4139 ++ }
4140 +
4141 + /*
4142 + * Buffers may be managed in a filesystem specific way.
4143 +@@ -549,7 +619,7 @@ static int fallback_migrate_page(struct address_space *mapping,
4144 + !try_to_release_page(page, GFP_KERNEL))
4145 + return -EAGAIN;
4146 +
4147 +- return migrate_page(mapping, newpage, page);
4148 ++ return migrate_page(mapping, newpage, page, mode);
4149 + }
4150 +
4151 + /*
4152 +@@ -564,7 +634,7 @@ static int fallback_migrate_page(struct address_space *mapping,
4153 + * == 0 - success
4154 + */
4155 + static int move_to_new_page(struct page *newpage, struct page *page,
4156 +- int remap_swapcache, bool sync)
4157 ++ int remap_swapcache, enum migrate_mode mode)
4158 + {
4159 + struct address_space *mapping;
4160 + int rc;
4161 +@@ -585,29 +655,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
4162 +
4163 + mapping = page_mapping(page);
4164 + if (!mapping)
4165 +- rc = migrate_page(mapping, newpage, page);
4166 +- else {
4167 ++ rc = migrate_page(mapping, newpage, page, mode);
4168 ++ else if (mapping->a_ops->migratepage)
4169 + /*
4170 +- * Do not writeback pages if !sync and migratepage is
4171 +- * not pointing to migrate_page() which is nonblocking
4172 +- * (swapcache/tmpfs uses migratepage = migrate_page).
4173 ++ * Most pages have a mapping and most filesystems provide a
4174 ++ * migratepage callback. Anonymous pages are part of swap
4175 ++ * space which also has its own migratepage callback. This
4176 ++ * is the most common path for page migration.
4177 + */
4178 +- if (PageDirty(page) && !sync &&
4179 +- mapping->a_ops->migratepage != migrate_page)
4180 +- rc = -EBUSY;
4181 +- else if (mapping->a_ops->migratepage)
4182 +- /*
4183 +- * Most pages have a mapping and most filesystems
4184 +- * should provide a migration function. Anonymous
4185 +- * pages are part of swap space which also has its
4186 +- * own migration function. This is the most common
4187 +- * path for page migration.
4188 +- */
4189 +- rc = mapping->a_ops->migratepage(mapping,
4190 +- newpage, page);
4191 +- else
4192 +- rc = fallback_migrate_page(mapping, newpage, page);
4193 +- }
4194 ++ rc = mapping->a_ops->migratepage(mapping,
4195 ++ newpage, page, mode);
4196 ++ else
4197 ++ rc = fallback_migrate_page(mapping, newpage, page, mode);
4198 +
4199 + if (rc) {
4200 + newpage->mapping = NULL;
4201 +@@ -621,38 +680,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
4202 + return rc;
4203 + }
4204 +
4205 +-/*
4206 +- * Obtain the lock on page, remove all ptes and migrate the page
4207 +- * to the newly allocated page in newpage.
4208 +- */
4209 +-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
4210 +- struct page *page, int force, bool offlining, bool sync)
4211 ++static int __unmap_and_move(struct page *page, struct page *newpage,
4212 ++ int force, bool offlining, enum migrate_mode mode)
4213 + {
4214 +- int rc = 0;
4215 +- int *result = NULL;
4216 +- struct page *newpage = get_new_page(page, private, &result);
4217 ++ int rc = -EAGAIN;
4218 + int remap_swapcache = 1;
4219 + int charge = 0;
4220 + struct mem_cgroup *mem;
4221 + struct anon_vma *anon_vma = NULL;
4222 +
4223 +- if (!newpage)
4224 +- return -ENOMEM;
4225 +-
4226 +- if (page_count(page) == 1) {
4227 +- /* page was freed from under us. So we are done. */
4228 +- goto move_newpage;
4229 +- }
4230 +- if (unlikely(PageTransHuge(page)))
4231 +- if (unlikely(split_huge_page(page)))
4232 +- goto move_newpage;
4233 +-
4234 +- /* prepare cgroup just returns 0 or -ENOMEM */
4235 +- rc = -EAGAIN;
4236 +-
4237 + if (!trylock_page(page)) {
4238 +- if (!force || !sync)
4239 +- goto move_newpage;
4240 ++ if (!force || mode == MIGRATE_ASYNC)
4241 ++ goto out;
4242 +
4243 + /*
4244 + * It's not safe for direct compaction to call lock_page.
4245 +@@ -668,7 +707,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
4246 + * altogether.
4247 + */
4248 + if (current->flags & PF_MEMALLOC)
4249 +- goto move_newpage;
4250 ++ goto out;
4251 +
4252 + lock_page(page);
4253 + }
4254 +@@ -697,10 +736,12 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
4255 +
4256 + if (PageWriteback(page)) {
4257 + /*
4258 +- * For !sync, there is no point retrying as the retry loop
4259 +- * is expected to be too short for PageWriteback to be cleared
4260 ++ * Only in the case of a full synchronous migration is it
4261 ++ * necessary to wait for PageWriteback. In the async case,
4262 ++ * the retry loop is too short and in the sync-light case,
4263 ++ * the overhead of stalling is too much
4264 + */
4265 +- if (!sync) {
4266 ++ if (mode != MIGRATE_SYNC) {
4267 + rc = -EBUSY;
4268 + goto uncharge;
4269 + }
4270 +@@ -771,7 +812,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
4271 +
4272 + skip_unmap:
4273 + if (!page_mapped(page))
4274 +- rc = move_to_new_page(newpage, page, remap_swapcache, sync);
4275 ++ rc = move_to_new_page(newpage, page, remap_swapcache, mode);
4276 +
4277 + if (rc && remap_swapcache)
4278 + remove_migration_ptes(page, page);
4279 +@@ -785,27 +826,53 @@ uncharge:
4280 + mem_cgroup_end_migration(mem, page, newpage, rc == 0);
4281 + unlock:
4282 + unlock_page(page);
4283 ++out:
4284 ++ return rc;
4285 ++}
4286 +
4287 +-move_newpage:
4288 ++/*
4289 ++ * Obtain the lock on page, remove all ptes and migrate the page
4290 ++ * to the newly allocated page in newpage.
4291 ++ */
4292 ++static int unmap_and_move(new_page_t get_new_page, unsigned long private,
4293 ++ struct page *page, int force, bool offlining,
4294 ++ enum migrate_mode mode)
4295 ++{
4296 ++ int rc = 0;
4297 ++ int *result = NULL;
4298 ++ struct page *newpage = get_new_page(page, private, &result);
4299 ++
4300 ++ if (!newpage)
4301 ++ return -ENOMEM;
4302 ++
4303 ++ if (page_count(page) == 1) {
4304 ++ /* page was freed from under us. So we are done. */
4305 ++ goto out;
4306 ++ }
4307 ++
4308 ++ if (unlikely(PageTransHuge(page)))
4309 ++ if (unlikely(split_huge_page(page)))
4310 ++ goto out;
4311 ++
4312 ++ rc = __unmap_and_move(page, newpage, force, offlining, mode);
4313 ++out:
4314 + if (rc != -EAGAIN) {
4315 +- /*
4316 +- * A page that has been migrated has all references
4317 +- * removed and will be freed. A page that has not been
4318 +- * migrated will have kepts its references and be
4319 +- * restored.
4320 +- */
4321 +- list_del(&page->lru);
4322 ++ /*
4323 ++ * A page that has been migrated has all references
4324 ++ * removed and will be freed. A page that has not been
4325 ++ * migrated will have kept its references and be
4326 ++ * restored.
4327 ++ */
4328 ++ list_del(&page->lru);
4329 + dec_zone_page_state(page, NR_ISOLATED_ANON +
4330 + page_is_file_cache(page));
4331 + putback_lru_page(page);
4332 + }
4333 +-
4334 + /*
4335 + * Move the new page to the LRU. If migration was not successful
4336 + * then this will free the page.
4337 + */
4338 + putback_lru_page(newpage);
4339 +-
4340 + if (result) {
4341 + if (rc)
4342 + *result = rc;
4343 +@@ -835,7 +902,8 @@ move_newpage:
4344 + */
4345 + static int unmap_and_move_huge_page(new_page_t get_new_page,
4346 + unsigned long private, struct page *hpage,
4347 +- int force, bool offlining, bool sync)
4348 ++ int force, bool offlining,
4349 ++ enum migrate_mode mode)
4350 + {
4351 + int rc = 0;
4352 + int *result = NULL;
4353 +@@ -848,7 +916,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
4354 + rc = -EAGAIN;
4355 +
4356 + if (!trylock_page(hpage)) {
4357 +- if (!force || !sync)
4358 ++ if (!force || mode != MIGRATE_SYNC)
4359 + goto out;
4360 + lock_page(hpage);
4361 + }
4362 +@@ -859,7 +927,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
4363 + try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
4364 +
4365 + if (!page_mapped(hpage))
4366 +- rc = move_to_new_page(new_hpage, hpage, 1, sync);
4367 ++ rc = move_to_new_page(new_hpage, hpage, 1, mode);
4368 +
4369 + if (rc)
4370 + remove_migration_ptes(hpage, hpage);
4371 +@@ -902,7 +970,7 @@ out:
4372 + */
4373 + int migrate_pages(struct list_head *from,
4374 + new_page_t get_new_page, unsigned long private, bool offlining,
4375 +- bool sync)
4376 ++ enum migrate_mode mode)
4377 + {
4378 + int retry = 1;
4379 + int nr_failed = 0;
4380 +@@ -923,7 +991,7 @@ int migrate_pages(struct list_head *from,
4381 +
4382 + rc = unmap_and_move(get_new_page, private,
4383 + page, pass > 2, offlining,
4384 +- sync);
4385 ++ mode);
4386 +
4387 + switch(rc) {
4388 + case -ENOMEM:
4389 +@@ -953,7 +1021,7 @@ out:
4390 +
4391 + int migrate_huge_pages(struct list_head *from,
4392 + new_page_t get_new_page, unsigned long private, bool offlining,
4393 +- bool sync)
4394 ++ enum migrate_mode mode)
4395 + {
4396 + int retry = 1;
4397 + int nr_failed = 0;
4398 +@@ -970,7 +1038,7 @@ int migrate_huge_pages(struct list_head *from,
4399 +
4400 + rc = unmap_and_move_huge_page(get_new_page,
4401 + private, page, pass > 2, offlining,
4402 +- sync);
4403 ++ mode);
4404 +
4405 + switch(rc) {
4406 + case -ENOMEM:
4407 +@@ -1099,7 +1167,7 @@ set_status:
4408 + err = 0;
4409 + if (!list_empty(&pagelist)) {
4410 + err = migrate_pages(&pagelist, new_page_node,
4411 +- (unsigned long)pm, 0, true);
4412 ++ (unsigned long)pm, 0, MIGRATE_SYNC);
4413 + if (err)
4414 + putback_lru_pages(&pagelist);
4415 + }
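Roughly how a caller looks after this conversion (the mm/memory_hotplug.c hunk earlier in the patch has the same shape): an explicit migrate_mode replaces the old bool sync, and a positive return value is the number of pages that could not be migrated:

#include <linux/list.h>
#include <linux/migrate.h>

static int example_migrate_list(struct list_head *pages,
				new_page_t get_new_page, unsigned long private)
{
	int nr_failed = migrate_pages(pages, get_new_page, private,
				      false /* offlining */, MIGRATE_SYNC);
	if (nr_failed)
		putback_lru_pages(pages); /* give the stragglers back to the LRU */

	return nr_failed;
}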
4416 +diff --git a/mm/page_alloc.c b/mm/page_alloc.c
4417 +index 947a7e9..9177aa3 100644
4418 +--- a/mm/page_alloc.c
4419 ++++ b/mm/page_alloc.c
4420 +@@ -1897,14 +1897,20 @@ static struct page *
4421 + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
4422 + struct zonelist *zonelist, enum zone_type high_zoneidx,
4423 + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
4424 +- int migratetype, unsigned long *did_some_progress,
4425 +- bool sync_migration)
4426 ++ int migratetype, bool sync_migration,
4427 ++ bool *deferred_compaction,
4428 ++ unsigned long *did_some_progress)
4429 + {
4430 + struct page *page;
4431 +
4432 +- if (!order || compaction_deferred(preferred_zone))
4433 ++ if (!order)
4434 + return NULL;
4435 +
4436 ++ if (compaction_deferred(preferred_zone)) {
4437 ++ *deferred_compaction = true;
4438 ++ return NULL;
4439 ++ }
4440 ++
4441 + current->flags |= PF_MEMALLOC;
4442 + *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
4443 + nodemask, sync_migration);
4444 +@@ -1932,7 +1938,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
4445 + * but not enough to satisfy watermarks.
4446 + */
4447 + count_vm_event(COMPACTFAIL);
4448 +- defer_compaction(preferred_zone);
4449 ++
4450 ++ /*
4451 ++ * As async compaction considers a subset of pageblocks, only
4452 ++ * defer if the failure was a sync compaction failure.
4453 ++ */
4454 ++ if (sync_migration)
4455 ++ defer_compaction(preferred_zone);
4456 +
4457 + cond_resched();
4458 + }
4459 +@@ -1944,8 +1956,9 @@ static inline struct page *
4460 + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
4461 + struct zonelist *zonelist, enum zone_type high_zoneidx,
4462 + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
4463 +- int migratetype, unsigned long *did_some_progress,
4464 +- bool sync_migration)
4465 ++ int migratetype, bool sync_migration,
4466 ++ bool *deferred_compaction,
4467 ++ unsigned long *did_some_progress)
4468 + {
4469 + return NULL;
4470 + }
4471 +@@ -2095,6 +2108,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
4472 + unsigned long pages_reclaimed = 0;
4473 + unsigned long did_some_progress;
4474 + bool sync_migration = false;
4475 ++ bool deferred_compaction = false;
4476 +
4477 + /*
4478 + * In the slowpath, we sanity check order to avoid ever trying to
4479 +@@ -2175,12 +2189,22 @@ rebalance:
4480 + zonelist, high_zoneidx,
4481 + nodemask,
4482 + alloc_flags, preferred_zone,
4483 +- migratetype, &did_some_progress,
4484 +- sync_migration);
4485 ++ migratetype, sync_migration,
4486 ++ &deferred_compaction,
4487 ++ &did_some_progress);
4488 + if (page)
4489 + goto got_pg;
4490 + sync_migration = true;
4491 +
4492 ++ /*
4493 ++ * If compaction is deferred for high-order allocations, it is because
4494 ++ * sync compaction recently failed. If this is the case and the caller
4495 ++ * has requested the system not be heavily disrupted, fail the
4496 ++ * allocation now instead of entering direct reclaim
4497 ++ */
4498 ++ if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
4499 ++ goto nopage;
4500 ++
4501 + /* Try direct reclaim and then allocating */
4502 + page = __alloc_pages_direct_reclaim(gfp_mask, order,
4503 + zonelist, high_zoneidx,
4504 +@@ -2243,8 +2267,9 @@ rebalance:
4505 + zonelist, high_zoneidx,
4506 + nodemask,
4507 + alloc_flags, preferred_zone,
4508 +- migratetype, &did_some_progress,
4509 +- sync_migration);
4510 ++ migratetype, sync_migration,
4511 ++ &deferred_compaction,
4512 ++ &did_some_progress);
4513 + if (page)
4514 + goto got_pg;
4515 + }
4516 +@@ -2268,8 +2293,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
4517 + {
4518 + enum zone_type high_zoneidx = gfp_zone(gfp_mask);
4519 + struct zone *preferred_zone;
4520 +- struct page *page;
4521 ++ struct page *page = NULL;
4522 + int migratetype = allocflags_to_migratetype(gfp_mask);
4523 ++ unsigned int cpuset_mems_cookie;
4524 +
4525 + gfp_mask &= gfp_allowed_mask;
4526 +
4527 +@@ -2288,15 +2314,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
4528 + if (unlikely(!zonelist->_zonerefs->zone))
4529 + return NULL;
4530 +
4531 +- get_mems_allowed();
4532 ++retry_cpuset:
4533 ++ cpuset_mems_cookie = get_mems_allowed();
4534 ++
4535 + /* The preferred zone is used for statistics later */
4536 + first_zones_zonelist(zonelist, high_zoneidx,
4537 + nodemask ? : &cpuset_current_mems_allowed,
4538 + &preferred_zone);
4539 +- if (!preferred_zone) {
4540 +- put_mems_allowed();
4541 +- return NULL;
4542 +- }
4543 ++ if (!preferred_zone)
4544 ++ goto out;
4545 +
4546 + /* First allocation attempt */
4547 + page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
4548 +@@ -2306,9 +2332,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
4549 + page = __alloc_pages_slowpath(gfp_mask, order,
4550 + zonelist, high_zoneidx, nodemask,
4551 + preferred_zone, migratetype);
4552 +- put_mems_allowed();
4553 +
4554 + trace_mm_page_alloc(page, order, gfp_mask, migratetype);
4555 ++
4556 ++out:
4557 ++ /*
4558 ++ * When updating a task's mems_allowed, it is possible to race with
4559 ++ * parallel threads in such a way that an allocation can fail while
4560 ++ * the mask is being updated. If a page allocation is about to fail,
4561 ++ * check if the cpuset changed during allocation and if so, retry.
4562 ++ */
4563 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
4564 ++ goto retry_cpuset;
4565 ++
4566 + return page;
4567 + }
4568 + EXPORT_SYMBOL(__alloc_pages_nodemask);
4569 +@@ -2532,13 +2568,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
4570 + bool skip_free_areas_node(unsigned int flags, int nid)
4571 + {
4572 + bool ret = false;
4573 ++ unsigned int cpuset_mems_cookie;
4574 +
4575 + if (!(flags & SHOW_MEM_FILTER_NODES))
4576 + goto out;
4577 +
4578 +- get_mems_allowed();
4579 +- ret = !node_isset(nid, cpuset_current_mems_allowed);
4580 +- put_mems_allowed();
4581 ++ do {
4582 ++ cpuset_mems_cookie = get_mems_allowed();
4583 ++ ret = !node_isset(nid, cpuset_current_mems_allowed);
4584 ++ } while (!put_mems_allowed(cpuset_mems_cookie));
4585 + out:
4586 + return ret;
4587 + }
4588 +@@ -3418,25 +3456,33 @@ static void setup_zone_migrate_reserve(struct zone *zone)
4589 + if (page_to_nid(page) != zone_to_nid(zone))
4590 + continue;
4591 +
4592 +- /* Blocks with reserved pages will never free, skip them. */
4593 +- block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
4594 +- if (pageblock_is_reserved(pfn, block_end_pfn))
4595 +- continue;
4596 +-
4597 + block_migratetype = get_pageblock_migratetype(page);
4598 +
4599 +- /* If this block is reserved, account for it */
4600 +- if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) {
4601 +- reserve--;
4602 +- continue;
4603 +- }
4604 ++ /* Only test what is necessary when the reserves are not met */
4605 ++ if (reserve > 0) {
4606 ++ /*
4607 ++ * Blocks with reserved pages will never free, skip
4608 ++ * them.
4609 ++ */
4610 ++ block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
4611 ++ if (pageblock_is_reserved(pfn, block_end_pfn))
4612 ++ continue;
4613 +
4614 +- /* Suitable for reserving if this block is movable */
4615 +- if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) {
4616 +- set_pageblock_migratetype(page, MIGRATE_RESERVE);
4617 +- move_freepages_block(zone, page, MIGRATE_RESERVE);
4618 +- reserve--;
4619 +- continue;
4620 ++ /* If this block is reserved, account for it */
4621 ++ if (block_migratetype == MIGRATE_RESERVE) {
4622 ++ reserve--;
4623 ++ continue;
4624 ++ }
4625 ++
4626 ++ /* Suitable for reserving if this block is movable */
4627 ++ if (block_migratetype == MIGRATE_MOVABLE) {
4628 ++ set_pageblock_migratetype(page,
4629 ++ MIGRATE_RESERVE);
4630 ++ move_freepages_block(zone, page,
4631 ++ MIGRATE_RESERVE);
4632 ++ reserve--;
4633 ++ continue;
4634 ++ }
4635 + }
4636 +
4637 + /*
4638 +diff --git a/mm/slab.c b/mm/slab.c
4639 +index d96e223..a67f812 100644
4640 +--- a/mm/slab.c
4641 ++++ b/mm/slab.c
4642 +@@ -3218,12 +3218,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
4643 + if (in_interrupt() || (flags & __GFP_THISNODE))
4644 + return NULL;
4645 + nid_alloc = nid_here = numa_mem_id();
4646 +- get_mems_allowed();
4647 + if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
4648 + nid_alloc = cpuset_slab_spread_node();
4649 + else if (current->mempolicy)
4650 + nid_alloc = slab_node(current->mempolicy);
4651 +- put_mems_allowed();
4652 + if (nid_alloc != nid_here)
4653 + return ____cache_alloc_node(cachep, flags, nid_alloc);
4654 + return NULL;
4655 +@@ -3246,14 +3244,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
4656 + enum zone_type high_zoneidx = gfp_zone(flags);
4657 + void *obj = NULL;
4658 + int nid;
4659 ++ unsigned int cpuset_mems_cookie;
4660 +
4661 + if (flags & __GFP_THISNODE)
4662 + return NULL;
4663 +
4664 +- get_mems_allowed();
4665 +- zonelist = node_zonelist(slab_node(current->mempolicy), flags);
4666 + local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
4667 +
4668 ++retry_cpuset:
4669 ++ cpuset_mems_cookie = get_mems_allowed();
4670 ++ zonelist = node_zonelist(slab_node(current->mempolicy), flags);
4671 ++
4672 + retry:
4673 + /*
4674 + * Look through allowed nodes for objects available
4675 +@@ -3306,7 +3307,9 @@ retry:
4676 + }
4677 + }
4678 + }
4679 +- put_mems_allowed();
4680 ++
4681 ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
4682 ++ goto retry_cpuset;
4683 + return obj;
4684 + }
4685 +
4686 +diff --git a/mm/slub.c b/mm/slub.c
4687 +index 10ab233..ae6e80e 100644
4688 +--- a/mm/slub.c
4689 ++++ b/mm/slub.c
4690 +@@ -1457,6 +1457,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
4691 + struct zone *zone;
4692 + enum zone_type high_zoneidx = gfp_zone(flags);
4693 + struct page *page;
4694 ++ unsigned int cpuset_mems_cookie;
4695 +
4696 + /*
4697 + * The defrag ratio allows a configuration of the tradeoffs between
4698 +@@ -1480,23 +1481,32 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
4699 + get_cycles() % 1024 > s->remote_node_defrag_ratio)
4700 + return NULL;
4701 +
4702 +- get_mems_allowed();
4703 +- zonelist = node_zonelist(slab_node(current->mempolicy), flags);
4704 +- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
4705 +- struct kmem_cache_node *n;
4706 +-
4707 +- n = get_node(s, zone_to_nid(zone));
4708 +-
4709 +- if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
4710 +- n->nr_partial > s->min_partial) {
4711 +- page = get_partial_node(n);
4712 +- if (page) {
4713 +- put_mems_allowed();
4714 +- return page;
4715 ++ do {
4716 ++ cpuset_mems_cookie = get_mems_allowed();
4717 ++ zonelist = node_zonelist(slab_node(current->mempolicy), flags);
4718 ++ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
4719 ++ struct kmem_cache_node *n;
4720 ++
4721 ++ n = get_node(s, zone_to_nid(zone));
4722 ++
4723 ++ if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
4724 ++ n->nr_partial > s->min_partial) {
4725 ++ page = get_partial_node(n);
4726 ++ if (page) {
4727 ++ /*
4728 ++ * Return the object even if
4729 ++ * put_mems_allowed indicated that
4730 ++ * the cpuset mems_allowed was
4731 ++ * updated in parallel. It's a
4732 ++ * harmless race between the alloc
4733 ++ * and the cpuset update.
4734 ++ */
4735 ++ put_mems_allowed(cpuset_mems_cookie);
4736 ++ return page;
4737 ++ }
4738 + }
4739 + }
4740 +- }
4741 +- put_mems_allowed();
4742 ++ } while (!put_mems_allowed(cpuset_mems_cookie));
4743 + #endif
4744 + return NULL;
4745 + }
4746 +diff --git a/mm/vmscan.c b/mm/vmscan.c
4747 +index 1b0ed36..5326f98 100644
4748 +--- a/mm/vmscan.c
4749 ++++ b/mm/vmscan.c
4750 +@@ -248,35 +248,66 @@ unsigned long shrink_slab(struct shrink_control *shrink,
4751 +
4752 + list_for_each_entry(shrinker, &shrinker_list, list) {
4753 + unsigned long long delta;
4754 +- unsigned long total_scan;
4755 +- unsigned long max_pass;
4756 ++ long total_scan;
4757 ++ long max_pass;
4758 ++ int shrink_ret = 0;
4759 ++ long nr;
4760 ++ long new_nr;
4761 +
4762 + max_pass = do_shrinker_shrink(shrinker, shrink, 0);
4763 ++ if (max_pass <= 0)
4764 ++ continue;
4765 ++
4766 ++ /*
4767 ++ * copy the current shrinker scan count into a local variable
4768 ++ * and zero it so that other concurrent shrinker invocations
4769 ++ * don't also do this scanning work.
4770 ++ */
4771 ++ do {
4772 ++ nr = shrinker->nr;
4773 ++ } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
4774 ++
4775 ++ total_scan = nr;
4776 + delta = (4 * nr_pages_scanned) / shrinker->seeks;
4777 + delta *= max_pass;
4778 + do_div(delta, lru_pages + 1);
4779 +- shrinker->nr += delta;
4780 +- if (shrinker->nr < 0) {
4781 ++ total_scan += delta;
4782 ++ if (total_scan < 0) {
4783 + printk(KERN_ERR "shrink_slab: %pF negative objects to "
4784 + "delete nr=%ld\n",
4785 +- shrinker->shrink, shrinker->nr);
4786 +- shrinker->nr = max_pass;
4787 ++ shrinker->shrink, total_scan);
4788 ++ total_scan = max_pass;
4789 + }
4790 +
4791 + /*
4792 ++ * We need to avoid excessive windup on filesystem shrinkers
4793 ++ * due to large numbers of GFP_NOFS allocations causing the
4794 ++ * shrinkers to return -1 all the time. This results in a large
4795 ++ * nr being built up so when a shrink that can do some work
4796 ++ * comes along it empties the entire cache due to nr >>>
4797 ++ * max_pass. This is bad for sustaining a working set in
4798 ++ * memory.
4799 ++ *
4800 ++ * Hence only allow the shrinker to scan the entire cache when
4801 ++ * a large delta change is calculated directly.
4802 ++ */
4803 ++ if (delta < max_pass / 4)
4804 ++ total_scan = min(total_scan, max_pass / 2);
4805 ++
4806 ++ /*
4807 + * Avoid risking looping forever due to too large nr value:
4808 + * never try to free more than twice the estimate number of
4809 + * freeable entries.
4810 + */
4811 +- if (shrinker->nr > max_pass * 2)
4812 +- shrinker->nr = max_pass * 2;
4813 ++ if (total_scan > max_pass * 2)
4814 ++ total_scan = max_pass * 2;
4815 +
4816 +- total_scan = shrinker->nr;
4817 +- shrinker->nr = 0;
4818 ++ trace_mm_shrink_slab_start(shrinker, shrink, nr,
4819 ++ nr_pages_scanned, lru_pages,
4820 ++ max_pass, delta, total_scan);
4821 +
4822 + while (total_scan >= SHRINK_BATCH) {
4823 + long this_scan = SHRINK_BATCH;
4824 +- int shrink_ret;
4825 + int nr_before;
4826 +
4827 + nr_before = do_shrinker_shrink(shrinker, shrink, 0);
4828 +@@ -292,7 +323,19 @@ unsigned long shrink_slab(struct shrink_control *shrink,
4829 + cond_resched();
4830 + }
4831 +
4832 +- shrinker->nr += total_scan;
4833 ++ /*
4834 ++ * move the unused scan count back into the shrinker in a
4835 ++ * manner that handles concurrent updates. If we exhausted the
4836 ++ * scan, there is no need to do an update.
4837 ++ */
4838 ++ do {
4839 ++ nr = shrinker->nr;
4840 ++ new_nr = total_scan + nr;
4841 ++ if (total_scan <= 0)
4842 ++ break;
4843 ++ } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
4844 ++
4845 ++ trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
4846 + }
4847 + up_read(&shrinker_rwsem);
4848 + out:
4849 +@@ -683,7 +726,13 @@ static enum page_references page_check_references(struct page *page,
4850 + */
4851 + SetPageReferenced(page);
4852 +
4853 +- if (referenced_page)
4854 ++ if (referenced_page || referenced_ptes > 1)
4855 ++ return PAGEREF_ACTIVATE;
4856 ++
4857 ++ /*
4858 ++ * Activate file-backed executable pages after first usage.
4859 ++ */
4860 ++ if (vm_flags & VM_EXEC)
4861 + return PAGEREF_ACTIVATE;
4862 +
4863 + return PAGEREF_KEEP;
4864 +@@ -972,23 +1021,27 @@ keep_lumpy:
4865 + *
4866 + * returns 0 on success, -ve errno on failure.
4867 + */
4868 +-int __isolate_lru_page(struct page *page, int mode, int file)
4869 ++int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
4870 + {
4871 ++ bool all_lru_mode;
4872 + int ret = -EINVAL;
4873 +
4874 + /* Only take pages on the LRU. */
4875 + if (!PageLRU(page))
4876 + return ret;
4877 +
4878 ++ all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
4879 ++ (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
4880 ++
4881 + /*
4882 + * When checking the active state, we need to be sure we are
4883 + * dealing with comparable boolean values. Take the logical not
4884 + * of each.
4885 + */
4886 +- if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
4887 ++ if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
4888 + return ret;
4889 +
4890 +- if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
4891 ++ if (!all_lru_mode && !!page_is_file_cache(page) != file)
4892 + return ret;
4893 +
4894 + /*
4895 +@@ -1001,6 +1054,43 @@ int __isolate_lru_page(struct page *page, int mode, int file)
4896 +
4897 + ret = -EBUSY;
4898 +
4899 ++ /*
4900 ++ * To minimise LRU disruption, the caller can indicate that it only
4901 ++ * wants to isolate pages it will be able to operate on without
4902 ++ * blocking - clean pages for the most part.
4903 ++ *
4904 ++ * ISOLATE_CLEAN means that only clean pages should be isolated. This
4905 ++ * is used by reclaim when it cannot write to backing storage
4906 ++ *
4907 ++ * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants pages
4908 ++ * that it is possible to migrate without blocking
4909 ++ */
4910 ++ if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
4911 ++ /* All the caller can do on PageWriteback is block */
4912 ++ if (PageWriteback(page))
4913 ++ return ret;
4914 ++
4915 ++ if (PageDirty(page)) {
4916 ++ struct address_space *mapping;
4917 ++
4918 ++ /* ISOLATE_CLEAN means only clean pages */
4919 ++ if (mode & ISOLATE_CLEAN)
4920 ++ return ret;
4921 ++
4922 ++ /*
4923 ++ * Only pages without mappings or that have a
4924 ++ * ->migratepage callback are possible to migrate
4925 ++ * without blocking
4926 ++ */
4927 ++ mapping = page_mapping(page);
4928 ++ if (mapping && !mapping->a_ops->migratepage)
4929 ++ return ret;
4930 ++ }
4931 ++ }
4932 ++
4933 ++ if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
4934 ++ return ret;
4935 ++
4936 + if (likely(get_page_unless_zero(page))) {
4937 + /*
4938 + * Be careful not to clear PageLRU until after we're
4939 +@@ -1036,7 +1126,8 @@ int __isolate_lru_page(struct page *page, int mode, int file)
4940 + */
4941 + static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
4942 + struct list_head *src, struct list_head *dst,
4943 +- unsigned long *scanned, int order, int mode, int file)
4944 ++ unsigned long *scanned, int order, isolate_mode_t mode,
4945 ++ int file)
4946 + {
4947 + unsigned long nr_taken = 0;
4948 + unsigned long nr_lumpy_taken = 0;
4949 +@@ -1111,7 +1202,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
4950 + * anon page which don't already have a swap slot is
4951 + * pointless.
4952 + */
4953 +- if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
4954 ++ if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
4955 + !PageSwapCache(cursor_page))
4956 + break;
4957 +
4958 +@@ -1161,8 +1252,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
4959 + static unsigned long isolate_pages_global(unsigned long nr,
4960 + struct list_head *dst,
4961 + unsigned long *scanned, int order,
4962 +- int mode, struct zone *z,
4963 +- int active, int file)
4964 ++ isolate_mode_t mode,
4965 ++ struct zone *z, int active, int file)
4966 + {
4967 + int lru = LRU_BASE;
4968 + if (active)
4969 +@@ -1408,6 +1499,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
4970 + unsigned long nr_taken;
4971 + unsigned long nr_anon;
4972 + unsigned long nr_file;
4973 ++ isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
4974 +
4975 + while (unlikely(too_many_isolated(zone, file, sc))) {
4976 + congestion_wait(BLK_RW_ASYNC, HZ/10);
4977 +@@ -1418,15 +1510,21 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
4978 + }
4979 +
4980 + set_reclaim_mode(priority, sc, false);
4981 ++ if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
4982 ++ reclaim_mode |= ISOLATE_ACTIVE;
4983 ++
4984 + lru_add_drain();
4985 ++
4986 ++ if (!sc->may_unmap)
4987 ++ reclaim_mode |= ISOLATE_UNMAPPED;
4988 ++ if (!sc->may_writepage)
4989 ++ reclaim_mode |= ISOLATE_CLEAN;
4990 ++
4991 + spin_lock_irq(&zone->lru_lock);
4992 +
4993 + if (scanning_global_lru(sc)) {
4994 +- nr_taken = isolate_pages_global(nr_to_scan,
4995 +- &page_list, &nr_scanned, sc->order,
4996 +- sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
4997 +- ISOLATE_BOTH : ISOLATE_INACTIVE,
4998 +- zone, 0, file);
4999 ++ nr_taken = isolate_pages_global(nr_to_scan, &page_list,
5000 ++ &nr_scanned, sc->order, reclaim_mode, zone, 0, file);
5001 + zone->pages_scanned += nr_scanned;
5002 + if (current_is_kswapd())
5003 + __count_zone_vm_events(PGSCAN_KSWAPD, zone,
5004 +@@ -1435,12 +1533,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
5005 + __count_zone_vm_events(PGSCAN_DIRECT, zone,
5006 + nr_scanned);
5007 + } else {
5008 +- nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
5009 +- &page_list, &nr_scanned, sc->order,
5010 +- sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
5011 +- ISOLATE_BOTH : ISOLATE_INACTIVE,
5012 +- zone, sc->mem_cgroup,
5013 +- 0, file);
5014 ++ nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
5015 ++ &nr_scanned, sc->order, reclaim_mode, zone,
5016 ++ sc->mem_cgroup, 0, file);
5017 + /*
5018 + * mem_cgroup_isolate_pages() keeps track of
5019 + * scanned pages on its own.
5020 +@@ -1542,19 +1637,26 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
5021 + struct page *page;
5022 + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
5023 + unsigned long nr_rotated = 0;
5024 ++ isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
5025 +
5026 + lru_add_drain();
5027 ++
5028 ++ if (!sc->may_unmap)
5029 ++ reclaim_mode |= ISOLATE_UNMAPPED;
5030 ++ if (!sc->may_writepage)
5031 ++ reclaim_mode |= ISOLATE_CLEAN;
5032 ++
5033 + spin_lock_irq(&zone->lru_lock);
5034 + if (scanning_global_lru(sc)) {
5035 + nr_taken = isolate_pages_global(nr_pages, &l_hold,
5036 + &pgscanned, sc->order,
5037 +- ISOLATE_ACTIVE, zone,
5038 ++ reclaim_mode, zone,
5039 + 1, file);
5040 + zone->pages_scanned += pgscanned;
5041 + } else {
5042 + nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
5043 + &pgscanned, sc->order,
5044 +- ISOLATE_ACTIVE, zone,
5045 ++ reclaim_mode, zone,
5046 + sc->mem_cgroup, 1, file);
5047 + /*
5048 + * mem_cgroup_isolate_pages() keeps track of
5049 +@@ -1747,23 +1849,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
5050 + u64 fraction[2], denominator;
5051 + enum lru_list l;
5052 + int noswap = 0;
5053 +- int force_scan = 0;
5054 ++ bool force_scan = false;
5055 + unsigned long nr_force_scan[2];
5056 +
5057 +-
5058 +- anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
5059 +- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
5060 +- file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
5061 +- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
5062 +-
5063 +- if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) {
5064 +- /* kswapd does zone balancing and need to scan this zone */
5065 +- if (scanning_global_lru(sc) && current_is_kswapd())
5066 +- force_scan = 1;
5067 +- /* memcg may have small limit and need to avoid priority drop */
5068 +- if (!scanning_global_lru(sc))
5069 +- force_scan = 1;
5070 +- }
5071 ++ /* kswapd does zone balancing and needs to scan this zone */
5072 ++ if (scanning_global_lru(sc) && current_is_kswapd() &&
5073 ++ zone->all_unreclaimable)
5074 ++ force_scan = true;
5075 ++ /* memcg may have small limit and need to avoid priority drop */
5076 ++ if (!scanning_global_lru(sc))
5077 ++ force_scan = true;
5078 +
5079 + /* If we have no swap space, do not bother scanning anon pages. */
5080 + if (!sc->may_swap || (nr_swap_pages <= 0)) {
5081 +@@ -1776,6 +1871,11 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
5082 + goto out;
5083 + }
5084 +
5085 ++ anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
5086 ++ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
5087 ++ file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
5088 ++ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
5089 ++
5090 + if (scanning_global_lru(sc)) {
5091 + free = zone_page_state(zone, NR_FREE_PAGES);
5092 + /* If we have very few page cache pages,
5093 +@@ -1912,8 +2012,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
5094 + * inactive lists are large enough, continue reclaiming
5095 + */
5096 + pages_for_compaction = (2UL << sc->order);
5097 +- inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
5098 +- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
5099 ++ inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
5100 ++ if (nr_swap_pages > 0)
5101 ++ inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
5102 + if (sc->nr_reclaimed < pages_for_compaction &&
5103 + inactive_lru_pages > pages_for_compaction)
5104 + return true;
5105 +@@ -1985,6 +2086,42 @@ restart:
5106 + throttle_vm_writeout(sc->gfp_mask);
5107 + }
5108 +
5109 ++/* Returns true if compaction should go ahead for a high-order request */
5110 ++static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
5111 ++{
5112 ++ unsigned long balance_gap, watermark;
5113 ++ bool watermark_ok;
5114 ++
5115 ++ /* Do not consider compaction for orders reclaim is meant to satisfy */
5116 ++ if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
5117 ++ return false;
5118 ++
5119 ++ /*
5120 ++ * Compaction takes time to run and there are potentially other
5121 ++ * callers using the pages just freed. Continue reclaiming until
5122 ++ * there is a buffer of free pages available to give compaction
5123 ++ * a reasonable chance of completing and allocating the page
5124 ++ */
5125 ++ balance_gap = min(low_wmark_pages(zone),
5126 ++ (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
5127 ++ KSWAPD_ZONE_BALANCE_GAP_RATIO);
5128 ++ watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
5129 ++ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
5130 ++
5131 ++ /*
5132 ++ * If compaction is deferred, reclaim up to a point where
5133 ++ * compaction will have a chance of success when re-enabled
5134 ++ */
5135 ++ if (compaction_deferred(zone))
5136 ++ return watermark_ok;
5137 ++
5138 ++ /* If compaction is not ready to start, keep reclaiming */
5139 ++ if (!compaction_suitable(zone, sc->order))
5140 ++ return false;
5141 ++
5142 ++ return watermark_ok;
5143 ++}
5144 ++
5145 + /*
5146 + * This is the direct reclaim path, for page-allocating processes. We only
5147 + * try to reclaim pages from zones which will satisfy the caller's allocation
5148 +@@ -2000,14 +2137,20 @@ restart:
5149 + *
5150 + * If a zone is deemed to be full of pinned pages then just give it a light
5151 + * scan then give up on it.
5152 ++ *
5153 ++ * This function returns true if a zone is being reclaimed for a costly
5154 ++ * high-order allocation and compaction is ready to begin. This indicates to
5155 ++ * the caller that it should consider retrying the allocation instead of
5156 ++ * further reclaim.
5157 + */
5158 +-static void shrink_zones(int priority, struct zonelist *zonelist,
5159 ++static bool shrink_zones(int priority, struct zonelist *zonelist,
5160 + struct scan_control *sc)
5161 + {
5162 + struct zoneref *z;
5163 + struct zone *zone;
5164 + unsigned long nr_soft_reclaimed;
5165 + unsigned long nr_soft_scanned;
5166 ++ bool aborted_reclaim = false;
5167 +
5168 + for_each_zone_zonelist_nodemask(zone, z, zonelist,
5169 + gfp_zone(sc->gfp_mask), sc->nodemask) {
5170 +@@ -2022,6 +2165,21 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
5171 + continue;
5172 + if (zone->all_unreclaimable && priority != DEF_PRIORITY)
5173 + continue; /* Let kswapd poll it */
5174 ++ if (COMPACTION_BUILD) {
5175 ++ /*
5176 ++ * If we already have plenty of memory free for
5177 ++ * compaction in this zone, don't free any more.
5178 ++ * Even though compaction is invoked for any
5179 ++ * non-zero order, only frequent costly order
5180 ++ * reclamation is disruptive enough to become a
5181 ++ * noticeable problem, like transparent huge page
5182 ++ * allocations.
5183 ++ */
5184 ++ if (compaction_ready(zone, sc)) {
5185 ++ aborted_reclaim = true;
5186 ++ continue;
5187 ++ }
5188 ++ }
5189 + /*
5190 + * This steals pages from memory cgroups over softlimit
5191 + * and returns the number of reclaimed pages and
5192 +@@ -2039,6 +2197,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
5193 +
5194 + shrink_zone(priority, zone, sc);
5195 + }
5196 ++
5197 ++ return aborted_reclaim;
5198 + }
5199 +
5200 + static bool zone_reclaimable(struct zone *zone)
5201 +@@ -2092,8 +2252,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
5202 + struct zoneref *z;
5203 + struct zone *zone;
5204 + unsigned long writeback_threshold;
5205 ++ bool aborted_reclaim;
5206 +
5207 +- get_mems_allowed();
5208 + delayacct_freepages_start();
5209 +
5210 + if (scanning_global_lru(sc))
5211 +@@ -2103,7 +2263,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
5212 + sc->nr_scanned = 0;
5213 + if (!priority)
5214 + disable_swap_token(sc->mem_cgroup);
5215 +- shrink_zones(priority, zonelist, sc);
5216 ++ aborted_reclaim = shrink_zones(priority, zonelist, sc);
5217 ++
5218 + /*
5219 + * Don't shrink slabs when reclaiming memory from
5220 + * over limit cgroups
5221 +@@ -2155,7 +2316,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
5222 +
5223 + out:
5224 + delayacct_freepages_end();
5225 +- put_mems_allowed();
5226 +
5227 + if (sc->nr_reclaimed)
5228 + return sc->nr_reclaimed;
5229 +@@ -2168,6 +2328,10 @@ out:
5230 + if (oom_killer_disabled)
5231 + return 0;
5232 +
5233 ++ /* Aborted reclaim to try compaction? don't OOM, then */
5234 ++ if (aborted_reclaim)
5235 ++ return 1;
5236 ++
5237 + /* top priority shrink_zones still had more to do? don't OOM, then */
5238 + if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
5239 + return 1;
5240 +@@ -2459,6 +2623,9 @@ loop_again:
5241 + high_wmark_pages(zone), 0, 0)) {
5242 + end_zone = i;
5243 + break;
5244 ++ } else {
5245 ++ /* If balanced, clear the congested flag */
5246 ++ zone_clear_flag(zone, ZONE_CONGESTED);
5247 + }
5248 + }
5249 + if (i < 0)
5250 +@@ -2695,7 +2862,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
5251 + * them before going back to sleep.
5252 + */
5253 + set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
5254 +- schedule();
5255 ++
5256 ++ if (!kthread_should_stop())
5257 ++ schedule();
5258 ++
5259 + set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
5260 + } else {
5261 + if (remaining)
5262 +@@ -2722,7 +2892,9 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
5263 + static int kswapd(void *p)
5264 + {
5265 + unsigned long order, new_order;
5266 ++ unsigned balanced_order;
5267 + int classzone_idx, new_classzone_idx;
5268 ++ int balanced_classzone_idx;
5269 + pg_data_t *pgdat = (pg_data_t*)p;
5270 + struct task_struct *tsk = current;
5271 +
5272 +@@ -2753,7 +2925,9 @@ static int kswapd(void *p)
5273 + set_freezable();
5274 +
5275 + order = new_order = 0;
5276 ++ balanced_order = 0;
5277 + classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
5278 ++ balanced_classzone_idx = classzone_idx;
5279 + for ( ; ; ) {
5280 + int ret;
5281 +
5282 +@@ -2762,7 +2936,8 @@ static int kswapd(void *p)
5283 + * new request of a similar or harder type will succeed soon
5284 + * so consider going to sleep on the basis we reclaimed at
5285 + */
5286 +- if (classzone_idx >= new_classzone_idx && order == new_order) {
5287 ++ if (balanced_classzone_idx >= new_classzone_idx &&
5288 ++ balanced_order == new_order) {
5289 + new_order = pgdat->kswapd_max_order;
5290 + new_classzone_idx = pgdat->classzone_idx;
5291 + pgdat->kswapd_max_order = 0;
5292 +@@ -2777,9 +2952,12 @@ static int kswapd(void *p)
5293 + order = new_order;
5294 + classzone_idx = new_classzone_idx;
5295 + } else {
5296 +- kswapd_try_to_sleep(pgdat, order, classzone_idx);
5297 ++ kswapd_try_to_sleep(pgdat, balanced_order,
5298 ++ balanced_classzone_idx);
5299 + order = pgdat->kswapd_max_order;
5300 + classzone_idx = pgdat->classzone_idx;
5301 ++ new_order = order;
5302 ++ new_classzone_idx = classzone_idx;
5303 + pgdat->kswapd_max_order = 0;
5304 + pgdat->classzone_idx = pgdat->nr_zones - 1;
5305 + }
5306 +@@ -2794,7 +2972,9 @@ static int kswapd(void *p)
5307 + */
5308 + if (!ret) {
5309 + trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
5310 +- order = balance_pgdat(pgdat, order, &classzone_idx);
5311 ++ balanced_classzone_idx = classzone_idx;
5312 ++ balanced_order = balance_pgdat(pgdat, order,
5313 ++ &balanced_classzone_idx);
5314 + }
5315 + }
5316 + return 0;
5317 +diff --git a/mm/vmstat.c b/mm/vmstat.c
5318 +index 20c18b7..6559013 100644
5319 +--- a/mm/vmstat.c
5320 ++++ b/mm/vmstat.c
5321 +@@ -78,7 +78,7 @@ void vm_events_fold_cpu(int cpu)
5322 + *
5323 + * vm_stat contains the global counters
5324 + */
5325 +-atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
5326 ++atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
5327 + EXPORT_SYMBOL(vm_stat);
5328 +
5329 + #ifdef CONFIG_SMP