Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.4 commit in: /
Date: Tue, 23 Feb 2021 17:01:15
Message-Id: 1614099647.e616fa25700e101a7b268272ca8d29c9f3bb22ae.mpagano@gentoo
1 commit: e616fa25700e101a7b268272ca8d29c9f3bb22ae
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Tue Feb 23 17:00:47 2021 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Tue Feb 23 17:00:47 2021 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=e616fa25
7
8 Remove shiftfs, broken patch, I will fix soon
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 -
13 5000_shifts-ubuntu-20.04.patch | 2202 ----------------------------------------
14 2 files changed, 2206 deletions(-)
15
16 diff --git a/0000_README b/0000_README
17 index d459040..23e242b 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -471,10 +471,6 @@ Patch: 4567_distro-Gentoo-Kconfig.patch
21 From: Tom Wijsman <TomWij@g.o>
22 Desc: Add Gentoo Linux support config settings and defaults.
23
24 -Patch: 5000_shifts-ubuntu-20.04.patch
25 -From: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/focal
26 -Desc: UID/GID shifting overlay filesystem for containers
27 -
28 Patch: 5011_enable-cpu-optimizations-for-gcc8.patch
29 From: https://github.com/graysky2/kernel_gcc_patch/
30 Desc: Kernel patch for >= gccv8 enables kernel >= v4.13 optimizations for additional CPUs.
31
32 diff --git a/5000_shifts-ubuntu-20.04.patch b/5000_shifts-ubuntu-20.04.patch
33 deleted file mode 100644
34 index f213d93..0000000
35 --- a/5000_shifts-ubuntu-20.04.patch
36 +++ /dev/null
37 @@ -1,2202 +0,0 @@
38 ---- /dev/null 2021-01-06 15:31:07.232620794 -0500
39 -+++ b/fs/shiftfs.c 2021-01-06 19:04:01.754354287 -0500
40 -@@ -0,0 +1,2156 @@
41 -+#include <linux/btrfs.h>
42 -+#include <linux/capability.h>
43 -+#include <linux/cred.h>
44 -+#include <linux/mount.h>
45 -+#include <linux/fdtable.h>
46 -+#include <linux/file.h>
47 -+#include <linux/fs.h>
48 -+#include <linux/namei.h>
49 -+#include <linux/module.h>
50 -+#include <linux/kernel.h>
51 -+#include <linux/magic.h>
52 -+#include <linux/parser.h>
53 -+#include <linux/security.h>
54 -+#include <linux/seq_file.h>
55 -+#include <linux/statfs.h>
56 -+#include <linux/slab.h>
57 -+#include <linux/user_namespace.h>
58 -+#include <linux/uidgid.h>
59 -+#include <linux/xattr.h>
60 -+#include <linux/posix_acl.h>
61 -+#include <linux/posix_acl_xattr.h>
62 -+#include <linux/uio.h>
63 -+
64 -+struct shiftfs_super_info {
65 -+ struct vfsmount *mnt;
66 -+ struct user_namespace *userns;
67 -+ /* creds of process who created the super block */
68 -+ const struct cred *creator_cred;
69 -+ bool mark;
70 -+ unsigned int passthrough;
71 -+ unsigned int passthrough_mark;
72 -+};
73 -+
74 -+static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
75 -+ umode_t mode, dev_t dev, struct dentry *dentry);
76 -+
77 -+#define SHIFTFS_PASSTHROUGH_NONE 0
78 -+#define SHIFTFS_PASSTHROUGH_STAT 1
79 -+#define SHIFTFS_PASSTHROUGH_IOCTL 2
80 -+#define SHIFTFS_PASSTHROUGH_ALL \
81 -+ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
82 -+
83 -+static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
84 -+{
85 -+ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
86 -+ return false;
87 -+
88 -+ return true;
89 -+}
90 -+
91 -+static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
92 -+{
93 -+ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
94 -+ return false;
95 -+
96 -+ return true;
97 -+}
98 -+
99 -+enum {
100 -+ OPT_MARK,
101 -+ OPT_PASSTHROUGH,
102 -+ OPT_LAST,
103 -+};
104 -+
105 -+/* global filesystem options */
106 -+static const match_table_t tokens = {
107 -+ { OPT_MARK, "mark" },
108 -+ { OPT_PASSTHROUGH, "passthrough=%u" },
109 -+ { OPT_LAST, NULL }
110 -+};
111 -+
112 -+static const struct cred *shiftfs_override_creds(const struct super_block *sb)
113 -+{
114 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
115 -+
116 -+ return override_creds(sbinfo->creator_cred);
117 -+}
118 -+
119 -+static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
120 -+ struct cred *newcred)
121 -+{
122 -+ revert_creds(oldcred);
123 -+ put_cred(newcred);
124 -+}
125 -+
126 -+static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
127 -+ kuid_t kuid)
128 -+{
129 -+ uid_t uid = from_kuid(from, kuid);
130 -+ return make_kuid(to, uid);
131 -+}
132 -+
133 -+static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
134 -+ kgid_t kgid)
135 -+{
136 -+ gid_t gid = from_kgid(from, kgid);
137 -+ return make_kgid(to, gid);
138 -+}
139 -+
140 -+static int shiftfs_override_object_creds(const struct super_block *sb,
141 -+ const struct cred **oldcred,
142 -+ struct cred **newcred,
143 -+ struct dentry *dentry, umode_t mode,
144 -+ bool hardlink)
145 -+{
146 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
147 -+ kuid_t fsuid = current_fsuid();
148 -+ kgid_t fsgid = current_fsgid();
149 -+
150 -+ *oldcred = shiftfs_override_creds(sb);
151 -+
152 -+ *newcred = prepare_creds();
153 -+ if (!*newcred) {
154 -+ revert_creds(*oldcred);
155 -+ return -ENOMEM;
156 -+ }
157 -+
158 -+ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
159 -+ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
160 -+
161 -+ if (!hardlink) {
162 -+ int err = security_dentry_create_files_as(dentry, mode,
163 -+ &dentry->d_name,
164 -+ *oldcred, *newcred);
165 -+ if (err) {
166 -+ shiftfs_revert_object_creds(*oldcred, *newcred);
167 -+ return err;
168 -+ }
169 -+ }
170 -+
171 -+ put_cred(override_creds(*newcred));
172 -+ return 0;
173 -+}
174 -+
175 -+static void shiftfs_copyattr(struct inode *from, struct inode *to)
176 -+{
177 -+ struct user_namespace *from_ns = from->i_sb->s_user_ns;
178 -+ struct user_namespace *to_ns = to->i_sb->s_user_ns;
179 -+
180 -+ to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
181 -+ to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
182 -+ to->i_mode = from->i_mode;
183 -+ to->i_atime = from->i_atime;
184 -+ to->i_mtime = from->i_mtime;
185 -+ to->i_ctime = from->i_ctime;
186 -+ i_size_write(to, i_size_read(from));
187 -+}
188 -+
189 -+static void shiftfs_copyflags(struct inode *from, struct inode *to)
190 -+{
191 -+ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
192 -+
193 -+ inode_set_flags(to, from->i_flags & mask, mask);
194 -+}
195 -+
196 -+static void shiftfs_file_accessed(struct file *file)
197 -+{
198 -+ struct inode *upperi, *loweri;
199 -+
200 -+ if (file->f_flags & O_NOATIME)
201 -+ return;
202 -+
203 -+ upperi = file_inode(file);
204 -+ loweri = upperi->i_private;
205 -+
206 -+ if (!loweri)
207 -+ return;
208 -+
209 -+ upperi->i_mtime = loweri->i_mtime;
210 -+ upperi->i_ctime = loweri->i_ctime;
211 -+
212 -+ touch_atime(&file->f_path);
213 -+}
214 -+
215 -+static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
216 -+ char *options)
217 -+{
218 -+ char *p;
219 -+ substring_t args[MAX_OPT_ARGS];
220 -+
221 -+ sbinfo->mark = false;
222 -+ sbinfo->passthrough = 0;
223 -+
224 -+ while ((p = strsep(&options, ",")) != NULL) {
225 -+ int err, intarg, token;
226 -+
227 -+ if (!*p)
228 -+ continue;
229 -+
230 -+ token = match_token(p, tokens, args);
231 -+ switch (token) {
232 -+ case OPT_MARK:
233 -+ sbinfo->mark = true;
234 -+ break;
235 -+ case OPT_PASSTHROUGH:
236 -+ err = match_int(&args[0], &intarg);
237 -+ if (err)
238 -+ return err;
239 -+
240 -+ if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
241 -+ return -EINVAL;
242 -+
243 -+ sbinfo->passthrough = intarg;
244 -+ break;
245 -+ default:
246 -+ return -EINVAL;
247 -+ }
248 -+ }
249 -+
250 -+ return 0;
251 -+}
252 -+
253 -+static void shiftfs_d_release(struct dentry *dentry)
254 -+{
255 -+ struct dentry *lowerd = dentry->d_fsdata;
256 -+
257 -+ if (lowerd)
258 -+ dput(lowerd);
259 -+}
260 -+
261 -+static struct dentry *shiftfs_d_real(struct dentry *dentry,
262 -+ const struct inode *inode)
263 -+{
264 -+ struct dentry *lowerd = dentry->d_fsdata;
265 -+
266 -+ if (inode && d_inode(dentry) == inode)
267 -+ return dentry;
268 -+
269 -+ lowerd = d_real(lowerd, inode);
270 -+ if (lowerd && (!inode || inode == d_inode(lowerd)))
271 -+ return lowerd;
272 -+
273 -+ WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
274 -+ inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
275 -+ return dentry;
276 -+}
277 -+
278 -+static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
279 -+{
280 -+ int err = 1;
281 -+ struct dentry *lowerd = dentry->d_fsdata;
282 -+
283 -+ if (d_is_negative(lowerd) != d_is_negative(dentry))
284 -+ return 0;
285 -+
286 -+ if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
287 -+ err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
288 -+
289 -+ if (d_really_is_positive(dentry)) {
290 -+ struct inode *inode = d_inode(dentry);
291 -+ struct inode *loweri = d_inode(lowerd);
292 -+
293 -+ shiftfs_copyattr(loweri, inode);
294 -+ }
295 -+
296 -+ return err;
297 -+}
298 -+
299 -+static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
300 -+{
301 -+ int err = 1;
302 -+ struct dentry *lowerd = dentry->d_fsdata;
303 -+
304 -+ if (d_unhashed(lowerd) ||
305 -+ ((d_is_negative(lowerd) != d_is_negative(dentry))))
306 -+ return 0;
307 -+
308 -+ if (flags & LOOKUP_RCU)
309 -+ return -ECHILD;
310 -+
311 -+ if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
312 -+ err = lowerd->d_op->d_revalidate(lowerd, flags);
313 -+
314 -+ if (d_really_is_positive(dentry)) {
315 -+ struct inode *inode = d_inode(dentry);
316 -+ struct inode *loweri = d_inode(lowerd);
317 -+
318 -+ shiftfs_copyattr(loweri, inode);
319 -+ }
320 -+
321 -+ return err;
322 -+}
323 -+
324 -+static const struct dentry_operations shiftfs_dentry_ops = {
325 -+ .d_release = shiftfs_d_release,
326 -+ .d_real = shiftfs_d_real,
327 -+ .d_revalidate = shiftfs_d_revalidate,
328 -+ .d_weak_revalidate = shiftfs_d_weak_revalidate,
329 -+};
330 -+
331 -+static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
332 -+ struct delayed_call *done)
333 -+{
334 -+ const char *p;
335 -+ const struct cred *oldcred;
336 -+ struct dentry *lowerd;
337 -+
338 -+ /* RCU lookup not supported */
339 -+ if (!dentry)
340 -+ return ERR_PTR(-ECHILD);
341 -+
342 -+ lowerd = dentry->d_fsdata;
343 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
344 -+ p = vfs_get_link(lowerd, done);
345 -+ revert_creds(oldcred);
346 -+
347 -+ return p;
348 -+}
349 -+
350 -+static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
351 -+ const char *name, const void *value,
352 -+ size_t size, int flags)
353 -+{
354 -+ struct dentry *lowerd = dentry->d_fsdata;
355 -+ int err;
356 -+ const struct cred *oldcred;
357 -+
358 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
359 -+ err = vfs_setxattr(lowerd, name, value, size, flags);
360 -+ revert_creds(oldcred);
361 -+
362 -+ shiftfs_copyattr(lowerd->d_inode, inode);
363 -+
364 -+ return err;
365 -+}
366 -+
367 -+static int shiftfs_xattr_get(const struct xattr_handler *handler,
368 -+ struct dentry *dentry, struct inode *inode,
369 -+ const char *name, void *value, size_t size)
370 -+{
371 -+ struct dentry *lowerd = dentry->d_fsdata;
372 -+ int err;
373 -+ const struct cred *oldcred;
374 -+
375 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
376 -+ err = vfs_getxattr(lowerd, name, value, size);
377 -+ revert_creds(oldcred);
378 -+
379 -+ return err;
380 -+}
381 -+
382 -+static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
383 -+ size_t size)
384 -+{
385 -+ struct dentry *lowerd = dentry->d_fsdata;
386 -+ int err;
387 -+ const struct cred *oldcred;
388 -+
389 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
390 -+ err = vfs_listxattr(lowerd, list, size);
391 -+ revert_creds(oldcred);
392 -+
393 -+ return err;
394 -+}
395 -+
396 -+static int shiftfs_removexattr(struct dentry *dentry, const char *name)
397 -+{
398 -+ struct dentry *lowerd = dentry->d_fsdata;
399 -+ int err;
400 -+ const struct cred *oldcred;
401 -+
402 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
403 -+ err = vfs_removexattr(lowerd, name);
404 -+ revert_creds(oldcred);
405 -+
406 -+ /* update c/mtime */
407 -+ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
408 -+
409 -+ return err;
410 -+}
411 -+
412 -+static int shiftfs_xattr_set(const struct xattr_handler *handler,
413 -+ struct dentry *dentry, struct inode *inode,
414 -+ const char *name, const void *value, size_t size,
415 -+ int flags)
416 -+{
417 -+ if (!value)
418 -+ return shiftfs_removexattr(dentry, name);
419 -+ return shiftfs_setxattr(dentry, inode, name, value, size, flags);
420 -+}
421 -+
422 -+static int shiftfs_inode_test(struct inode *inode, void *data)
423 -+{
424 -+ return inode->i_private == data;
425 -+}
426 -+
427 -+static int shiftfs_inode_set(struct inode *inode, void *data)
428 -+{
429 -+ inode->i_private = data;
430 -+ return 0;
431 -+}
432 -+
433 -+static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
434 -+ umode_t mode, const char *symlink,
435 -+ struct dentry *hardlink, bool excl)
436 -+{
437 -+ int err;
438 -+ const struct cred *oldcred;
439 -+ struct cred *newcred;
440 -+ void *loweri_iop_ptr = NULL;
441 -+ umode_t modei = mode;
442 -+ struct super_block *dir_sb = diri->i_sb;
443 -+ struct dentry *lowerd_new = dentry->d_fsdata;
444 -+ struct inode *inode = NULL, *loweri_dir = diri->i_private;
445 -+ const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
446 -+ struct dentry *lowerd_link = NULL;
447 -+
448 -+ if (hardlink) {
449 -+ loweri_iop_ptr = loweri_dir_iop->link;
450 -+ } else {
451 -+ switch (mode & S_IFMT) {
452 -+ case S_IFDIR:
453 -+ loweri_iop_ptr = loweri_dir_iop->mkdir;
454 -+ break;
455 -+ case S_IFREG:
456 -+ loweri_iop_ptr = loweri_dir_iop->create;
457 -+ break;
458 -+ case S_IFLNK:
459 -+ loweri_iop_ptr = loweri_dir_iop->symlink;
460 -+ break;
461 -+ case S_IFSOCK:
462 -+ /* fall through */
463 -+ case S_IFIFO:
464 -+ loweri_iop_ptr = loweri_dir_iop->mknod;
465 -+ break;
466 -+ }
467 -+ }
468 -+ if (!loweri_iop_ptr) {
469 -+ err = -EINVAL;
470 -+ goto out_iput;
471 -+ }
472 -+
473 -+ inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
474 -+
475 -+ if (!hardlink) {
476 -+ inode = new_inode(dir_sb);
477 -+ if (!inode) {
478 -+ err = -ENOMEM;
479 -+ goto out_iput;
480 -+ }
481 -+
482 -+ /*
483 -+ * new_inode() will have added the new inode to the super
484 -+ * block's list of inodes. Further below we will call
485 -+ * inode_insert5() Which would perform the same operation again
486 -+ * thereby corrupting the list. To avoid this raise I_CREATING
487 -+ * in i_state which will cause inode_insert5() to skip this
488 -+ * step. I_CREATING will be cleared by d_instantiate_new()
489 -+ * below.
490 -+ */
491 -+ spin_lock(&inode->i_lock);
492 -+ inode->i_state |= I_CREATING;
493 -+ spin_unlock(&inode->i_lock);
494 -+
495 -+ inode_init_owner(inode, diri, mode);
496 -+ modei = inode->i_mode;
497 -+ }
498 -+
499 -+ err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
500 -+ dentry, modei, hardlink != NULL);
501 -+ if (err)
502 -+ goto out_iput;
503 -+
504 -+ if (hardlink) {
505 -+ lowerd_link = hardlink->d_fsdata;
506 -+ err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
507 -+ } else {
508 -+ switch (modei & S_IFMT) {
509 -+ case S_IFDIR:
510 -+ err = vfs_mkdir(loweri_dir, lowerd_new, modei);
511 -+ break;
512 -+ case S_IFREG:
513 -+ err = vfs_create(loweri_dir, lowerd_new, modei, excl);
514 -+ break;
515 -+ case S_IFLNK:
516 -+ err = vfs_symlink(loweri_dir, lowerd_new, symlink);
517 -+ break;
518 -+ case S_IFSOCK:
519 -+ /* fall through */
520 -+ case S_IFIFO:
521 -+ err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
522 -+ break;
523 -+ default:
524 -+ err = -EINVAL;
525 -+ break;
526 -+ }
527 -+ }
528 -+
529 -+ shiftfs_revert_object_creds(oldcred, newcred);
530 -+
531 -+ if (!err && WARN_ON(!lowerd_new->d_inode))
532 -+ err = -EIO;
533 -+ if (err)
534 -+ goto out_iput;
535 -+
536 -+ if (hardlink) {
537 -+ inode = d_inode(hardlink);
538 -+ ihold(inode);
539 -+
540 -+ /* copy up times from lower inode */
541 -+ shiftfs_copyattr(d_inode(lowerd_link), inode);
542 -+ set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
543 -+ d_instantiate(dentry, inode);
544 -+ } else {
545 -+ struct inode *inode_tmp;
546 -+ struct inode *loweri_new = d_inode(lowerd_new);
547 -+
548 -+ inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
549 -+ shiftfs_inode_test, shiftfs_inode_set,
550 -+ loweri_new);
551 -+ if (unlikely(inode_tmp != inode)) {
552 -+ pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
553 -+ iput(inode_tmp);
554 -+ err = -EINVAL;
555 -+ goto out_iput;
556 -+ }
557 -+
558 -+ ihold(loweri_new);
559 -+ shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
560 -+ 0, lowerd_new);
561 -+ d_instantiate_new(dentry, inode);
562 -+ }
563 -+
564 -+ shiftfs_copyattr(loweri_dir, diri);
565 -+ if (loweri_iop_ptr == loweri_dir_iop->mkdir)
566 -+ set_nlink(diri, loweri_dir->i_nlink);
567 -+
568 -+ inode = NULL;
569 -+
570 -+out_iput:
571 -+ iput(inode);
572 -+ inode_unlock(loweri_dir);
573 -+
574 -+ return err;
575 -+}
576 -+
577 -+static int shiftfs_create(struct inode *dir, struct dentry *dentry,
578 -+ umode_t mode, bool excl)
579 -+{
580 -+ mode |= S_IFREG;
581 -+
582 -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
583 -+}
584 -+
585 -+static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
586 -+ umode_t mode)
587 -+{
588 -+ mode |= S_IFDIR;
589 -+
590 -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
591 -+}
592 -+
593 -+static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
594 -+ struct dentry *dentry)
595 -+{
596 -+ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
597 -+}
598 -+
599 -+static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
600 -+ dev_t rdev)
601 -+{
602 -+ if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
603 -+ return -EPERM;
604 -+
605 -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
606 -+}
607 -+
608 -+static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
609 -+ const char *symlink)
610 -+{
611 -+ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
612 -+}
613 -+
614 -+static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
615 -+{
616 -+ struct dentry *lowerd = dentry->d_fsdata;
617 -+ struct inode *loweri = dir->i_private;
618 -+ struct inode *inode = d_inode(dentry);
619 -+ int err;
620 -+ const struct cred *oldcred;
621 -+
622 -+ dget(lowerd);
623 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
624 -+ inode_lock_nested(loweri, I_MUTEX_PARENT);
625 -+ if (rmdir)
626 -+ err = vfs_rmdir(loweri, lowerd);
627 -+ else
628 -+ err = vfs_unlink(loweri, lowerd, NULL);
629 -+ revert_creds(oldcred);
630 -+
631 -+ if (!err) {
632 -+ d_drop(dentry);
633 -+
634 -+ if (rmdir)
635 -+ clear_nlink(inode);
636 -+ else
637 -+ drop_nlink(inode);
638 -+ }
639 -+ inode_unlock(loweri);
640 -+
641 -+ shiftfs_copyattr(loweri, dir);
642 -+ dput(lowerd);
643 -+
644 -+ return err;
645 -+}
646 -+
647 -+static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
648 -+{
649 -+ return shiftfs_rm(dir, dentry, false);
650 -+}
651 -+
652 -+static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
653 -+{
654 -+ return shiftfs_rm(dir, dentry, true);
655 -+}
656 -+
657 -+static int shiftfs_rename(struct inode *olddir, struct dentry *old,
658 -+ struct inode *newdir, struct dentry *new,
659 -+ unsigned int flags)
660 -+{
661 -+ struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
662 -+ *lowerd_dir_new = new->d_parent->d_fsdata,
663 -+ *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
664 -+ *trapd;
665 -+ struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
666 -+ *loweri_dir_new = lowerd_dir_new->d_inode;
667 -+ int err = -EINVAL;
668 -+ const struct cred *oldcred;
669 -+
670 -+ trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
671 -+
672 -+ if (trapd == lowerd_old || trapd == lowerd_new)
673 -+ goto out_unlock;
674 -+
675 -+ oldcred = shiftfs_override_creds(old->d_sb);
676 -+ err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
677 -+ NULL, flags);
678 -+ revert_creds(oldcred);
679 -+
680 -+ shiftfs_copyattr(loweri_dir_old, olddir);
681 -+ shiftfs_copyattr(loweri_dir_new, newdir);
682 -+
683 -+out_unlock:
684 -+ unlock_rename(lowerd_dir_new, lowerd_dir_old);
685 -+
686 -+ return err;
687 -+}
688 -+
689 -+static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
690 -+ unsigned int flags)
691 -+{
692 -+ struct dentry *new;
693 -+ struct inode *newi;
694 -+ const struct cred *oldcred;
695 -+ struct dentry *lowerd = dentry->d_parent->d_fsdata;
696 -+ struct inode *inode = NULL, *loweri = lowerd->d_inode;
697 -+
698 -+ inode_lock(loweri);
699 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
700 -+ new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
701 -+ revert_creds(oldcred);
702 -+ inode_unlock(loweri);
703 -+
704 -+ if (IS_ERR(new))
705 -+ return new;
706 -+
707 -+ dentry->d_fsdata = new;
708 -+
709 -+ newi = new->d_inode;
710 -+ if (!newi)
711 -+ goto out;
712 -+
713 -+ inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
714 -+ shiftfs_inode_test, shiftfs_inode_set, newi);
715 -+ if (!inode) {
716 -+ dput(new);
717 -+ return ERR_PTR(-ENOMEM);
718 -+ }
719 -+ if (inode->i_state & I_NEW) {
720 -+ /*
721 -+ * inode->i_private set by shiftfs_inode_set(), but we still
722 -+ * need to take a reference
723 -+ */
724 -+ ihold(newi);
725 -+ shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
726 -+ unlock_new_inode(inode);
727 -+ }
728 -+
729 -+out:
730 -+ return d_splice_alias(inode, dentry);
731 -+}
732 -+
733 -+static int shiftfs_permission(struct inode *inode, int mask)
734 -+{
735 -+ int err;
736 -+ const struct cred *oldcred;
737 -+ struct inode *loweri = inode->i_private;
738 -+
739 -+ if (!loweri) {
740 -+ WARN_ON(!(mask & MAY_NOT_BLOCK));
741 -+ return -ECHILD;
742 -+ }
743 -+
744 -+ err = generic_permission(inode, mask);
745 -+ if (err)
746 -+ return err;
747 -+
748 -+ oldcred = shiftfs_override_creds(inode->i_sb);
749 -+ err = inode_permission(loweri, mask);
750 -+ revert_creds(oldcred);
751 -+
752 -+ return err;
753 -+}
754 -+
755 -+static int shiftfs_fiemap(struct inode *inode,
756 -+ struct fiemap_extent_info *fieinfo, u64 start,
757 -+ u64 len)
758 -+{
759 -+ int err;
760 -+ const struct cred *oldcred;
761 -+ struct inode *loweri = inode->i_private;
762 -+
763 -+ if (!loweri->i_op->fiemap)
764 -+ return -EOPNOTSUPP;
765 -+
766 -+ oldcred = shiftfs_override_creds(inode->i_sb);
767 -+ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
768 -+ filemap_write_and_wait(loweri->i_mapping);
769 -+ err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
770 -+ revert_creds(oldcred);
771 -+
772 -+ return err;
773 -+}
774 -+
775 -+static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
776 -+ umode_t mode)
777 -+{
778 -+ int err;
779 -+ const struct cred *oldcred;
780 -+ struct dentry *lowerd = dentry->d_fsdata;
781 -+ struct inode *loweri = dir->i_private;
782 -+
783 -+ if (!loweri->i_op->tmpfile)
784 -+ return -EOPNOTSUPP;
785 -+
786 -+ oldcred = shiftfs_override_creds(dir->i_sb);
787 -+ err = loweri->i_op->tmpfile(loweri, lowerd, mode);
788 -+ revert_creds(oldcred);
789 -+
790 -+ return err;
791 -+}
792 -+
793 -+static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
794 -+{
795 -+ struct dentry *lowerd = dentry->d_fsdata;
796 -+ struct inode *loweri = lowerd->d_inode;
797 -+ struct iattr newattr;
798 -+ const struct cred *oldcred;
799 -+ struct super_block *sb = dentry->d_sb;
800 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
801 -+ int err;
802 -+
803 -+ err = setattr_prepare(dentry, attr);
804 -+ if (err)
805 -+ return err;
806 -+
807 -+ newattr = *attr;
808 -+ newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
809 -+ newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
810 -+
811 -+ /*
812 -+ * mode change is for clearing setuid/setgid bits. Allow lower fs
813 -+ * to interpret this in its own way.
814 -+ */
815 -+ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
816 -+ newattr.ia_valid &= ~ATTR_MODE;
817 -+
818 -+ inode_lock(loweri);
819 -+ oldcred = shiftfs_override_creds(dentry->d_sb);
820 -+ err = notify_change(lowerd, &newattr, NULL);
821 -+ revert_creds(oldcred);
822 -+ inode_unlock(loweri);
823 -+
824 -+ shiftfs_copyattr(loweri, d_inode(dentry));
825 -+
826 -+ return err;
827 -+}
828 -+
829 -+static int shiftfs_getattr(const struct path *path, struct kstat *stat,
830 -+ u32 request_mask, unsigned int query_flags)
831 -+{
832 -+ struct inode *inode = path->dentry->d_inode;
833 -+ struct dentry *lowerd = path->dentry->d_fsdata;
834 -+ struct inode *loweri = lowerd->d_inode;
835 -+ struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
836 -+ struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
837 -+ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
838 -+ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
839 -+ const struct cred *oldcred;
840 -+ int err;
841 -+
842 -+ oldcred = shiftfs_override_creds(inode->i_sb);
843 -+ err = vfs_getattr(&newpath, stat, request_mask, query_flags);
844 -+ revert_creds(oldcred);
845 -+
846 -+ if (err)
847 -+ return err;
848 -+
849 -+ /* transform the underlying id */
850 -+ stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
851 -+ stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
852 -+ return 0;
853 -+}
854 -+
855 -+#ifdef CONFIG_SHIFT_FS_POSIX_ACL
856 -+
857 -+static int
858 -+shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
859 -+ struct posix_acl *acl)
860 -+{
861 -+ int i;
862 -+
863 -+ for (i = 0; i < acl->a_count; i++) {
864 -+ struct posix_acl_entry *e = &acl->a_entries[i];
865 -+ switch(e->e_tag) {
866 -+ case ACL_USER:
867 -+ e->e_uid = shift_kuid(from, to, e->e_uid);
868 -+ if (!uid_valid(e->e_uid))
869 -+ return -EOVERFLOW;
870 -+ break;
871 -+ case ACL_GROUP:
872 -+ e->e_gid = shift_kgid(from, to, e->e_gid);
873 -+ if (!gid_valid(e->e_gid))
874 -+ return -EOVERFLOW;
875 -+ break;
876 -+ }
877 -+ }
878 -+ return 0;
879 -+}
880 -+
881 -+static void
882 -+shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
883 -+ void *value, size_t size)
884 -+{
885 -+ struct posix_acl_xattr_header *header = value;
886 -+ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
887 -+ int count;
888 -+ kuid_t kuid;
889 -+ kgid_t kgid;
890 -+
891 -+ if (!value)
892 -+ return;
893 -+ if (size < sizeof(struct posix_acl_xattr_header))
894 -+ return;
895 -+ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
896 -+ return;
897 -+
898 -+ count = posix_acl_xattr_count(size);
899 -+ if (count < 0)
900 -+ return;
901 -+ if (count == 0)
902 -+ return;
903 -+
904 -+ for (end = entry + count; entry != end; entry++) {
905 -+ switch(le16_to_cpu(entry->e_tag)) {
906 -+ case ACL_USER:
907 -+ kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
908 -+ kuid = shift_kuid(from, to, kuid);
909 -+ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
910 -+ break;
911 -+ case ACL_GROUP:
912 -+ kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
913 -+ kgid = shift_kgid(from, to, kgid);
914 -+ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
915 -+ break;
916 -+ default:
917 -+ break;
918 -+ }
919 -+ }
920 -+}
921 -+
922 -+static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
923 -+{
924 -+ struct inode *loweri = inode->i_private;
925 -+ const struct cred *oldcred;
926 -+ struct posix_acl *lower_acl, *acl = NULL;
927 -+ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
928 -+ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
929 -+ int size;
930 -+ int err;
931 -+
932 -+ if (!IS_POSIXACL(loweri))
933 -+ return NULL;
934 -+
935 -+ oldcred = shiftfs_override_creds(inode->i_sb);
936 -+ lower_acl = get_acl(loweri, type);
937 -+ revert_creds(oldcred);
938 -+
939 -+ if (lower_acl && !IS_ERR(lower_acl)) {
940 -+ /* XXX: export posix_acl_clone? */
941 -+ size = sizeof(struct posix_acl) +
942 -+ lower_acl->a_count * sizeof(struct posix_acl_entry);
943 -+ acl = kmemdup(lower_acl, size, GFP_KERNEL);
944 -+ posix_acl_release(lower_acl);
945 -+
946 -+ if (!acl)
947 -+ return ERR_PTR(-ENOMEM);
948 -+
949 -+ refcount_set(&acl->a_refcount, 1);
950 -+
951 -+ err = shift_acl_ids(from_ns, to_ns, acl);
952 -+ if (err) {
953 -+ kfree(acl);
954 -+ return ERR_PTR(err);
955 -+ }
956 -+ }
957 -+
958 -+ return acl;
959 -+}
960 -+
961 -+static int
962 -+shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
963 -+ struct dentry *dentry, struct inode *inode,
964 -+ const char *name, void *buffer, size_t size)
965 -+{
966 -+ struct inode *loweri = inode->i_private;
967 -+ int ret;
968 -+
969 -+ ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
970 -+ buffer, size);
971 -+ if (ret < 0)
972 -+ return ret;
973 -+
974 -+ inode_lock(loweri);
975 -+ shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
976 -+ buffer, size);
977 -+ inode_unlock(loweri);
978 -+ return ret;
979 -+}
980 -+
981 -+static int
982 -+shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
983 -+ struct dentry *dentry, struct inode *inode,
984 -+ const char *name, const void *value,
985 -+ size_t size, int flags)
986 -+{
987 -+ struct inode *loweri = inode->i_private;
988 -+ int err;
989 -+
990 -+ if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
991 -+ return -EOPNOTSUPP;
992 -+ if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
993 -+ return value ? -EACCES : 0;
994 -+ if (!inode_owner_or_capable(inode))
995 -+ return -EPERM;
996 -+
997 -+ if (value) {
998 -+ shift_acl_xattr_ids(inode->i_sb->s_user_ns,
999 -+ loweri->i_sb->s_user_ns,
1000 -+ (void *)value, size);
1001 -+ err = shiftfs_setxattr(dentry, inode, handler->name, value,
1002 -+ size, flags);
1003 -+ } else {
1004 -+ err = shiftfs_removexattr(dentry, handler->name);
1005 -+ }
1006 -+
1007 -+ if (!err)
1008 -+ shiftfs_copyattr(loweri, inode);
1009 -+
1010 -+ return err;
1011 -+}
1012 -+
1013 -+static const struct xattr_handler
1014 -+shiftfs_posix_acl_access_xattr_handler = {
1015 -+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
1016 -+ .flags = ACL_TYPE_ACCESS,
1017 -+ .get = shiftfs_posix_acl_xattr_get,
1018 -+ .set = shiftfs_posix_acl_xattr_set,
1019 -+};
1020 -+
1021 -+static const struct xattr_handler
1022 -+shiftfs_posix_acl_default_xattr_handler = {
1023 -+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1024 -+ .flags = ACL_TYPE_DEFAULT,
1025 -+ .get = shiftfs_posix_acl_xattr_get,
1026 -+ .set = shiftfs_posix_acl_xattr_set,
1027 -+};
1028 -+
1029 -+#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
1030 -+
1031 -+#define shiftfs_get_acl NULL
1032 -+
1033 -+#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
1034 -+
1035 -+static const struct inode_operations shiftfs_dir_inode_operations = {
1036 -+ .lookup = shiftfs_lookup,
1037 -+ .mkdir = shiftfs_mkdir,
1038 -+ .symlink = shiftfs_symlink,
1039 -+ .unlink = shiftfs_unlink,
1040 -+ .rmdir = shiftfs_rmdir,
1041 -+ .rename = shiftfs_rename,
1042 -+ .link = shiftfs_link,
1043 -+ .setattr = shiftfs_setattr,
1044 -+ .create = shiftfs_create,
1045 -+ .mknod = shiftfs_mknod,
1046 -+ .permission = shiftfs_permission,
1047 -+ .getattr = shiftfs_getattr,
1048 -+ .listxattr = shiftfs_listxattr,
1049 -+ .get_acl = shiftfs_get_acl,
1050 -+};
1051 -+
1052 -+static const struct inode_operations shiftfs_file_inode_operations = {
1053 -+ .fiemap = shiftfs_fiemap,
1054 -+ .getattr = shiftfs_getattr,
1055 -+ .get_acl = shiftfs_get_acl,
1056 -+ .listxattr = shiftfs_listxattr,
1057 -+ .permission = shiftfs_permission,
1058 -+ .setattr = shiftfs_setattr,
1059 -+ .tmpfile = shiftfs_tmpfile,
1060 -+};
1061 -+
1062 -+static const struct inode_operations shiftfs_special_inode_operations = {
1063 -+ .getattr = shiftfs_getattr,
1064 -+ .get_acl = shiftfs_get_acl,
1065 -+ .listxattr = shiftfs_listxattr,
1066 -+ .permission = shiftfs_permission,
1067 -+ .setattr = shiftfs_setattr,
1068 -+};
1069 -+
1070 -+static const struct inode_operations shiftfs_symlink_inode_operations = {
1071 -+ .getattr = shiftfs_getattr,
1072 -+ .get_link = shiftfs_get_link,
1073 -+ .listxattr = shiftfs_listxattr,
1074 -+ .setattr = shiftfs_setattr,
1075 -+};
1076 -+
1077 -+static struct file *shiftfs_open_realfile(const struct file *file,
1078 -+ struct inode *realinode)
1079 -+{
1080 -+ struct file *realfile;
1081 -+ const struct cred *old_cred;
1082 -+ struct inode *inode = file_inode(file);
1083 -+ struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1084 -+ struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1085 -+ struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1086 -+
1087 -+ old_cred = shiftfs_override_creds(inode->i_sb);
1088 -+ realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1089 -+ info->creator_cred);
1090 -+ revert_creds(old_cred);
1091 -+
1092 -+ return realfile;
1093 -+}
1094 -+
1095 -+#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1096 -+
1097 -+static int shiftfs_change_flags(struct file *file, unsigned int flags)
1098 -+{
1099 -+ struct inode *inode = file_inode(file);
1100 -+ int err;
1101 -+
1102 -+ /* if some flag changed that cannot be changed then something's amiss */
1103 -+ if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1104 -+ return -EIO;
1105 -+
1106 -+ flags &= SHIFTFS_SETFL_MASK;
1107 -+
1108 -+ if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1109 -+ return -EPERM;
1110 -+
1111 -+ if (flags & O_DIRECT) {
1112 -+ if (!file->f_mapping->a_ops ||
1113 -+ !file->f_mapping->a_ops->direct_IO)
1114 -+ return -EINVAL;
1115 -+ }
1116 -+
1117 -+ if (file->f_op->check_flags) {
1118 -+ err = file->f_op->check_flags(flags);
1119 -+ if (err)
1120 -+ return err;
1121 -+ }
1122 -+
1123 -+ spin_lock(&file->f_lock);
1124 -+ file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1125 -+ spin_unlock(&file->f_lock);
1126 -+
1127 -+ return 0;
1128 -+}
1129 -+
1130 -+static int shiftfs_open(struct inode *inode, struct file *file)
1131 -+{
1132 -+ struct file *realfile;
1133 -+
1134 -+ realfile = shiftfs_open_realfile(file, inode->i_private);
1135 -+ if (IS_ERR(realfile))
1136 -+ return PTR_ERR(realfile);
1137 -+
1138 -+ file->private_data = realfile;
1139 -+ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1140 -+ file->f_mapping = realfile->f_mapping;
1141 -+
1142 -+ return 0;
1143 -+}
1144 -+
1145 -+static int shiftfs_dir_open(struct inode *inode, struct file *file)
1146 -+{
1147 -+ struct file *realfile;
1148 -+ const struct cred *oldcred;
1149 -+ struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1150 -+ struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1151 -+ struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1152 -+
1153 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1154 -+ realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1155 -+ info->creator_cred);
1156 -+ revert_creds(oldcred);
1157 -+ if (IS_ERR(realfile))
1158 -+ return PTR_ERR(realfile);
1159 -+
1160 -+ file->private_data = realfile;
1161 -+
1162 -+ return 0;
1163 -+}
1164 -+
1165 -+static int shiftfs_release(struct inode *inode, struct file *file)
1166 -+{
1167 -+ struct file *realfile = file->private_data;
1168 -+
1169 -+ if (realfile)
1170 -+ fput(realfile);
1171 -+
1172 -+ return 0;
1173 -+}
1174 -+
/* ->release for directories; identical to the regular-file release. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int rc = shiftfs_release(inode, file);

	return rc;
}
1179 -+
1180 -+static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1181 -+{
1182 -+ struct file *realfile = file->private_data;
1183 -+
1184 -+ return vfs_llseek(realfile, offset, whence);
1185 -+}
1186 -+
1187 -+static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
1188 -+{
1189 -+ struct inode *realinode = file_inode(file)->i_private;
1190 -+
1191 -+ return generic_file_llseek_size(file, offset, whence,
1192 -+ realinode->i_sb->s_maxbytes,
1193 -+ i_size_read(realinode));
1194 -+}
1195 -+
1196 -+/* XXX: Need to figure out what to do about atime updates, maybe other
1197 -+ * timestamps too ... ref. ovl_file_accessed() */
1198 -+
1199 -+static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1200 -+{
1201 -+ int ifl = iocb->ki_flags;
1202 -+ rwf_t flags = 0;
1203 -+
1204 -+ if (ifl & IOCB_NOWAIT)
1205 -+ flags |= RWF_NOWAIT;
1206 -+ if (ifl & IOCB_HIPRI)
1207 -+ flags |= RWF_HIPRI;
1208 -+ if (ifl & IOCB_DSYNC)
1209 -+ flags |= RWF_DSYNC;
1210 -+ if (ifl & IOCB_SYNC)
1211 -+ flags |= RWF_SYNC;
1212 -+
1213 -+ return flags;
1214 -+}
1215 -+
1216 -+static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1217 -+{
1218 -+ struct file *realfile;
1219 -+
1220 -+ if (file->f_op->open != shiftfs_open &&
1221 -+ file->f_op->open != shiftfs_dir_open)
1222 -+ return -EINVAL;
1223 -+
1224 -+ realfile = file->private_data;
1225 -+ lowerfd->flags = 0;
1226 -+ lowerfd->file = realfile;
1227 -+
1228 -+ /* Did the flags change since open? */
1229 -+ if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1230 -+ return shiftfs_change_flags(lowerfd->file, file->f_flags);
1231 -+
1232 -+ return 0;
1233 -+}
1234 -+
1235 -+static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1236 -+{
1237 -+ struct file *file = iocb->ki_filp;
1238 -+ struct fd lowerfd;
1239 -+ const struct cred *oldcred;
1240 -+ ssize_t ret;
1241 -+
1242 -+ if (!iov_iter_count(iter))
1243 -+ return 0;
1244 -+
1245 -+ ret = shiftfs_real_fdget(file, &lowerfd);
1246 -+ if (ret)
1247 -+ return ret;
1248 -+
1249 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1250 -+ ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1251 -+ shiftfs_iocb_to_rwf(iocb));
1252 -+ revert_creds(oldcred);
1253 -+
1254 -+ shiftfs_file_accessed(file);
1255 -+
1256 -+ fdput(lowerfd);
1257 -+ return ret;
1258 -+}
1259 -+
1260 -+static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1261 -+{
1262 -+ struct file *file = iocb->ki_filp;
1263 -+ struct inode *inode = file_inode(file);
1264 -+ struct fd lowerfd;
1265 -+ const struct cred *oldcred;
1266 -+ ssize_t ret;
1267 -+
1268 -+ if (!iov_iter_count(iter))
1269 -+ return 0;
1270 -+
1271 -+ inode_lock(inode);
1272 -+ /* Update mode */
1273 -+ shiftfs_copyattr(inode->i_private, inode);
1274 -+ ret = file_remove_privs(file);
1275 -+ if (ret)
1276 -+ goto out_unlock;
1277 -+
1278 -+ ret = shiftfs_real_fdget(file, &lowerfd);
1279 -+ if (ret)
1280 -+ goto out_unlock;
1281 -+
1282 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1283 -+ file_start_write(lowerfd.file);
1284 -+ ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1285 -+ shiftfs_iocb_to_rwf(iocb));
1286 -+ file_end_write(lowerfd.file);
1287 -+ revert_creds(oldcred);
1288 -+
1289 -+ /* Update size */
1290 -+ shiftfs_copyattr(inode->i_private, inode);
1291 -+
1292 -+ fdput(lowerfd);
1293 -+
1294 -+out_unlock:
1295 -+ inode_unlock(inode);
1296 -+ return ret;
1297 -+}
1298 -+
1299 -+static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1300 -+ int datasync)
1301 -+{
1302 -+ struct fd lowerfd;
1303 -+ const struct cred *oldcred;
1304 -+ int ret;
1305 -+
1306 -+ ret = shiftfs_real_fdget(file, &lowerfd);
1307 -+ if (ret)
1308 -+ return ret;
1309 -+
1310 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1311 -+ ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1312 -+ revert_creds(oldcred);
1313 -+
1314 -+ fdput(lowerfd);
1315 -+ return ret;
1316 -+}
1317 -+
1318 -+static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1319 -+{
1320 -+ struct file *realfile = file->private_data;
1321 -+ const struct cred *oldcred;
1322 -+ int ret;
1323 -+
1324 -+ if (!realfile->f_op->mmap)
1325 -+ return -ENODEV;
1326 -+
1327 -+ if (WARN_ON(file != vma->vm_file))
1328 -+ return -EIO;
1329 -+
1330 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1331 -+ vma->vm_file = get_file(realfile);
1332 -+ ret = call_mmap(vma->vm_file, vma);
1333 -+ revert_creds(oldcred);
1334 -+
1335 -+ shiftfs_file_accessed(file);
1336 -+
1337 -+ if (ret) {
1338 -+ /*
1339 -+ * Drop refcount from new vm_file value and restore original
1340 -+ * vm_file value
1341 -+ */
1342 -+ vma->vm_file = file;
1343 -+ fput(realfile);
1344 -+ } else {
1345 -+ /* Drop refcount from previous vm_file value */
1346 -+ fput(file);
1347 -+ }
1348 -+
1349 -+ return ret;
1350 -+}
1351 -+
1352 -+static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1353 -+ loff_t len)
1354 -+{
1355 -+ struct inode *inode = file_inode(file);
1356 -+ struct inode *loweri = inode->i_private;
1357 -+ struct fd lowerfd;
1358 -+ const struct cred *oldcred;
1359 -+ int ret;
1360 -+
1361 -+ ret = shiftfs_real_fdget(file, &lowerfd);
1362 -+ if (ret)
1363 -+ return ret;
1364 -+
1365 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1366 -+ ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1367 -+ revert_creds(oldcred);
1368 -+
1369 -+ /* Update size */
1370 -+ shiftfs_copyattr(loweri, inode);
1371 -+
1372 -+ fdput(lowerfd);
1373 -+ return ret;
1374 -+}
1375 -+
1376 -+static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1377 -+ int advice)
1378 -+{
1379 -+ struct fd lowerfd;
1380 -+ const struct cred *oldcred;
1381 -+ int ret;
1382 -+
1383 -+ ret = shiftfs_real_fdget(file, &lowerfd);
1384 -+ if (ret)
1385 -+ return ret;
1386 -+
1387 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1388 -+ ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1389 -+ revert_creds(oldcred);
1390 -+
1391 -+ fdput(lowerfd);
1392 -+ return ret;
1393 -+}
1394 -+
1395 -+static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
1396 -+ const struct cred **oldcred,
1397 -+ struct cred **newcred)
1398 -+{
1399 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1400 -+ kuid_t fsuid = current_fsuid();
1401 -+ kgid_t fsgid = current_fsgid();
1402 -+
1403 -+ *oldcred = shiftfs_override_creds(sb);
1404 -+
1405 -+ *newcred = prepare_creds();
1406 -+ if (!*newcred) {
1407 -+ revert_creds(*oldcred);
1408 -+ return -ENOMEM;
1409 -+ }
1410 -+
1411 -+ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1412 -+ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
1413 -+
1414 -+ /* clear all caps to prevent bypassing capable() checks */
1415 -+ cap_clear((*newcred)->cap_bset);
1416 -+ cap_clear((*newcred)->cap_effective);
1417 -+ cap_clear((*newcred)->cap_inheritable);
1418 -+ cap_clear((*newcred)->cap_permitted);
1419 -+
1420 -+ if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1421 -+ kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1422 -+ /*
1423 -+ * Allow the root user in the container to remove subvolumes
1424 -+ * from other users.
1425 -+ */
1426 -+ if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1427 -+ cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1428 -+ }
1429 -+
1430 -+ put_cred(override_creds(*newcred));
1431 -+ return 0;
1432 -+}
1433 -+
/* Undo shiftfs_override_ioctl_creds(); delegates to the object-creds helper. */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1439 -+
1440 -+static inline bool is_btrfs_snap_ioctl(int cmd)
1441 -+{
1442 -+ if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1443 -+ return true;
1444 -+
1445 -+ return false;
1446 -+}
1447 -+
1448 -+static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
1449 -+ struct btrfs_ioctl_vol_args *v1,
1450 -+ struct btrfs_ioctl_vol_args_v2 *v2)
1451 -+{
1452 -+ int ret;
1453 -+
1454 -+ if (!is_btrfs_snap_ioctl(cmd))
1455 -+ return 0;
1456 -+
1457 -+ if (cmd == BTRFS_IOC_SNAP_CREATE)
1458 -+ ret = copy_to_user(arg, v1, sizeof(*v1));
1459 -+ else
1460 -+ ret = copy_to_user(arg, v2, sizeof(*v2));
1461 -+
1462 -+ __close_fd(current->files, fd);
1463 -+ kfree(v1);
1464 -+ kfree(v2);
1465 -+
1466 -+ return ret;
1467 -+}
1468 -+
1469 -+static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1470 -+ struct btrfs_ioctl_vol_args **b1,
1471 -+ struct btrfs_ioctl_vol_args_v2 **b2,
1472 -+ int *newfd)
1473 -+{
1474 -+ int oldfd, ret;
1475 -+ struct fd src;
1476 -+ struct fd lfd = {};
1477 -+ struct btrfs_ioctl_vol_args *v1 = NULL;
1478 -+ struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1479 -+
1480 -+ if (!is_btrfs_snap_ioctl(cmd))
1481 -+ return 0;
1482 -+
1483 -+ if (cmd == BTRFS_IOC_SNAP_CREATE) {
1484 -+ v1 = memdup_user(arg, sizeof(*v1));
1485 -+ if (IS_ERR(v1))
1486 -+ return PTR_ERR(v1);
1487 -+ oldfd = v1->fd;
1488 -+ *b1 = v1;
1489 -+ } else {
1490 -+ v2 = memdup_user(arg, sizeof(*v2));
1491 -+ if (IS_ERR(v2))
1492 -+ return PTR_ERR(v2);
1493 -+ oldfd = v2->fd;
1494 -+ *b2 = v2;
1495 -+ }
1496 -+
1497 -+ src = fdget(oldfd);
1498 -+ if (!src.file)
1499 -+ return -EINVAL;
1500 -+
1501 -+ ret = shiftfs_real_fdget(src.file, &lfd);
1502 -+ if (ret) {
1503 -+ fdput(src);
1504 -+ return ret;
1505 -+ }
1506 -+
1507 -+ /*
1508 -+ * shiftfs_real_fdget() does not take a reference to lfd.file, so
1509 -+ * take a reference here to offset the one which will be put by
1510 -+ * __close_fd(), and make sure that reference is put on fdput(lfd).
1511 -+ */
1512 -+ get_file(lfd.file);
1513 -+ lfd.flags |= FDPUT_FPUT;
1514 -+ fdput(src);
1515 -+
1516 -+ *newfd = get_unused_fd_flags(lfd.file->f_flags);
1517 -+ if (*newfd < 0) {
1518 -+ fdput(lfd);
1519 -+ return *newfd;
1520 -+ }
1521 -+
1522 -+ fd_install(*newfd, lfd.file);
1523 -+
1524 -+ if (cmd == BTRFS_IOC_SNAP_CREATE) {
1525 -+ v1->fd = *newfd;
1526 -+ ret = copy_to_user(arg, v1, sizeof(*v1));
1527 -+ v1->fd = oldfd;
1528 -+ } else {
1529 -+ v2->fd = *newfd;
1530 -+ ret = copy_to_user(arg, v2, sizeof(*v2));
1531 -+ v2->fd = oldfd;
1532 -+ }
1533 -+
1534 -+ if (ret)
1535 -+ shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
1536 -+
1537 -+ return ret;
1538 -+}
1539 -+
1540 -+static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1541 -+ unsigned long arg)
1542 -+{
1543 -+ struct fd lowerfd;
1544 -+ struct cred *newcred;
1545 -+ const struct cred *oldcred;
1546 -+ int newfd = -EBADF;
1547 -+ long err = 0, ret = 0;
1548 -+ void __user *argp = (void __user *)arg;
1549 -+ struct super_block *sb = file->f_path.dentry->d_sb;
1550 -+ struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1551 -+ struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1552 -+
1553 -+ ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1554 -+ &newfd);
1555 -+ if (ret < 0)
1556 -+ return ret;
1557 -+
1558 -+ ret = shiftfs_real_fdget(file, &lowerfd);
1559 -+ if (ret)
1560 -+ goto out_restore;
1561 -+
1562 -+ ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
1563 -+ if (ret)
1564 -+ goto out_fdput;
1565 -+
1566 -+ ret = vfs_ioctl(lowerfd.file, cmd, arg);
1567 -+
1568 -+ shiftfs_revert_ioctl_creds(oldcred, newcred);
1569 -+
1570 -+ shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1571 -+ shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1572 -+
1573 -+out_fdput:
1574 -+ fdput(lowerfd);
1575 -+
1576 -+out_restore:
1577 -+ err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1578 -+ btrfs_v1, btrfs_v2);
1579 -+ if (!ret)
1580 -+ ret = err;
1581 -+
1582 -+ return ret;
1583 -+}
1584 -+
1585 -+static bool in_ioctl_whitelist(int flag, unsigned long arg)
1586 -+{
1587 -+ void __user *argp = (void __user *)arg;
1588 -+ u64 flags = 0;
1589 -+
1590 -+ switch (flag) {
1591 -+ case BTRFS_IOC_FS_INFO:
1592 -+ return true;
1593 -+ case BTRFS_IOC_SNAP_CREATE:
1594 -+ return true;
1595 -+ case BTRFS_IOC_SNAP_CREATE_V2:
1596 -+ return true;
1597 -+ case BTRFS_IOC_SUBVOL_CREATE:
1598 -+ return true;
1599 -+ case BTRFS_IOC_SUBVOL_CREATE_V2:
1600 -+ return true;
1601 -+ case BTRFS_IOC_SUBVOL_GETFLAGS:
1602 -+ return true;
1603 -+ case BTRFS_IOC_SUBVOL_SETFLAGS:
1604 -+ if (copy_from_user(&flags, argp, sizeof(flags)))
1605 -+ return false;
1606 -+
1607 -+ if (flags & ~BTRFS_SUBVOL_RDONLY)
1608 -+ return false;
1609 -+
1610 -+ return true;
1611 -+ case BTRFS_IOC_SNAP_DESTROY:
1612 -+ return true;
1613 -+ }
1614 -+
1615 -+ return false;
1616 -+}
1617 -+
1618 -+static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1619 -+ unsigned long arg)
1620 -+{
1621 -+ switch (cmd) {
1622 -+ case FS_IOC_GETVERSION:
1623 -+ /* fall through */
1624 -+ case FS_IOC_GETFLAGS:
1625 -+ /* fall through */
1626 -+ case FS_IOC_SETFLAGS:
1627 -+ break;
1628 -+ default:
1629 -+ if (!in_ioctl_whitelist(cmd, arg) ||
1630 -+ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1631 -+ return -ENOTTY;
1632 -+ }
1633 -+
1634 -+ return shiftfs_real_ioctl(file, cmd, arg);
1635 -+}
1636 -+
1637 -+static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1638 -+ unsigned long arg)
1639 -+{
1640 -+ switch (cmd) {
1641 -+ case FS_IOC32_GETVERSION:
1642 -+ /* fall through */
1643 -+ case FS_IOC32_GETFLAGS:
1644 -+ /* fall through */
1645 -+ case FS_IOC32_SETFLAGS:
1646 -+ break;
1647 -+ default:
1648 -+ if (!in_ioctl_whitelist(cmd, arg) ||
1649 -+ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1650 -+ return -ENOIOCTLCMD;
1651 -+ }
1652 -+
1653 -+ return shiftfs_real_ioctl(file, cmd, arg);
1654 -+}
1655 -+
/* Selects which lower-level range operation shiftfs_copyfile() performs. */
enum shiftfs_copyop {
	SHIFTFS_COPY = 0,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,		/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,		/* vfs_dedupe_file_range_one() */
};
1661 -+
1662 -+static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1663 -+ struct file *file_out, loff_t pos_out, u64 len,
1664 -+ unsigned int flags, enum shiftfs_copyop op)
1665 -+{
1666 -+ ssize_t ret;
1667 -+ struct fd real_in, real_out;
1668 -+ const struct cred *oldcred;
1669 -+ struct inode *inode_out = file_inode(file_out);
1670 -+ struct inode *loweri = inode_out->i_private;
1671 -+
1672 -+ ret = shiftfs_real_fdget(file_out, &real_out);
1673 -+ if (ret)
1674 -+ return ret;
1675 -+
1676 -+ ret = shiftfs_real_fdget(file_in, &real_in);
1677 -+ if (ret) {
1678 -+ fdput(real_out);
1679 -+ return ret;
1680 -+ }
1681 -+
1682 -+ oldcred = shiftfs_override_creds(inode_out->i_sb);
1683 -+ switch (op) {
1684 -+ case SHIFTFS_COPY:
1685 -+ ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1686 -+ pos_out, len, flags);
1687 -+ break;
1688 -+
1689 -+ case SHIFTFS_CLONE:
1690 -+ ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1691 -+ pos_out, len, flags);
1692 -+ break;
1693 -+
1694 -+ case SHIFTFS_DEDUPE:
1695 -+ ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1696 -+ real_out.file, pos_out, len,
1697 -+ flags);
1698 -+ break;
1699 -+ }
1700 -+ revert_creds(oldcred);
1701 -+
1702 -+ /* Update size */
1703 -+ shiftfs_copyattr(loweri, inode_out);
1704 -+
1705 -+ fdput(real_in);
1706 -+ fdput(real_out);
1707 -+
1708 -+ return ret;
1709 -+}
1710 -+
1711 -+static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1712 -+ struct file *file_out, loff_t pos_out,
1713 -+ size_t len, unsigned int flags)
1714 -+{
1715 -+ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1716 -+ SHIFTFS_COPY);
1717 -+}
1718 -+
1719 -+static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1720 -+ struct file *file_out, loff_t pos_out,
1721 -+ loff_t len, unsigned int remap_flags)
1722 -+{
1723 -+ enum shiftfs_copyop op;
1724 -+
1725 -+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1726 -+ return -EINVAL;
1727 -+
1728 -+ if (remap_flags & REMAP_FILE_DEDUP)
1729 -+ op = SHIFTFS_DEDUPE;
1730 -+ else
1731 -+ op = SHIFTFS_CLONE;
1732 -+
1733 -+ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1734 -+ remap_flags, op);
1735 -+}
1736 -+
1737 -+static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1738 -+{
1739 -+ const struct cred *oldcred;
1740 -+ int err = -ENOTDIR;
1741 -+ struct file *realfile = file->private_data;
1742 -+
1743 -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1744 -+ err = iterate_dir(realfile, ctx);
1745 -+ revert_creds(oldcred);
1746 -+
1747 -+ return err;
1748 -+}
1749 -+
1750 -+const struct file_operations shiftfs_file_operations = {
1751 -+ .open = shiftfs_open,
1752 -+ .release = shiftfs_release,
1753 -+ .llseek = shiftfs_file_llseek,
1754 -+ .read_iter = shiftfs_read_iter,
1755 -+ .write_iter = shiftfs_write_iter,
1756 -+ .fsync = shiftfs_fsync,
1757 -+ .mmap = shiftfs_mmap,
1758 -+ .fallocate = shiftfs_fallocate,
1759 -+ .fadvise = shiftfs_fadvise,
1760 -+ .unlocked_ioctl = shiftfs_ioctl,
1761 -+ .compat_ioctl = shiftfs_compat_ioctl,
1762 -+ .copy_file_range = shiftfs_copy_file_range,
1763 -+ .remap_file_range = shiftfs_remap_file_range,
1764 -+};
1765 -+
1766 -+const struct file_operations shiftfs_dir_operations = {
1767 -+ .open = shiftfs_dir_open,
1768 -+ .release = shiftfs_dir_release,
1769 -+ .compat_ioctl = shiftfs_compat_ioctl,
1770 -+ .fsync = shiftfs_fsync,
1771 -+ .iterate_shared = shiftfs_iterate_shared,
1772 -+ .llseek = shiftfs_dir_llseek,
1773 -+ .read = generic_read_dir,
1774 -+ .unlocked_ioctl = shiftfs_ioctl,
1775 -+};
1776 -+
1777 -+static const struct address_space_operations shiftfs_aops = {
1778 -+ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1779 -+ .direct_IO = noop_direct_IO,
1780 -+};
1781 -+
1782 -+static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1783 -+ umode_t mode, dev_t dev, struct dentry *dentry)
1784 -+{
1785 -+ struct inode *loweri;
1786 -+
1787 -+ inode->i_ino = ino;
1788 -+ inode->i_flags |= S_NOCMTIME;
1789 -+
1790 -+ mode &= S_IFMT;
1791 -+ inode->i_mode = mode;
1792 -+ switch (mode & S_IFMT) {
1793 -+ case S_IFDIR:
1794 -+ inode->i_op = &shiftfs_dir_inode_operations;
1795 -+ inode->i_fop = &shiftfs_dir_operations;
1796 -+ break;
1797 -+ case S_IFLNK:
1798 -+ inode->i_op = &shiftfs_symlink_inode_operations;
1799 -+ break;
1800 -+ case S_IFREG:
1801 -+ inode->i_op = &shiftfs_file_inode_operations;
1802 -+ inode->i_fop = &shiftfs_file_operations;
1803 -+ inode->i_mapping->a_ops = &shiftfs_aops;
1804 -+ break;
1805 -+ default:
1806 -+ inode->i_op = &shiftfs_special_inode_operations;
1807 -+ init_special_inode(inode, mode, dev);
1808 -+ break;
1809 -+ }
1810 -+
1811 -+ if (!dentry)
1812 -+ return;
1813 -+
1814 -+ loweri = dentry->d_inode;
1815 -+ if (!loweri->i_op->get_link)
1816 -+ inode->i_opflags |= IOP_NOFOLLOW;
1817 -+
1818 -+ shiftfs_copyattr(loweri, inode);
1819 -+ shiftfs_copyflags(loweri, inode);
1820 -+ set_nlink(inode, loweri->i_nlink);
1821 -+}
1822 -+
1823 -+static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1824 -+{
1825 -+ struct super_block *sb = dentry->d_sb;
1826 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1827 -+
1828 -+ if (sbinfo->mark)
1829 -+ seq_show_option(m, "mark", NULL);
1830 -+
1831 -+ if (sbinfo->passthrough)
1832 -+ seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1833 -+
1834 -+ return 0;
1835 -+}
1836 -+
1837 -+static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1838 -+{
1839 -+ struct super_block *sb = dentry->d_sb;
1840 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1841 -+ struct dentry *root = sb->s_root;
1842 -+ struct dentry *realroot = root->d_fsdata;
1843 -+ struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1844 -+ int err;
1845 -+
1846 -+ err = vfs_statfs(&realpath, buf);
1847 -+ if (err)
1848 -+ return err;
1849 -+
1850 -+ if (!shiftfs_passthrough_statfs(sbinfo))
1851 -+ buf->f_type = sb->s_magic;
1852 -+
1853 -+ return 0;
1854 -+}
1855 -+
1856 -+static void shiftfs_evict_inode(struct inode *inode)
1857 -+{
1858 -+ struct inode *loweri = inode->i_private;
1859 -+
1860 -+ clear_inode(inode);
1861 -+
1862 -+ if (loweri)
1863 -+ iput(loweri);
1864 -+}
1865 -+
1866 -+static void shiftfs_put_super(struct super_block *sb)
1867 -+{
1868 -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1869 -+
1870 -+ if (sbinfo) {
1871 -+ mntput(sbinfo->mnt);
1872 -+ put_cred(sbinfo->creator_cred);
1873 -+ kfree(sbinfo);
1874 -+ }
1875 -+}
1876 -+
1877 -+static const struct xattr_handler shiftfs_xattr_handler = {
1878 -+ .prefix = "",
1879 -+ .get = shiftfs_xattr_get,
1880 -+ .set = shiftfs_xattr_set,
1881 -+};
1882 -+
1883 -+const struct xattr_handler *shiftfs_xattr_handlers[] = {
1884 -+#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1885 -+ &shiftfs_posix_acl_access_xattr_handler,
1886 -+ &shiftfs_posix_acl_default_xattr_handler,
1887 -+#endif
1888 -+ &shiftfs_xattr_handler,
1889 -+ NULL
1890 -+};
1891 -+
/*
 * Return true when every bit set in @new_flags is also set in @old_flags,
 * i.e. @new_flags is a subset of @old_flags.
 */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & ~old_flags) == 0;
}
1899 -+
1900 -+static int shiftfs_super_check_flags(unsigned long old_flags,
1901 -+ unsigned long new_flags)
1902 -+{
1903 -+ if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1904 -+ return -EPERM;
1905 -+
1906 -+ if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1907 -+ return -EPERM;
1908 -+
1909 -+ if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1910 -+ return -EPERM;
1911 -+
1912 -+ if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1913 -+ return -EPERM;
1914 -+
1915 -+ if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1916 -+ return -EPERM;
1917 -+
1918 -+ if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1919 -+ return -EPERM;
1920 -+
1921 -+ if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1922 -+ return -EPERM;
1923 -+
1924 -+ return 0;
1925 -+}
1926 -+
1927 -+static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1928 -+{
1929 -+ int err;
1930 -+ struct shiftfs_super_info new = {};
1931 -+ struct shiftfs_super_info *info = sb->s_fs_info;
1932 -+
1933 -+ err = shiftfs_parse_mount_options(&new, data);
1934 -+ if (err)
1935 -+ return err;
1936 -+
1937 -+ err = shiftfs_super_check_flags(sb->s_flags, *flags);
1938 -+ if (err)
1939 -+ return err;
1940 -+
1941 -+ /* Mark mount option cannot be changed. */
1942 -+ if (info->mark || (info->mark != new.mark))
1943 -+ return -EPERM;
1944 -+
1945 -+ if (info->passthrough != new.passthrough) {
1946 -+ /* Don't allow exceeding passthrough options of mark mount. */
1947 -+ if (!passthrough_is_subset(info->passthrough_mark,
1948 -+ info->passthrough))
1949 -+ return -EPERM;
1950 -+
1951 -+ info->passthrough = new.passthrough;
1952 -+ }
1953 -+
1954 -+ return 0;
1955 -+}
1956 -+
1957 -+static const struct super_operations shiftfs_super_ops = {
1958 -+ .put_super = shiftfs_put_super,
1959 -+ .show_options = shiftfs_show_options,
1960 -+ .statfs = shiftfs_statfs,
1961 -+ .remount_fs = shiftfs_remount,
1962 -+ .evict_inode = shiftfs_evict_inode,
1963 -+};
1964 -+
/*
 * Arguments handed from shiftfs_mount() to shiftfs_fill_super().
 * The member order matters: shiftfs_mount() initialises this positionally.
 */
struct shiftfs_data {
	void		*data;	/* raw mount option data */
	const char	*path;	/* mount source (dev_name): path to stack on */
};
1969 -+
1970 -+static void shiftfs_super_force_flags(struct super_block *sb,
1971 -+ unsigned long lower_flags)
1972 -+{
1973 -+ sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1974 -+ SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1975 -+
1976 -+ if (!(lower_flags & SB_POSIXACL))
1977 -+ sb->s_flags &= ~SB_POSIXACL;
1978 -+}
1979 -+
1980 -+static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1981 -+ int silent)
1982 -+{
1983 -+ int err;
1984 -+ struct path path = {};
1985 -+ struct shiftfs_super_info *sbinfo_mp;
1986 -+ char *name = NULL;
1987 -+ struct inode *inode = NULL;
1988 -+ struct dentry *dentry = NULL;
1989 -+ struct shiftfs_data *data = raw_data;
1990 -+ struct shiftfs_super_info *sbinfo = NULL;
1991 -+
1992 -+ if (!data->path)
1993 -+ return -EINVAL;
1994 -+
1995 -+ sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1996 -+ if (!sb->s_fs_info)
1997 -+ return -ENOMEM;
1998 -+ sbinfo = sb->s_fs_info;
1999 -+
2000 -+ err = shiftfs_parse_mount_options(sbinfo, data->data);
2001 -+ if (err)
2002 -+ return err;
2003 -+
2004 -+ /* to mount a mark, must be userns admin */
2005 -+ if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2006 -+ return -EPERM;
2007 -+
2008 -+ name = kstrdup(data->path, GFP_KERNEL);
2009 -+ if (!name)
2010 -+ return -ENOMEM;
2011 -+
2012 -+ err = kern_path(name, LOOKUP_FOLLOW, &path);
2013 -+ if (err)
2014 -+ goto out_free_name;
2015 -+
2016 -+ if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
2017 -+ err = -ENOTDIR;
2018 -+ goto out_put_path;
2019 -+ }
2020 -+
2021 -+ sb->s_flags |= SB_POSIXACL;
2022 -+
2023 -+ if (sbinfo->mark) {
2024 -+ struct cred *cred_tmp;
2025 -+ struct super_block *lower_sb = path.mnt->mnt_sb;
2026 -+
2027 -+ /* to mark a mount point, must root wrt lower s_user_ns */
2028 -+ if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2029 -+ err = -EPERM;
2030 -+ goto out_put_path;
2031 -+ }
2032 -+
2033 -+ /*
2034 -+ * this part is visible unshifted, so make sure no
2035 -+ * executables that could be used to give suid
2036 -+ * privileges
2037 -+ */
2038 -+ sb->s_iflags = SB_I_NOEXEC;
2039 -+
2040 -+ shiftfs_super_force_flags(sb, lower_sb->s_flags);
2041 -+
2042 -+ /*
2043 -+ * Handle nesting of shiftfs mounts by referring this mark
2044 -+ * mount back to the original mark mount. This is more
2045 -+ * efficient and alleviates concerns about stack depth.
2046 -+ */
2047 -+ if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2048 -+ sbinfo_mp = lower_sb->s_fs_info;
2049 -+
2050 -+ /* Doesn't make sense to mark a mark mount */
2051 -+ if (sbinfo_mp->mark) {
2052 -+ err = -EINVAL;
2053 -+ goto out_put_path;
2054 -+ }
2055 -+
2056 -+ if (!passthrough_is_subset(sbinfo_mp->passthrough,
2057 -+ sbinfo->passthrough)) {
2058 -+ err = -EPERM;
2059 -+ goto out_put_path;
2060 -+ }
2061 -+
2062 -+ sbinfo->mnt = mntget(sbinfo_mp->mnt);
2063 -+ dentry = dget(path.dentry->d_fsdata);
2064 -+ /*
2065 -+ * Copy up the passthrough mount options from the
2066 -+ * parent mark mountpoint.
2067 -+ */
2068 -+ sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
2069 -+ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2070 -+ } else {
2071 -+ sbinfo->mnt = mntget(path.mnt);
2072 -+ dentry = dget(path.dentry);
2073 -+ /*
2074 -+ * For a new mark passthrough_mark and passthrough
2075 -+ * are identical.
2076 -+ */
2077 -+ sbinfo->passthrough_mark = sbinfo->passthrough;
2078 -+
2079 -+ cred_tmp = prepare_creds();
2080 -+ if (!cred_tmp) {
2081 -+ err = -ENOMEM;
2082 -+ goto out_put_path;
2083 -+ }
2084 -+ /* Don't override disk quota limits or use reserved space. */
2085 -+ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2086 -+ sbinfo->creator_cred = cred_tmp;
2087 -+ }
2088 -+ } else {
2089 -+ /*
2090 -+ * This leg executes if we're admin capable in the namespace,
2091 -+ * so be very careful.
2092 -+ */
2093 -+ err = -EPERM;
2094 -+ if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2095 -+ goto out_put_path;
2096 -+
2097 -+ sbinfo_mp = path.dentry->d_sb->s_fs_info;
2098 -+ if (!sbinfo_mp->mark)
2099 -+ goto out_put_path;
2100 -+
2101 -+ if (!passthrough_is_subset(sbinfo_mp->passthrough,
2102 -+ sbinfo->passthrough))
2103 -+ goto out_put_path;
2104 -+
2105 -+ sbinfo->mnt = mntget(sbinfo_mp->mnt);
2106 -+ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2107 -+ dentry = dget(path.dentry->d_fsdata);
2108 -+ /*
2109 -+ * Copy up passthrough settings from mark mountpoint so we can
2110 -+ * verify when the overlay wants to remount with different
2111 -+ * passthrough settings.
2112 -+ */
2113 -+ sbinfo->passthrough_mark = sbinfo_mp->passthrough;
2114 -+ shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2115 -+ }
2116 -+
2117 -+ sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2118 -+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2119 -+ printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2120 -+ err = -EINVAL;
2121 -+ goto out_put_path;
2122 -+ }
2123 -+
2124 -+ inode = new_inode(sb);
2125 -+ if (!inode) {
2126 -+ err = -ENOMEM;
2127 -+ goto out_put_path;
2128 -+ }
2129 -+ shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2130 -+
2131 -+ ihold(dentry->d_inode);
2132 -+ inode->i_private = dentry->d_inode;
2133 -+
2134 -+ sb->s_magic = SHIFTFS_MAGIC;
2135 -+ sb->s_maxbytes = MAX_LFS_FILESIZE;
2136 -+ sb->s_op = &shiftfs_super_ops;
2137 -+ sb->s_xattr = shiftfs_xattr_handlers;
2138 -+ sb->s_d_op = &shiftfs_dentry_ops;
2139 -+ sb->s_root = d_make_root(inode);
2140 -+ if (!sb->s_root) {
2141 -+ err = -ENOMEM;
2142 -+ goto out_put_path;
2143 -+ }
2144 -+
2145 -+ sb->s_root->d_fsdata = dentry;
2146 -+ sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2147 -+ shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
2148 -+
2149 -+ dentry = NULL;
2150 -+ err = 0;
2151 -+
2152 -+out_put_path:
2153 -+ path_put(&path);
2154 -+
2155 -+out_free_name:
2156 -+ kfree(name);
2157 -+
2158 -+ dput(dentry);
2159 -+
2160 -+ return err;
2161 -+}
2162 -+/* Mount callback wired into shiftfs_type.mount: forwards to mount_nodev() with shiftfs_fill_super as the fill routine. */
2163 -+static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2164 -+ int flags, const char *dev_name, void *data)
2165 -+{
2166 -+ struct shiftfs_data d = { data, dev_name }; /* positional init: mount data first, device name second */
2167 -+
2168 -+ return mount_nodev(fs_type, flags, &d, shiftfs_fill_super); /* pass &d, not the raw data pointer */
2169 -+}
2170 -+/* Filesystem type descriptor; registered by shiftfs_init() and unregistered by shiftfs_exit(). */
2171 -+static struct file_system_type shiftfs_type = {
2172 -+ .owner = THIS_MODULE,
2173 -+ .name = "shiftfs", /* same string as the MODULE_ALIAS_FS() at the end of the file */
2174 -+ .mount = shiftfs_mount,
2175 -+ .kill_sb = kill_anon_super,
2176 -+ .fs_flags = FS_USERNS_MOUNT, /* NOTE(review): presumably allows mounting from a user namespace - confirm in include/linux/fs.h */
2177 -+};
2178 -+/* Module init hook: registers shiftfs_type and returns register_filesystem()'s result unchanged. */
2179 -+static int __init shiftfs_init(void)
2180 -+{
2181 -+ return register_filesystem(&shiftfs_type);
2182 -+}
2183 -+/* Module exit hook: unregisters the shiftfs_type that shiftfs_init() registered. */
2184 -+static void __exit shiftfs_exit(void)
2185 -+{
2186 -+ unregister_filesystem(&shiftfs_type);
2187 -+}
2188 -+/* Module metadata, then the init/exit hooks defined above. */
2189 -+MODULE_ALIAS_FS("shiftfs");
2190 -+MODULE_AUTHOR("James Bottomley");
2191 -+MODULE_AUTHOR("Seth Forshee <seth.forshee@×××××××××.com>");
2192 -+MODULE_AUTHOR("Christian Brauner <christian.brauner@××××××.com>");
2193 -+MODULE_DESCRIPTION("id shifting filesystem");
2194 -+MODULE_LICENSE("GPL v2");
2195 -+module_init(shiftfs_init) /* runs shiftfs_init(), which registers the filesystem type */
2196 -+module_exit(shiftfs_exit) /* runs shiftfs_exit(), which unregisters it */
2197 ---- a/include/uapi/linux/magic.h 2021-01-06 19:08:45.234777659 -0500
2198 -+++ b/include/uapi/linux/magic.h 2021-01-06 19:09:53.900375394 -0500
2199 -@@ -96,4 +96,6 @@
2200 - #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
2201 - #define Z3FOLD_MAGIC 0x33
2202 -
2203 -+#define SHIFTFS_MAGIC 0x6a656a62
2204 -+
2205 - #endif /* __LINUX_MAGIC_H__ */
2206 ---- a/fs/Makefile 2021-01-06 19:10:56.009918778 -0500
2207 -+++ b/fs/Makefile 2021-01-06 19:11:55.632442564 -0500
2208 -@@ -132,3 +132,4 @@ obj-$(CONFIG_CEPH_FS) += ceph/
2209 - obj-$(CONFIG_PSTORE) += pstore/
2210 - obj-$(CONFIG_EFIVAR_FS) += efivarfs/
2211 - obj-$(CONFIG_EROFS_FS) += erofs/
2212 -+obj-$(CONFIG_SHIFT_FS) += shiftfs.o
2213 ---- a/fs/Kconfig 2021-01-06 19:14:17.709697891 -0500
2214 -+++ b/fs/Kconfig 2021-01-06 19:15:23.413281282 -0500
2215 -@@ -122,6 +122,24 @@ source "fs/autofs/Kconfig"
2216 - source "fs/fuse/Kconfig"
2217 - source "fs/overlayfs/Kconfig"
2218 -
2219 -+config SHIFT_FS
2220 -+ tristate "UID/GID shifting overlay filesystem for containers"
2221 -+ help
2222 -+ This filesystem can overlay any mounted filesystem and shift
2223 -+ the uid/gid the files appear at. The idea is that
2224 -+ unprivileged containers can use this to mount root volumes
2225 -+ using this technique.
2226 -+
2227 -+config SHIFT_FS_POSIX_ACL
2228 -+ bool "shiftfs POSIX Access Control Lists"
2229 -+ depends on SHIFT_FS
2230 -+ select FS_POSIX_ACL
2231 -+ help
2232 -+ POSIX Access Control Lists (ACLs) support permissions for users and
2233 -+ groups beyond the owner/group/world scheme.
2234 -+
2235 -+ If you don't know what Access Control Lists are, say N.
2236 -+
2237 - menu "Caches"
2238 -
2239 - source "fs/fscache/Kconfig"