Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.4 commit in: /
Date: Fri, 08 Jan 2021 16:08:35
Message-Id: 1610122080.b5f2b18d6b9ad5b8f3b963ceb25887cdb567101a.mpagano@gentoo
1 commit: b5f2b18d6b9ad5b8f3b963ceb25887cdb567101a
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Fri Jan 8 16:08:00 2021 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Fri Jan 8 16:08:00 2021 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=b5f2b18d
7
8 Add support for shiftfs
9
10 UID/GID shifting overlay filesystem for containers
11
12 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
13
14 0000_README | 4 +
15 5000_shifts-ubuntu-20.04.patch | 2202 ++++++++++++++++++++++++++++++++++++++++
16 2 files changed, 2206 insertions(+)
17
18 diff --git a/0000_README b/0000_README
19 index 25a8827..b2d5482 100644
20 --- a/0000_README
21 +++ b/0000_README
22 @@ -419,6 +419,10 @@ Patch: 4567_distro-Gentoo-Kconfig.patch
23 From: Tom Wijsman <TomWij@g.o>
24 Desc: Add Gentoo Linux support config settings and defaults.
25
26 +Patch: 5000_shifts-ubuntu-20.04.patch
27 +From: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/focal
28 +Desc: UID/GID shifting overlay filesystem for containers
29 +
30 Patch: 5011_enable-cpu-optimizations-for-gcc8.patch
31 From: https://github.com/graysky2/kernel_gcc_patch/
32 Desc: Kernel patch for >= gccv8 enables kernel >= v4.13 optimizations for additional CPUs.
33
34 diff --git a/5000_shifts-ubuntu-20.04.patch b/5000_shifts-ubuntu-20.04.patch
35 new file mode 100644
36 index 0000000..f213d93
37 --- /dev/null
38 +++ b/5000_shifts-ubuntu-20.04.patch
39 @@ -0,0 +1,2202 @@
40 +--- /dev/null 2021-01-06 15:31:07.232620794 -0500
41 ++++ b/fs/shiftfs.c 2021-01-06 19:04:01.754354287 -0500
42 +@@ -0,0 +1,2156 @@
43 ++#include <linux/btrfs.h>
44 ++#include <linux/capability.h>
45 ++#include <linux/cred.h>
46 ++#include <linux/mount.h>
47 ++#include <linux/fdtable.h>
48 ++#include <linux/file.h>
49 ++#include <linux/fs.h>
50 ++#include <linux/namei.h>
51 ++#include <linux/module.h>
52 ++#include <linux/kernel.h>
53 ++#include <linux/magic.h>
54 ++#include <linux/parser.h>
55 ++#include <linux/security.h>
56 ++#include <linux/seq_file.h>
57 ++#include <linux/statfs.h>
58 ++#include <linux/slab.h>
59 ++#include <linux/user_namespace.h>
60 ++#include <linux/uidgid.h>
61 ++#include <linux/xattr.h>
62 ++#include <linux/posix_acl.h>
63 ++#include <linux/posix_acl_xattr.h>
64 ++#include <linux/uio.h>
65 ++
66 ++struct shiftfs_super_info {
67 ++ struct vfsmount *mnt;
68 ++ struct user_namespace *userns;
69 ++ /* creds of process who created the super block */
70 ++ const struct cred *creator_cred;
71 ++ bool mark;
72 ++ unsigned int passthrough;
73 ++ unsigned int passthrough_mark;
74 ++};
75 ++
76 ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
77 ++ umode_t mode, dev_t dev, struct dentry *dentry);
78 ++
79 ++#define SHIFTFS_PASSTHROUGH_NONE 0
80 ++#define SHIFTFS_PASSTHROUGH_STAT 1
81 ++#define SHIFTFS_PASSTHROUGH_IOCTL 2
82 ++#define SHIFTFS_PASSTHROUGH_ALL \
83 ++ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
84 ++
85 ++static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
86 ++{
87 ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
88 ++ return false;
89 ++
90 ++ return true;
91 ++}
92 ++
93 ++static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
94 ++{
95 ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
96 ++ return false;
97 ++
98 ++ return true;
99 ++}
100 ++
101 ++enum {
102 ++ OPT_MARK,
103 ++ OPT_PASSTHROUGH,
104 ++ OPT_LAST,
105 ++};
106 ++
107 ++/* global filesystem options */
108 ++static const match_table_t tokens = {
109 ++ { OPT_MARK, "mark" },
110 ++ { OPT_PASSTHROUGH, "passthrough=%u" },
111 ++ { OPT_LAST, NULL }
112 ++};
113 ++
114 ++static const struct cred *shiftfs_override_creds(const struct super_block *sb)
115 ++{
116 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
117 ++
118 ++ return override_creds(sbinfo->creator_cred);
119 ++}
120 ++
121 ++static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
122 ++ struct cred *newcred)
123 ++{
124 ++ revert_creds(oldcred);
125 ++ put_cred(newcred);
126 ++}
127 ++
128 ++static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
129 ++ kuid_t kuid)
130 ++{
131 ++ uid_t uid = from_kuid(from, kuid);
132 ++ return make_kuid(to, uid);
133 ++}
134 ++
135 ++static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
136 ++ kgid_t kgid)
137 ++{
138 ++ gid_t gid = from_kgid(from, kgid);
139 ++ return make_kgid(to, gid);
140 ++}
141 ++
142 ++static int shiftfs_override_object_creds(const struct super_block *sb,
143 ++ const struct cred **oldcred,
144 ++ struct cred **newcred,
145 ++ struct dentry *dentry, umode_t mode,
146 ++ bool hardlink)
147 ++{
148 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
149 ++ kuid_t fsuid = current_fsuid();
150 ++ kgid_t fsgid = current_fsgid();
151 ++
152 ++ *oldcred = shiftfs_override_creds(sb);
153 ++
154 ++ *newcred = prepare_creds();
155 ++ if (!*newcred) {
156 ++ revert_creds(*oldcred);
157 ++ return -ENOMEM;
158 ++ }
159 ++
160 ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
161 ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
162 ++
163 ++ if (!hardlink) {
164 ++ int err = security_dentry_create_files_as(dentry, mode,
165 ++ &dentry->d_name,
166 ++ *oldcred, *newcred);
167 ++ if (err) {
168 ++ shiftfs_revert_object_creds(*oldcred, *newcred);
169 ++ return err;
170 ++ }
171 ++ }
172 ++
173 ++ put_cred(override_creds(*newcred));
174 ++ return 0;
175 ++}
176 ++
177 ++static void shiftfs_copyattr(struct inode *from, struct inode *to)
178 ++{
179 ++ struct user_namespace *from_ns = from->i_sb->s_user_ns;
180 ++ struct user_namespace *to_ns = to->i_sb->s_user_ns;
181 ++
182 ++ to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
183 ++ to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
184 ++ to->i_mode = from->i_mode;
185 ++ to->i_atime = from->i_atime;
186 ++ to->i_mtime = from->i_mtime;
187 ++ to->i_ctime = from->i_ctime;
188 ++ i_size_write(to, i_size_read(from));
189 ++}
190 ++
191 ++static void shiftfs_copyflags(struct inode *from, struct inode *to)
192 ++{
193 ++ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
194 ++
195 ++ inode_set_flags(to, from->i_flags & mask, mask);
196 ++}
197 ++
198 ++static void shiftfs_file_accessed(struct file *file)
199 ++{
200 ++ struct inode *upperi, *loweri;
201 ++
202 ++ if (file->f_flags & O_NOATIME)
203 ++ return;
204 ++
205 ++ upperi = file_inode(file);
206 ++ loweri = upperi->i_private;
207 ++
208 ++ if (!loweri)
209 ++ return;
210 ++
211 ++ upperi->i_mtime = loweri->i_mtime;
212 ++ upperi->i_ctime = loweri->i_ctime;
213 ++
214 ++ touch_atime(&file->f_path);
215 ++}
216 ++
217 ++static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
218 ++ char *options)
219 ++{
220 ++ char *p;
221 ++ substring_t args[MAX_OPT_ARGS];
222 ++
223 ++ sbinfo->mark = false;
224 ++ sbinfo->passthrough = 0;
225 ++
226 ++ while ((p = strsep(&options, ",")) != NULL) {
227 ++ int err, intarg, token;
228 ++
229 ++ if (!*p)
230 ++ continue;
231 ++
232 ++ token = match_token(p, tokens, args);
233 ++ switch (token) {
234 ++ case OPT_MARK:
235 ++ sbinfo->mark = true;
236 ++ break;
237 ++ case OPT_PASSTHROUGH:
238 ++ err = match_int(&args[0], &intarg);
239 ++ if (err)
240 ++ return err;
241 ++
242 ++ if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
243 ++ return -EINVAL;
244 ++
245 ++ sbinfo->passthrough = intarg;
246 ++ break;
247 ++ default:
248 ++ return -EINVAL;
249 ++ }
250 ++ }
251 ++
252 ++ return 0;
253 ++}
254 ++
255 ++static void shiftfs_d_release(struct dentry *dentry)
256 ++{
257 ++ struct dentry *lowerd = dentry->d_fsdata;
258 ++
259 ++ if (lowerd)
260 ++ dput(lowerd);
261 ++}
262 ++
263 ++static struct dentry *shiftfs_d_real(struct dentry *dentry,
264 ++ const struct inode *inode)
265 ++{
266 ++ struct dentry *lowerd = dentry->d_fsdata;
267 ++
268 ++ if (inode && d_inode(dentry) == inode)
269 ++ return dentry;
270 ++
271 ++ lowerd = d_real(lowerd, inode);
272 ++ if (lowerd && (!inode || inode == d_inode(lowerd)))
273 ++ return lowerd;
274 ++
275 ++ WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
276 ++ inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
277 ++ return dentry;
278 ++}
279 ++
280 ++static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
281 ++{
282 ++ int err = 1;
283 ++ struct dentry *lowerd = dentry->d_fsdata;
284 ++
285 ++ if (d_is_negative(lowerd) != d_is_negative(dentry))
286 ++ return 0;
287 ++
288 ++ if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
289 ++ err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
290 ++
291 ++ if (d_really_is_positive(dentry)) {
292 ++ struct inode *inode = d_inode(dentry);
293 ++ struct inode *loweri = d_inode(lowerd);
294 ++
295 ++ shiftfs_copyattr(loweri, inode);
296 ++ }
297 ++
298 ++ return err;
299 ++}
300 ++
301 ++static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
302 ++{
303 ++ int err = 1;
304 ++ struct dentry *lowerd = dentry->d_fsdata;
305 ++
306 ++ if (d_unhashed(lowerd) ||
307 ++ ((d_is_negative(lowerd) != d_is_negative(dentry))))
308 ++ return 0;
309 ++
310 ++ if (flags & LOOKUP_RCU)
311 ++ return -ECHILD;
312 ++
313 ++ if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
314 ++ err = lowerd->d_op->d_revalidate(lowerd, flags);
315 ++
316 ++ if (d_really_is_positive(dentry)) {
317 ++ struct inode *inode = d_inode(dentry);
318 ++ struct inode *loweri = d_inode(lowerd);
319 ++
320 ++ shiftfs_copyattr(loweri, inode);
321 ++ }
322 ++
323 ++ return err;
324 ++}
325 ++
326 ++static const struct dentry_operations shiftfs_dentry_ops = {
327 ++ .d_release = shiftfs_d_release,
328 ++ .d_real = shiftfs_d_real,
329 ++ .d_revalidate = shiftfs_d_revalidate,
330 ++ .d_weak_revalidate = shiftfs_d_weak_revalidate,
331 ++};
332 ++
333 ++static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
334 ++ struct delayed_call *done)
335 ++{
336 ++ const char *p;
337 ++ const struct cred *oldcred;
338 ++ struct dentry *lowerd;
339 ++
340 ++ /* RCU lookup not supported */
341 ++ if (!dentry)
342 ++ return ERR_PTR(-ECHILD);
343 ++
344 ++ lowerd = dentry->d_fsdata;
345 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
346 ++ p = vfs_get_link(lowerd, done);
347 ++ revert_creds(oldcred);
348 ++
349 ++ return p;
350 ++}
351 ++
352 ++static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
353 ++ const char *name, const void *value,
354 ++ size_t size, int flags)
355 ++{
356 ++ struct dentry *lowerd = dentry->d_fsdata;
357 ++ int err;
358 ++ const struct cred *oldcred;
359 ++
360 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
361 ++ err = vfs_setxattr(lowerd, name, value, size, flags);
362 ++ revert_creds(oldcred);
363 ++
364 ++ shiftfs_copyattr(lowerd->d_inode, inode);
365 ++
366 ++ return err;
367 ++}
368 ++
369 ++static int shiftfs_xattr_get(const struct xattr_handler *handler,
370 ++ struct dentry *dentry, struct inode *inode,
371 ++ const char *name, void *value, size_t size)
372 ++{
373 ++ struct dentry *lowerd = dentry->d_fsdata;
374 ++ int err;
375 ++ const struct cred *oldcred;
376 ++
377 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
378 ++ err = vfs_getxattr(lowerd, name, value, size);
379 ++ revert_creds(oldcred);
380 ++
381 ++ return err;
382 ++}
383 ++
384 ++static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
385 ++ size_t size)
386 ++{
387 ++ struct dentry *lowerd = dentry->d_fsdata;
388 ++ int err;
389 ++ const struct cred *oldcred;
390 ++
391 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
392 ++ err = vfs_listxattr(lowerd, list, size);
393 ++ revert_creds(oldcred);
394 ++
395 ++ return err;
396 ++}
397 ++
398 ++static int shiftfs_removexattr(struct dentry *dentry, const char *name)
399 ++{
400 ++ struct dentry *lowerd = dentry->d_fsdata;
401 ++ int err;
402 ++ const struct cred *oldcred;
403 ++
404 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
405 ++ err = vfs_removexattr(lowerd, name);
406 ++ revert_creds(oldcred);
407 ++
408 ++ /* update c/mtime */
409 ++ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
410 ++
411 ++ return err;
412 ++}
413 ++
414 ++static int shiftfs_xattr_set(const struct xattr_handler *handler,
415 ++ struct dentry *dentry, struct inode *inode,
416 ++ const char *name, const void *value, size_t size,
417 ++ int flags)
418 ++{
419 ++ if (!value)
420 ++ return shiftfs_removexattr(dentry, name);
421 ++ return shiftfs_setxattr(dentry, inode, name, value, size, flags);
422 ++}
423 ++
424 ++static int shiftfs_inode_test(struct inode *inode, void *data)
425 ++{
426 ++ return inode->i_private == data;
427 ++}
428 ++
429 ++static int shiftfs_inode_set(struct inode *inode, void *data)
430 ++{
431 ++ inode->i_private = data;
432 ++ return 0;
433 ++}
434 ++
435 ++static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
436 ++ umode_t mode, const char *symlink,
437 ++ struct dentry *hardlink, bool excl)
438 ++{
439 ++ int err;
440 ++ const struct cred *oldcred;
441 ++ struct cred *newcred;
442 ++ void *loweri_iop_ptr = NULL;
443 ++ umode_t modei = mode;
444 ++ struct super_block *dir_sb = diri->i_sb;
445 ++ struct dentry *lowerd_new = dentry->d_fsdata;
446 ++ struct inode *inode = NULL, *loweri_dir = diri->i_private;
447 ++ const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
448 ++ struct dentry *lowerd_link = NULL;
449 ++
450 ++ if (hardlink) {
451 ++ loweri_iop_ptr = loweri_dir_iop->link;
452 ++ } else {
453 ++ switch (mode & S_IFMT) {
454 ++ case S_IFDIR:
455 ++ loweri_iop_ptr = loweri_dir_iop->mkdir;
456 ++ break;
457 ++ case S_IFREG:
458 ++ loweri_iop_ptr = loweri_dir_iop->create;
459 ++ break;
460 ++ case S_IFLNK:
461 ++ loweri_iop_ptr = loweri_dir_iop->symlink;
462 ++ break;
463 ++ case S_IFSOCK:
464 ++ /* fall through */
465 ++ case S_IFIFO:
466 ++ loweri_iop_ptr = loweri_dir_iop->mknod;
467 ++ break;
468 ++ }
469 ++ }
470 ++ if (!loweri_iop_ptr) {
471 ++ /* loweri_dir is not locked yet: out_iput would unlock it */
472 ++ return -EINVAL;
473 ++ }
474 ++
475 ++ inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
476 ++
477 ++ if (!hardlink) {
478 ++ inode = new_inode(dir_sb);
479 ++ if (!inode) {
480 ++ err = -ENOMEM;
481 ++ goto out_iput;
482 ++ }
483 ++
484 ++ /*
485 ++ * new_inode() will have added the new inode to the super
486 ++ * block's list of inodes. Further below we will call
487 ++ * inode_insert5() which would perform the same operation again
488 ++ * thereby corrupting the list. To avoid this raise I_CREATING
489 ++ * in i_state which will cause inode_insert5() to skip this
490 ++ * step. I_CREATING will be cleared by d_instantiate_new()
491 ++ * below.
492 ++ */
493 ++ spin_lock(&inode->i_lock);
494 ++ inode->i_state |= I_CREATING;
495 ++ spin_unlock(&inode->i_lock);
496 ++
497 ++ inode_init_owner(inode, diri, mode);
498 ++ modei = inode->i_mode;
499 ++ }
500 ++
501 ++ err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
502 ++ dentry, modei, hardlink != NULL);
503 ++ if (err)
504 ++ goto out_iput;
505 ++
506 ++ if (hardlink) {
507 ++ lowerd_link = hardlink->d_fsdata;
508 ++ err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
509 ++ } else {
510 ++ switch (modei & S_IFMT) {
511 ++ case S_IFDIR:
512 ++ err = vfs_mkdir(loweri_dir, lowerd_new, modei);
513 ++ break;
514 ++ case S_IFREG:
515 ++ err = vfs_create(loweri_dir, lowerd_new, modei, excl);
516 ++ break;
517 ++ case S_IFLNK:
518 ++ err = vfs_symlink(loweri_dir, lowerd_new, symlink);
519 ++ break;
520 ++ case S_IFSOCK:
521 ++ /* fall through */
522 ++ case S_IFIFO:
523 ++ err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
524 ++ break;
525 ++ default:
526 ++ err = -EINVAL;
527 ++ break;
528 ++ }
529 ++ }
530 ++
531 ++ shiftfs_revert_object_creds(oldcred, newcred);
532 ++
533 ++ if (!err && WARN_ON(!lowerd_new->d_inode))
534 ++ err = -EIO;
535 ++ if (err)
536 ++ goto out_iput;
537 ++
538 ++ if (hardlink) {
539 ++ inode = d_inode(hardlink);
540 ++ ihold(inode);
541 ++
542 ++ /* copy up times from lower inode */
543 ++ shiftfs_copyattr(d_inode(lowerd_link), inode);
544 ++ set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
545 ++ d_instantiate(dentry, inode);
546 ++ } else {
547 ++ struct inode *inode_tmp;
548 ++ struct inode *loweri_new = d_inode(lowerd_new);
549 ++
550 ++ inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
551 ++ shiftfs_inode_test, shiftfs_inode_set,
552 ++ loweri_new);
553 ++ if (unlikely(inode_tmp != inode)) {
554 ++ pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
555 ++ iput(inode_tmp);
556 ++ err = -EINVAL;
557 ++ goto out_iput;
558 ++ }
559 ++
560 ++ ihold(loweri_new);
561 ++ shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
562 ++ 0, lowerd_new);
563 ++ d_instantiate_new(dentry, inode);
564 ++ }
565 ++
566 ++ shiftfs_copyattr(loweri_dir, diri);
567 ++ if (loweri_iop_ptr == loweri_dir_iop->mkdir)
568 ++ set_nlink(diri, loweri_dir->i_nlink);
569 ++
570 ++ inode = NULL;
571 ++
572 ++out_iput:
573 ++ iput(inode);
574 ++ inode_unlock(loweri_dir);
575 ++
576 ++ return err;
577 ++}
578 ++
579 ++static int shiftfs_create(struct inode *dir, struct dentry *dentry,
580 ++ umode_t mode, bool excl)
581 ++{
582 ++ mode |= S_IFREG;
583 ++
584 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
585 ++}
586 ++
587 ++static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
588 ++ umode_t mode)
589 ++{
590 ++ mode |= S_IFDIR;
591 ++
592 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
593 ++}
594 ++
595 ++static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
596 ++ struct dentry *dentry)
597 ++{
598 ++ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
599 ++}
600 ++
601 ++static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
602 ++ dev_t rdev)
603 ++{
604 ++ if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
605 ++ return -EPERM;
606 ++
607 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
608 ++}
609 ++
610 ++static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
611 ++ const char *symlink)
612 ++{
613 ++ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
614 ++}
615 ++
616 ++static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
617 ++{
618 ++ struct dentry *lowerd = dentry->d_fsdata;
619 ++ struct inode *loweri = dir->i_private;
620 ++ struct inode *inode = d_inode(dentry);
621 ++ int err;
622 ++ const struct cred *oldcred;
623 ++
624 ++ dget(lowerd);
625 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
626 ++ inode_lock_nested(loweri, I_MUTEX_PARENT);
627 ++ if (rmdir)
628 ++ err = vfs_rmdir(loweri, lowerd);
629 ++ else
630 ++ err = vfs_unlink(loweri, lowerd, NULL);
631 ++ revert_creds(oldcred);
632 ++
633 ++ if (!err) {
634 ++ d_drop(dentry);
635 ++
636 ++ if (rmdir)
637 ++ clear_nlink(inode);
638 ++ else
639 ++ drop_nlink(inode);
640 ++ }
641 ++ inode_unlock(loweri);
642 ++
643 ++ shiftfs_copyattr(loweri, dir);
644 ++ dput(lowerd);
645 ++
646 ++ return err;
647 ++}
648 ++
649 ++static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
650 ++{
651 ++ return shiftfs_rm(dir, dentry, false);
652 ++}
653 ++
654 ++static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
655 ++{
656 ++ return shiftfs_rm(dir, dentry, true);
657 ++}
658 ++
659 ++static int shiftfs_rename(struct inode *olddir, struct dentry *old,
660 ++ struct inode *newdir, struct dentry *new,
661 ++ unsigned int flags)
662 ++{
663 ++ struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
664 ++ *lowerd_dir_new = new->d_parent->d_fsdata,
665 ++ *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
666 ++ *trapd;
667 ++ struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
668 ++ *loweri_dir_new = lowerd_dir_new->d_inode;
669 ++ int err = -EINVAL;
670 ++ const struct cred *oldcred;
671 ++
672 ++ trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
673 ++
674 ++ if (trapd == lowerd_old || trapd == lowerd_new)
675 ++ goto out_unlock;
676 ++
677 ++ oldcred = shiftfs_override_creds(old->d_sb);
678 ++ err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
679 ++ NULL, flags);
680 ++ revert_creds(oldcred);
681 ++
682 ++ shiftfs_copyattr(loweri_dir_old, olddir);
683 ++ shiftfs_copyattr(loweri_dir_new, newdir);
684 ++
685 ++out_unlock:
686 ++ unlock_rename(lowerd_dir_new, lowerd_dir_old);
687 ++
688 ++ return err;
689 ++}
690 ++
691 ++static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
692 ++ unsigned int flags)
693 ++{
694 ++ struct dentry *new;
695 ++ struct inode *newi;
696 ++ const struct cred *oldcred;
697 ++ struct dentry *lowerd = dentry->d_parent->d_fsdata;
698 ++ struct inode *inode = NULL, *loweri = lowerd->d_inode;
699 ++
700 ++ inode_lock(loweri);
701 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
702 ++ new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
703 ++ revert_creds(oldcred);
704 ++ inode_unlock(loweri);
705 ++
706 ++ if (IS_ERR(new))
707 ++ return new;
708 ++
709 ++ dentry->d_fsdata = new;
710 ++ newi = new->d_inode;
711 ++ if (!newi)
712 ++ goto out;
713 ++
714 ++ inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
715 ++ shiftfs_inode_test, shiftfs_inode_set, newi);
716 ++ if (!inode) {
717 ++ /* forget the alias so shiftfs_d_release() won't dput() it again */
718 ++ dentry->d_fsdata = NULL;
719 ++ dput(new);
720 ++ return ERR_PTR(-ENOMEM);
721 ++ }
722 ++ if (inode->i_state & I_NEW) {
723 ++ /*
724 ++ * i_private was set by shiftfs_inode_set(); we still need a ref
725 ++ */
726 ++ ihold(newi);
727 ++ shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
728 ++ unlock_new_inode(inode);
729 ++ }
730 ++
731 ++out:
732 ++ return d_splice_alias(inode, dentry);
733 ++}
734 ++
735 ++static int shiftfs_permission(struct inode *inode, int mask)
736 ++{
737 ++ int err;
738 ++ const struct cred *oldcred;
739 ++ struct inode *loweri = inode->i_private;
740 ++
741 ++ if (!loweri) {
742 ++ WARN_ON(!(mask & MAY_NOT_BLOCK));
743 ++ return -ECHILD;
744 ++ }
745 ++
746 ++ err = generic_permission(inode, mask);
747 ++ if (err)
748 ++ return err;
749 ++
750 ++ oldcred = shiftfs_override_creds(inode->i_sb);
751 ++ err = inode_permission(loweri, mask);
752 ++ revert_creds(oldcred);
753 ++
754 ++ return err;
755 ++}
756 ++
757 ++static int shiftfs_fiemap(struct inode *inode,
758 ++ struct fiemap_extent_info *fieinfo, u64 start,
759 ++ u64 len)
760 ++{
761 ++ int err;
762 ++ const struct cred *oldcred;
763 ++ struct inode *loweri = inode->i_private;
764 ++
765 ++ if (!loweri->i_op->fiemap)
766 ++ return -EOPNOTSUPP;
767 ++
768 ++ oldcred = shiftfs_override_creds(inode->i_sb);
769 ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
770 ++ filemap_write_and_wait(loweri->i_mapping);
771 ++ err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
772 ++ revert_creds(oldcred);
773 ++
774 ++ return err;
775 ++}
776 ++
777 ++static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
778 ++ umode_t mode)
779 ++{
780 ++ int err;
781 ++ const struct cred *oldcred;
782 ++ struct dentry *lowerd = dentry->d_fsdata;
783 ++ struct inode *loweri = dir->i_private;
784 ++
785 ++ if (!loweri->i_op->tmpfile)
786 ++ return -EOPNOTSUPP;
787 ++
788 ++ oldcred = shiftfs_override_creds(dir->i_sb);
789 ++ err = loweri->i_op->tmpfile(loweri, lowerd, mode);
790 ++ revert_creds(oldcred);
791 ++
792 ++ return err;
793 ++}
794 ++
795 ++static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
796 ++{
797 ++ struct dentry *lowerd = dentry->d_fsdata;
798 ++ struct inode *loweri = lowerd->d_inode;
799 ++ struct iattr newattr;
800 ++ const struct cred *oldcred;
801 ++ struct super_block *sb = dentry->d_sb;
802 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
803 ++ int err;
804 ++
805 ++ err = setattr_prepare(dentry, attr);
806 ++ if (err)
807 ++ return err;
808 ++
809 ++ newattr = *attr;
810 ++ newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
811 ++ newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
812 ++
813 ++ /*
814 ++ * mode change is for clearing setuid/setgid bits. Allow lower fs
815 ++ * to interpret this in its own way.
816 ++ */
817 ++ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
818 ++ newattr.ia_valid &= ~ATTR_MODE;
819 ++
820 ++ inode_lock(loweri);
821 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
822 ++ err = notify_change(lowerd, &newattr, NULL);
823 ++ revert_creds(oldcred);
824 ++ inode_unlock(loweri);
825 ++
826 ++ shiftfs_copyattr(loweri, d_inode(dentry));
827 ++
828 ++ return err;
829 ++}
830 ++
831 ++static int shiftfs_getattr(const struct path *path, struct kstat *stat,
832 ++ u32 request_mask, unsigned int query_flags)
833 ++{
834 ++ struct inode *inode = path->dentry->d_inode;
835 ++ struct dentry *lowerd = path->dentry->d_fsdata;
836 ++ struct inode *loweri = lowerd->d_inode;
837 ++ struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
838 ++ struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
839 ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
840 ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
841 ++ const struct cred *oldcred;
842 ++ int err;
843 ++
844 ++ oldcred = shiftfs_override_creds(inode->i_sb);
845 ++ err = vfs_getattr(&newpath, stat, request_mask, query_flags);
846 ++ revert_creds(oldcred);
847 ++
848 ++ if (err)
849 ++ return err;
850 ++
851 ++ /* transform the underlying id */
852 ++ stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
853 ++ stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
854 ++ return 0;
855 ++}
856 ++
857 ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL
858 ++
859 ++static int
860 ++shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
861 ++ struct posix_acl *acl)
862 ++{
863 ++ int i;
864 ++
865 ++ for (i = 0; i < acl->a_count; i++) {
866 ++ struct posix_acl_entry *e = &acl->a_entries[i];
867 ++ switch(e->e_tag) {
868 ++ case ACL_USER:
869 ++ e->e_uid = shift_kuid(from, to, e->e_uid);
870 ++ if (!uid_valid(e->e_uid))
871 ++ return -EOVERFLOW;
872 ++ break;
873 ++ case ACL_GROUP:
874 ++ e->e_gid = shift_kgid(from, to, e->e_gid);
875 ++ if (!gid_valid(e->e_gid))
876 ++ return -EOVERFLOW;
877 ++ break;
878 ++ }
879 ++ }
880 ++ return 0;
881 ++}
882 ++
883 ++static void
884 ++shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
885 ++ void *value, size_t size)
886 ++{
887 ++ struct posix_acl_xattr_header *header = value;
888 ++ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
889 ++ int count;
890 ++ kuid_t kuid;
891 ++ kgid_t kgid;
892 ++
893 ++ if (!value)
894 ++ return;
895 ++ if (size < sizeof(struct posix_acl_xattr_header))
896 ++ return;
897 ++ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
898 ++ return;
899 ++
900 ++ count = posix_acl_xattr_count(size);
901 ++ if (count < 0)
902 ++ return;
903 ++ if (count == 0)
904 ++ return;
905 ++
906 ++ for (end = entry + count; entry != end; entry++) {
907 ++ switch(le16_to_cpu(entry->e_tag)) {
908 ++ case ACL_USER:
909 ++ kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
910 ++ kuid = shift_kuid(from, to, kuid);
911 ++ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
912 ++ break;
913 ++ case ACL_GROUP:
914 ++ kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
915 ++ kgid = shift_kgid(from, to, kgid);
916 ++ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
917 ++ break;
918 ++ default:
919 ++ break;
920 ++ }
921 ++ }
922 ++}
923 ++
924 ++static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
925 ++{
926 ++ struct inode *loweri = inode->i_private;
927 ++ const struct cred *oldcred;
928 ++ struct posix_acl *lower_acl, *acl = NULL;
929 ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
930 ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
931 ++ int size;
932 ++ int err;
933 ++
934 ++ if (!IS_POSIXACL(loweri))
935 ++ return NULL;
936 ++
937 ++ oldcred = shiftfs_override_creds(inode->i_sb);
938 ++ lower_acl = get_acl(loweri, type);
939 ++ revert_creds(oldcred);
940 ++
941 ++ if (lower_acl && !IS_ERR(lower_acl)) {
942 ++ /* XXX: export posix_acl_clone? */
943 ++ size = sizeof(struct posix_acl) +
944 ++ lower_acl->a_count * sizeof(struct posix_acl_entry);
945 ++ acl = kmemdup(lower_acl, size, GFP_KERNEL);
946 ++ posix_acl_release(lower_acl);
947 ++
948 ++ if (!acl)
949 ++ return ERR_PTR(-ENOMEM);
950 ++
951 ++ refcount_set(&acl->a_refcount, 1);
952 ++
953 ++ err = shift_acl_ids(from_ns, to_ns, acl);
954 ++ if (err) {
955 ++ kfree(acl);
956 ++ return ERR_PTR(err);
957 ++ }
958 ++ }
959 ++
960 ++ return acl;
961 ++}
962 ++
963 ++static int
964 ++shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
965 ++ struct dentry *dentry, struct inode *inode,
966 ++ const char *name, void *buffer, size_t size)
967 ++{
968 ++ struct inode *loweri = inode->i_private;
969 ++ int ret;
970 ++
971 ++ ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
972 ++ buffer, size);
973 ++ if (ret < 0)
974 ++ return ret;
975 ++ /* shift only the ret bytes actually returned, not the buffer capacity */
976 ++ inode_lock(loweri);
977 ++ shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
978 ++ buffer, ret);
979 ++ inode_unlock(loweri);
980 ++ return ret;
981 ++}
982 ++
983 ++static int
984 ++shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
985 ++ struct dentry *dentry, struct inode *inode,
986 ++ const char *name, const void *value,
987 ++ size_t size, int flags)
988 ++{
989 ++ struct inode *loweri = inode->i_private;
990 ++ int err;
991 ++
992 ++ if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
993 ++ return -EOPNOTSUPP;
994 ++ if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
995 ++ return value ? -EACCES : 0;
996 ++ if (!inode_owner_or_capable(inode))
997 ++ return -EPERM;
998 ++
999 ++ if (value) {
1000 ++ shift_acl_xattr_ids(inode->i_sb->s_user_ns,
1001 ++ loweri->i_sb->s_user_ns,
1002 ++ (void *)value, size);
1003 ++ err = shiftfs_setxattr(dentry, inode, handler->name, value,
1004 ++ size, flags);
1005 ++ } else {
1006 ++ err = shiftfs_removexattr(dentry, handler->name);
1007 ++ }
1008 ++
1009 ++ if (!err)
1010 ++ shiftfs_copyattr(loweri, inode);
1011 ++
1012 ++ return err;
1013 ++}
1014 ++
1015 ++static const struct xattr_handler
1016 ++shiftfs_posix_acl_access_xattr_handler = {
1017 ++ .name = XATTR_NAME_POSIX_ACL_ACCESS,
1018 ++ .flags = ACL_TYPE_ACCESS,
1019 ++ .get = shiftfs_posix_acl_xattr_get,
1020 ++ .set = shiftfs_posix_acl_xattr_set,
1021 ++};
1022 ++
1023 ++static const struct xattr_handler
1024 ++shiftfs_posix_acl_default_xattr_handler = {
1025 ++ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1026 ++ .flags = ACL_TYPE_DEFAULT,
1027 ++ .get = shiftfs_posix_acl_xattr_get,
1028 ++ .set = shiftfs_posix_acl_xattr_set,
1029 ++};
1030 ++
1031 ++#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
1032 ++
1033 ++#define shiftfs_get_acl NULL
1034 ++
1035 ++#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
1036 ++
1037 ++static const struct inode_operations shiftfs_dir_inode_operations = {
1038 ++ .lookup = shiftfs_lookup,
1039 ++ .mkdir = shiftfs_mkdir,
1040 ++ .symlink = shiftfs_symlink,
1041 ++ .unlink = shiftfs_unlink,
1042 ++ .rmdir = shiftfs_rmdir,
1043 ++ .rename = shiftfs_rename,
1044 ++ .link = shiftfs_link,
1045 ++ .setattr = shiftfs_setattr,
1046 ++ .create = shiftfs_create,
1047 ++ .mknod = shiftfs_mknod,
1048 ++ .permission = shiftfs_permission,
1049 ++ .getattr = shiftfs_getattr,
1050 ++ .listxattr = shiftfs_listxattr,
1051 ++ .get_acl = shiftfs_get_acl,
1052 ++};
1053 ++
1054 ++static const struct inode_operations shiftfs_file_inode_operations = {
1055 ++ .fiemap = shiftfs_fiemap,
1056 ++ .getattr = shiftfs_getattr,
1057 ++ .get_acl = shiftfs_get_acl,
1058 ++ .listxattr = shiftfs_listxattr,
1059 ++ .permission = shiftfs_permission,
1060 ++ .setattr = shiftfs_setattr,
1061 ++ .tmpfile = shiftfs_tmpfile,
1062 ++};
1063 ++
1064 ++static const struct inode_operations shiftfs_special_inode_operations = {
1065 ++ .getattr = shiftfs_getattr,
1066 ++ .get_acl = shiftfs_get_acl,
1067 ++ .listxattr = shiftfs_listxattr,
1068 ++ .permission = shiftfs_permission,
1069 ++ .setattr = shiftfs_setattr,
1070 ++};
1071 ++
1072 ++static const struct inode_operations shiftfs_symlink_inode_operations = {
1073 ++ .getattr = shiftfs_getattr,
1074 ++ .get_link = shiftfs_get_link,
1075 ++ .listxattr = shiftfs_listxattr,
1076 ++ .setattr = shiftfs_setattr,
1077 ++};
1078 ++
1079 ++static struct file *shiftfs_open_realfile(const struct file *file,
1080 ++ struct inode *realinode)
1081 ++{
1082 ++ struct file *realfile;
1083 ++ const struct cred *old_cred;
1084 ++ struct inode *inode = file_inode(file);
1085 ++ struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1086 ++ struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1087 ++ struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1088 ++
1089 ++ old_cred = shiftfs_override_creds(inode->i_sb);
1090 ++ realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1091 ++ info->creator_cred);
1092 ++ revert_creds(old_cred);
1093 ++
1094 ++ return realfile;
1095 ++}
1096 ++
/* Open flags that shiftfs_change_flags() is allowed to alter on a file. */
#define SHIFTFS_SETFL_MASK (O_DIRECT | O_NDELAY | O_NONBLOCK | O_APPEND)
1098 ++
1099 ++static int shiftfs_change_flags(struct file *file, unsigned int flags)
1100 ++{
1101 ++ struct inode *inode = file_inode(file);
1102 ++ int err;
1103 ++
1104 ++ /* if some flag changed that cannot be changed then something's amiss */
1105 ++ if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1106 ++ return -EIO;
1107 ++
1108 ++ flags &= SHIFTFS_SETFL_MASK;
1109 ++
1110 ++ if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1111 ++ return -EPERM;
1112 ++
1113 ++ if (flags & O_DIRECT) {
1114 ++ if (!file->f_mapping->a_ops ||
1115 ++ !file->f_mapping->a_ops->direct_IO)
1116 ++ return -EINVAL;
1117 ++ }
1118 ++
1119 ++ if (file->f_op->check_flags) {
1120 ++ err = file->f_op->check_flags(flags);
1121 ++ if (err)
1122 ++ return err;
1123 ++ }
1124 ++
1125 ++ spin_lock(&file->f_lock);
1126 ++ file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1127 ++ spin_unlock(&file->f_lock);
1128 ++
1129 ++ return 0;
1130 ++}
1131 ++
1132 ++static int shiftfs_open(struct inode *inode, struct file *file)
1133 ++{
1134 ++ struct file *realfile;
1135 ++
1136 ++ realfile = shiftfs_open_realfile(file, inode->i_private);
1137 ++ if (IS_ERR(realfile))
1138 ++ return PTR_ERR(realfile);
1139 ++
1140 ++ file->private_data = realfile;
1141 ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1142 ++ file->f_mapping = realfile->f_mapping;
1143 ++
1144 ++ return 0;
1145 ++}
1146 ++
1147 ++static int shiftfs_dir_open(struct inode *inode, struct file *file)
1148 ++{
1149 ++ struct file *realfile;
1150 ++ const struct cred *oldcred;
1151 ++ struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1152 ++ struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1153 ++ struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1154 ++
1155 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1156 ++ realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1157 ++ info->creator_cred);
1158 ++ revert_creds(oldcred);
1159 ++ if (IS_ERR(realfile))
1160 ++ return PTR_ERR(realfile);
1161 ++
1162 ++ file->private_data = realfile;
1163 ++
1164 ++ return 0;
1165 ++}
1166 ++
1167 ++static int shiftfs_release(struct inode *inode, struct file *file)
1168 ++{
1169 ++ struct file *realfile = file->private_data;
1170 ++
1171 ++ if (realfile)
1172 ++ fput(realfile);
1173 ++
1174 ++ return 0;
1175 ++}
1176 ++
/* ->release for directories: identical to the regular-file release. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int rc = shiftfs_release(inode, file);

	return rc;
}
1181 ++
1182 ++static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1183 ++{
1184 ++ struct file *realfile = file->private_data;
1185 ++
1186 ++ return vfs_llseek(realfile, offset, whence);
1187 ++}
1188 ++
1189 ++static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
1190 ++{
1191 ++ struct inode *realinode = file_inode(file)->i_private;
1192 ++
1193 ++ return generic_file_llseek_size(file, offset, whence,
1194 ++ realinode->i_sb->s_maxbytes,
1195 ++ i_size_read(realinode));
1196 ++}
1197 ++
1198 ++/* XXX: Need to figure out what to do about atime updates, maybe other
1199 ++ * timestamps too ... ref. ovl_file_accessed() */
1200 ++
1201 ++static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1202 ++{
1203 ++ int ifl = iocb->ki_flags;
1204 ++ rwf_t flags = 0;
1205 ++
1206 ++ if (ifl & IOCB_NOWAIT)
1207 ++ flags |= RWF_NOWAIT;
1208 ++ if (ifl & IOCB_HIPRI)
1209 ++ flags |= RWF_HIPRI;
1210 ++ if (ifl & IOCB_DSYNC)
1211 ++ flags |= RWF_DSYNC;
1212 ++ if (ifl & IOCB_SYNC)
1213 ++ flags |= RWF_SYNC;
1214 ++
1215 ++ return flags;
1216 ++}
1217 ++
1218 ++static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1219 ++{
1220 ++ struct file *realfile;
1221 ++
1222 ++ if (file->f_op->open != shiftfs_open &&
1223 ++ file->f_op->open != shiftfs_dir_open)
1224 ++ return -EINVAL;
1225 ++
1226 ++ realfile = file->private_data;
1227 ++ lowerfd->flags = 0;
1228 ++ lowerfd->file = realfile;
1229 ++
1230 ++ /* Did the flags change since open? */
1231 ++ if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1232 ++ return shiftfs_change_flags(lowerfd->file, file->f_flags);
1233 ++
1234 ++ return 0;
1235 ++}
1236 ++
1237 ++static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1238 ++{
1239 ++ struct file *file = iocb->ki_filp;
1240 ++ struct fd lowerfd;
1241 ++ const struct cred *oldcred;
1242 ++ ssize_t ret;
1243 ++
1244 ++ if (!iov_iter_count(iter))
1245 ++ return 0;
1246 ++
1247 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1248 ++ if (ret)
1249 ++ return ret;
1250 ++
1251 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1252 ++ ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1253 ++ shiftfs_iocb_to_rwf(iocb));
1254 ++ revert_creds(oldcred);
1255 ++
1256 ++ shiftfs_file_accessed(file);
1257 ++
1258 ++ fdput(lowerfd);
1259 ++ return ret;
1260 ++}
1261 ++
1262 ++static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1263 ++{
1264 ++ struct file *file = iocb->ki_filp;
1265 ++ struct inode *inode = file_inode(file);
1266 ++ struct fd lowerfd;
1267 ++ const struct cred *oldcred;
1268 ++ ssize_t ret;
1269 ++
1270 ++ if (!iov_iter_count(iter))
1271 ++ return 0;
1272 ++
1273 ++ inode_lock(inode);
1274 ++ /* Update mode */
1275 ++ shiftfs_copyattr(inode->i_private, inode);
1276 ++ ret = file_remove_privs(file);
1277 ++ if (ret)
1278 ++ goto out_unlock;
1279 ++
1280 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1281 ++ if (ret)
1282 ++ goto out_unlock;
1283 ++
1284 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1285 ++ file_start_write(lowerfd.file);
1286 ++ ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1287 ++ shiftfs_iocb_to_rwf(iocb));
1288 ++ file_end_write(lowerfd.file);
1289 ++ revert_creds(oldcred);
1290 ++
1291 ++ /* Update size */
1292 ++ shiftfs_copyattr(inode->i_private, inode);
1293 ++
1294 ++ fdput(lowerfd);
1295 ++
1296 ++out_unlock:
1297 ++ inode_unlock(inode);
1298 ++ return ret;
1299 ++}
1300 ++
1301 ++static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1302 ++ int datasync)
1303 ++{
1304 ++ struct fd lowerfd;
1305 ++ const struct cred *oldcred;
1306 ++ int ret;
1307 ++
1308 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1309 ++ if (ret)
1310 ++ return ret;
1311 ++
1312 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1313 ++ ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1314 ++ revert_creds(oldcred);
1315 ++
1316 ++ fdput(lowerfd);
1317 ++ return ret;
1318 ++}
1319 ++
1320 ++static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1321 ++{
1322 ++ struct file *realfile = file->private_data;
1323 ++ const struct cred *oldcred;
1324 ++ int ret;
1325 ++
1326 ++ if (!realfile->f_op->mmap)
1327 ++ return -ENODEV;
1328 ++
1329 ++ if (WARN_ON(file != vma->vm_file))
1330 ++ return -EIO;
1331 ++
1332 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1333 ++ vma->vm_file = get_file(realfile);
1334 ++ ret = call_mmap(vma->vm_file, vma);
1335 ++ revert_creds(oldcred);
1336 ++
1337 ++ shiftfs_file_accessed(file);
1338 ++
1339 ++ if (ret) {
1340 ++ /*
1341 ++ * Drop refcount from new vm_file value and restore original
1342 ++ * vm_file value
1343 ++ */
1344 ++ vma->vm_file = file;
1345 ++ fput(realfile);
1346 ++ } else {
1347 ++ /* Drop refcount from previous vm_file value */
1348 ++ fput(file);
1349 ++ }
1350 ++
1351 ++ return ret;
1352 ++}
1353 ++
1354 ++static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1355 ++ loff_t len)
1356 ++{
1357 ++ struct inode *inode = file_inode(file);
1358 ++ struct inode *loweri = inode->i_private;
1359 ++ struct fd lowerfd;
1360 ++ const struct cred *oldcred;
1361 ++ int ret;
1362 ++
1363 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1364 ++ if (ret)
1365 ++ return ret;
1366 ++
1367 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1368 ++ ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1369 ++ revert_creds(oldcred);
1370 ++
1371 ++ /* Update size */
1372 ++ shiftfs_copyattr(loweri, inode);
1373 ++
1374 ++ fdput(lowerfd);
1375 ++ return ret;
1376 ++}
1377 ++
1378 ++static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1379 ++ int advice)
1380 ++{
1381 ++ struct fd lowerfd;
1382 ++ const struct cred *oldcred;
1383 ++ int ret;
1384 ++
1385 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1386 ++ if (ret)
1387 ++ return ret;
1388 ++
1389 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1390 ++ ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1391 ++ revert_creds(oldcred);
1392 ++
1393 ++ fdput(lowerfd);
1394 ++ return ret;
1395 ++}
1396 ++
1397 ++static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
1398 ++ const struct cred **oldcred,
1399 ++ struct cred **newcred)
1400 ++{
1401 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1402 ++ kuid_t fsuid = current_fsuid();
1403 ++ kgid_t fsgid = current_fsgid();
1404 ++
1405 ++ *oldcred = shiftfs_override_creds(sb);
1406 ++
1407 ++ *newcred = prepare_creds();
1408 ++ if (!*newcred) {
1409 ++ revert_creds(*oldcred);
1410 ++ return -ENOMEM;
1411 ++ }
1412 ++
1413 ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1414 ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
1415 ++
1416 ++ /* clear all caps to prevent bypassing capable() checks */
1417 ++ cap_clear((*newcred)->cap_bset);
1418 ++ cap_clear((*newcred)->cap_effective);
1419 ++ cap_clear((*newcred)->cap_inheritable);
1420 ++ cap_clear((*newcred)->cap_permitted);
1421 ++
1422 ++ if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1423 ++ kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1424 ++ /*
1425 ++ * Allow the root user in the container to remove subvolumes
1426 ++ * from other users.
1427 ++ */
1428 ++ if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1429 ++ cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1430 ++ }
1431 ++
1432 ++ put_cred(override_creds(*newcred));
1433 ++ return 0;
1434 ++}
1435 ++
/*
 * Undo shiftfs_override_ioctl_creds(); thin wrapper around
 * shiftfs_revert_object_creds().
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1441 ++
1442 ++static inline bool is_btrfs_snap_ioctl(int cmd)
1443 ++{
1444 ++ if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1445 ++ return true;
1446 ++
1447 ++ return false;
1448 ++}
1449 ++
1450 ++static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
1451 ++ struct btrfs_ioctl_vol_args *v1,
1452 ++ struct btrfs_ioctl_vol_args_v2 *v2)
1453 ++{
1454 ++ int ret;
1455 ++
1456 ++ if (!is_btrfs_snap_ioctl(cmd))
1457 ++ return 0;
1458 ++
1459 ++ if (cmd == BTRFS_IOC_SNAP_CREATE)
1460 ++ ret = copy_to_user(arg, v1, sizeof(*v1));
1461 ++ else
1462 ++ ret = copy_to_user(arg, v2, sizeof(*v2));
1463 ++
1464 ++ __close_fd(current->files, fd);
1465 ++ kfree(v1);
1466 ++ kfree(v2);
1467 ++
1468 ++ return ret;
1469 ++}
1470 ++
1471 ++static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1472 ++ struct btrfs_ioctl_vol_args **b1,
1473 ++ struct btrfs_ioctl_vol_args_v2 **b2,
1474 ++ int *newfd)
1475 ++{
1476 ++ int oldfd, ret;
1477 ++ struct fd src;
1478 ++ struct fd lfd = {};
1479 ++ struct btrfs_ioctl_vol_args *v1 = NULL;
1480 ++ struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1481 ++
1482 ++ if (!is_btrfs_snap_ioctl(cmd))
1483 ++ return 0;
1484 ++
1485 ++ if (cmd == BTRFS_IOC_SNAP_CREATE) {
1486 ++ v1 = memdup_user(arg, sizeof(*v1));
1487 ++ if (IS_ERR(v1))
1488 ++ return PTR_ERR(v1);
1489 ++ oldfd = v1->fd;
1490 ++ *b1 = v1;
1491 ++ } else {
1492 ++ v2 = memdup_user(arg, sizeof(*v2));
1493 ++ if (IS_ERR(v2))
1494 ++ return PTR_ERR(v2);
1495 ++ oldfd = v2->fd;
1496 ++ *b2 = v2;
1497 ++ }
1498 ++
1499 ++ src = fdget(oldfd);
1500 ++ if (!src.file)
1501 ++ return -EINVAL;
1502 ++
1503 ++ ret = shiftfs_real_fdget(src.file, &lfd);
1504 ++ if (ret) {
1505 ++ fdput(src);
1506 ++ return ret;
1507 ++ }
1508 ++
1509 ++ /*
1510 ++ * shiftfs_real_fdget() does not take a reference to lfd.file, so
1511 ++ * take a reference here to offset the one which will be put by
1512 ++ * __close_fd(), and make sure that reference is put on fdput(lfd).
1513 ++ */
1514 ++ get_file(lfd.file);
1515 ++ lfd.flags |= FDPUT_FPUT;
1516 ++ fdput(src);
1517 ++
1518 ++ *newfd = get_unused_fd_flags(lfd.file->f_flags);
1519 ++ if (*newfd < 0) {
1520 ++ fdput(lfd);
1521 ++ return *newfd;
1522 ++ }
1523 ++
1524 ++ fd_install(*newfd, lfd.file);
1525 ++
1526 ++ if (cmd == BTRFS_IOC_SNAP_CREATE) {
1527 ++ v1->fd = *newfd;
1528 ++ ret = copy_to_user(arg, v1, sizeof(*v1));
1529 ++ v1->fd = oldfd;
1530 ++ } else {
1531 ++ v2->fd = *newfd;
1532 ++ ret = copy_to_user(arg, v2, sizeof(*v2));
1533 ++ v2->fd = oldfd;
1534 ++ }
1535 ++
1536 ++ if (ret)
1537 ++ shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
1538 ++
1539 ++ return ret;
1540 ++}
1541 ++
1542 ++static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1543 ++ unsigned long arg)
1544 ++{
1545 ++ struct fd lowerfd;
1546 ++ struct cred *newcred;
1547 ++ const struct cred *oldcred;
1548 ++ int newfd = -EBADF;
1549 ++ long err = 0, ret = 0;
1550 ++ void __user *argp = (void __user *)arg;
1551 ++ struct super_block *sb = file->f_path.dentry->d_sb;
1552 ++ struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1553 ++ struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1554 ++
1555 ++ ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1556 ++ &newfd);
1557 ++ if (ret < 0)
1558 ++ return ret;
1559 ++
1560 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1561 ++ if (ret)
1562 ++ goto out_restore;
1563 ++
1564 ++ ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
1565 ++ if (ret)
1566 ++ goto out_fdput;
1567 ++
1568 ++ ret = vfs_ioctl(lowerfd.file, cmd, arg);
1569 ++
1570 ++ shiftfs_revert_ioctl_creds(oldcred, newcred);
1571 ++
1572 ++ shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1573 ++ shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1574 ++
1575 ++out_fdput:
1576 ++ fdput(lowerfd);
1577 ++
1578 ++out_restore:
1579 ++ err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1580 ++ btrfs_v1, btrfs_v2);
1581 ++ if (!ret)
1582 ++ ret = err;
1583 ++
1584 ++ return ret;
1585 ++}
1586 ++
1587 ++static bool in_ioctl_whitelist(int flag, unsigned long arg)
1588 ++{
1589 ++ void __user *argp = (void __user *)arg;
1590 ++ u64 flags = 0;
1591 ++
1592 ++ switch (flag) {
1593 ++ case BTRFS_IOC_FS_INFO:
1594 ++ return true;
1595 ++ case BTRFS_IOC_SNAP_CREATE:
1596 ++ return true;
1597 ++ case BTRFS_IOC_SNAP_CREATE_V2:
1598 ++ return true;
1599 ++ case BTRFS_IOC_SUBVOL_CREATE:
1600 ++ return true;
1601 ++ case BTRFS_IOC_SUBVOL_CREATE_V2:
1602 ++ return true;
1603 ++ case BTRFS_IOC_SUBVOL_GETFLAGS:
1604 ++ return true;
1605 ++ case BTRFS_IOC_SUBVOL_SETFLAGS:
1606 ++ if (copy_from_user(&flags, argp, sizeof(flags)))
1607 ++ return false;
1608 ++
1609 ++ if (flags & ~BTRFS_SUBVOL_RDONLY)
1610 ++ return false;
1611 ++
1612 ++ return true;
1613 ++ case BTRFS_IOC_SNAP_DESTROY:
1614 ++ return true;
1615 ++ }
1616 ++
1617 ++ return false;
1618 ++}
1619 ++
1620 ++static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1621 ++ unsigned long arg)
1622 ++{
1623 ++ switch (cmd) {
1624 ++ case FS_IOC_GETVERSION:
1625 ++ /* fall through */
1626 ++ case FS_IOC_GETFLAGS:
1627 ++ /* fall through */
1628 ++ case FS_IOC_SETFLAGS:
1629 ++ break;
1630 ++ default:
1631 ++ if (!in_ioctl_whitelist(cmd, arg) ||
1632 ++ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1633 ++ return -ENOTTY;
1634 ++ }
1635 ++
1636 ++ return shiftfs_real_ioctl(file, cmd, arg);
1637 ++}
1638 ++
1639 ++static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1640 ++ unsigned long arg)
1641 ++{
1642 ++ switch (cmd) {
1643 ++ case FS_IOC32_GETVERSION:
1644 ++ /* fall through */
1645 ++ case FS_IOC32_GETFLAGS:
1646 ++ /* fall through */
1647 ++ case FS_IOC32_SETFLAGS:
1648 ++ break;
1649 ++ default:
1650 ++ if (!in_ioctl_whitelist(cmd, arg) ||
1651 ++ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1652 ++ return -ENOIOCTLCMD;
1653 ++ }
1654 ++
1655 ++ return shiftfs_real_ioctl(file, cmd, arg);
1656 ++}
1657 ++
/* Selects which lower-VFS helper shiftfs_copyfile() invokes. */
enum shiftfs_copyop {
	SHIFTFS_COPY	= 0,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE	= 1,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE	= 2,	/* vfs_dedupe_file_range_one() */
};
1663 ++
1664 ++static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1665 ++ struct file *file_out, loff_t pos_out, u64 len,
1666 ++ unsigned int flags, enum shiftfs_copyop op)
1667 ++{
1668 ++ ssize_t ret;
1669 ++ struct fd real_in, real_out;
1670 ++ const struct cred *oldcred;
1671 ++ struct inode *inode_out = file_inode(file_out);
1672 ++ struct inode *loweri = inode_out->i_private;
1673 ++
1674 ++ ret = shiftfs_real_fdget(file_out, &real_out);
1675 ++ if (ret)
1676 ++ return ret;
1677 ++
1678 ++ ret = shiftfs_real_fdget(file_in, &real_in);
1679 ++ if (ret) {
1680 ++ fdput(real_out);
1681 ++ return ret;
1682 ++ }
1683 ++
1684 ++ oldcred = shiftfs_override_creds(inode_out->i_sb);
1685 ++ switch (op) {
1686 ++ case SHIFTFS_COPY:
1687 ++ ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1688 ++ pos_out, len, flags);
1689 ++ break;
1690 ++
1691 ++ case SHIFTFS_CLONE:
1692 ++ ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1693 ++ pos_out, len, flags);
1694 ++ break;
1695 ++
1696 ++ case SHIFTFS_DEDUPE:
1697 ++ ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1698 ++ real_out.file, pos_out, len,
1699 ++ flags);
1700 ++ break;
1701 ++ }
1702 ++ revert_creds(oldcred);
1703 ++
1704 ++ /* Update size */
1705 ++ shiftfs_copyattr(loweri, inode_out);
1706 ++
1707 ++ fdput(real_in);
1708 ++ fdput(real_out);
1709 ++
1710 ++ return ret;
1711 ++}
1712 ++
1713 ++static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1714 ++ struct file *file_out, loff_t pos_out,
1715 ++ size_t len, unsigned int flags)
1716 ++{
1717 ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1718 ++ SHIFTFS_COPY);
1719 ++}
1720 ++
1721 ++static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1722 ++ struct file *file_out, loff_t pos_out,
1723 ++ loff_t len, unsigned int remap_flags)
1724 ++{
1725 ++ enum shiftfs_copyop op;
1726 ++
1727 ++ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1728 ++ return -EINVAL;
1729 ++
1730 ++ if (remap_flags & REMAP_FILE_DEDUP)
1731 ++ op = SHIFTFS_DEDUPE;
1732 ++ else
1733 ++ op = SHIFTFS_CLONE;
1734 ++
1735 ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1736 ++ remap_flags, op);
1737 ++}
1738 ++
1739 ++static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1740 ++{
1741 ++ const struct cred *oldcred;
1742 ++ int err = -ENOTDIR;
1743 ++ struct file *realfile = file->private_data;
1744 ++
1745 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1746 ++ err = iterate_dir(realfile, ctx);
1747 ++ revert_creds(oldcred);
1748 ++
1749 ++ return err;
1750 ++}
1751 ++
1752 ++const struct file_operations shiftfs_file_operations = {
1753 ++ .open = shiftfs_open,
1754 ++ .release = shiftfs_release,
1755 ++ .llseek = shiftfs_file_llseek,
1756 ++ .read_iter = shiftfs_read_iter,
1757 ++ .write_iter = shiftfs_write_iter,
1758 ++ .fsync = shiftfs_fsync,
1759 ++ .mmap = shiftfs_mmap,
1760 ++ .fallocate = shiftfs_fallocate,
1761 ++ .fadvise = shiftfs_fadvise,
1762 ++ .unlocked_ioctl = shiftfs_ioctl,
1763 ++ .compat_ioctl = shiftfs_compat_ioctl,
1764 ++ .copy_file_range = shiftfs_copy_file_range,
1765 ++ .remap_file_range = shiftfs_remap_file_range,
1766 ++};
1767 ++
1768 ++const struct file_operations shiftfs_dir_operations = {
1769 ++ .open = shiftfs_dir_open,
1770 ++ .release = shiftfs_dir_release,
1771 ++ .compat_ioctl = shiftfs_compat_ioctl,
1772 ++ .fsync = shiftfs_fsync,
1773 ++ .iterate_shared = shiftfs_iterate_shared,
1774 ++ .llseek = shiftfs_dir_llseek,
1775 ++ .read = generic_read_dir,
1776 ++ .unlocked_ioctl = shiftfs_ioctl,
1777 ++};
1778 ++
1779 ++static const struct address_space_operations shiftfs_aops = { /* installed on regular-file inodes by shiftfs_fill_inode() */
1780 ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1781 ++ .direct_IO = noop_direct_IO,
1782 ++};
1783 ++
1784 ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1785 ++ umode_t mode, dev_t dev, struct dentry *dentry)
1786 ++{
1787 ++ struct inode *loweri;
1788 ++
1789 ++ inode->i_ino = ino;
1790 ++ inode->i_flags |= S_NOCMTIME;
1791 ++
1792 ++ mode &= S_IFMT;
1793 ++ inode->i_mode = mode;
1794 ++ switch (mode & S_IFMT) {
1795 ++ case S_IFDIR:
1796 ++ inode->i_op = &shiftfs_dir_inode_operations;
1797 ++ inode->i_fop = &shiftfs_dir_operations;
1798 ++ break;
1799 ++ case S_IFLNK:
1800 ++ inode->i_op = &shiftfs_symlink_inode_operations;
1801 ++ break;
1802 ++ case S_IFREG:
1803 ++ inode->i_op = &shiftfs_file_inode_operations;
1804 ++ inode->i_fop = &shiftfs_file_operations;
1805 ++ inode->i_mapping->a_ops = &shiftfs_aops;
1806 ++ break;
1807 ++ default:
1808 ++ inode->i_op = &shiftfs_special_inode_operations;
1809 ++ init_special_inode(inode, mode, dev);
1810 ++ break;
1811 ++ }
1812 ++
1813 ++ if (!dentry)
1814 ++ return;
1815 ++
1816 ++ loweri = dentry->d_inode;
1817 ++ if (!loweri->i_op->get_link)
1818 ++ inode->i_opflags |= IOP_NOFOLLOW;
1819 ++
1820 ++ shiftfs_copyattr(loweri, inode);
1821 ++ shiftfs_copyflags(loweri, inode);
1822 ++ set_nlink(inode, loweri->i_nlink);
1823 ++}
1824 ++
1825 ++static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1826 ++{
1827 ++ struct super_block *sb = dentry->d_sb;
1828 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1829 ++
1830 ++ if (sbinfo->mark)
1831 ++ seq_show_option(m, "mark", NULL);
1832 ++
1833 ++ if (sbinfo->passthrough)
1834 ++ seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1835 ++
1836 ++ return 0;
1837 ++}
1838 ++
1839 ++static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1840 ++{
1841 ++ struct super_block *sb = dentry->d_sb;
1842 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1843 ++ struct dentry *root = sb->s_root;
1844 ++ struct dentry *realroot = root->d_fsdata;
1845 ++ struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1846 ++ int err;
1847 ++
1848 ++ err = vfs_statfs(&realpath, buf);
1849 ++ if (err)
1850 ++ return err;
1851 ++
1852 ++ if (!shiftfs_passthrough_statfs(sbinfo))
1853 ++ buf->f_type = sb->s_magic;
1854 ++
1855 ++ return 0;
1856 ++}
1857 ++
1858 ++static void shiftfs_evict_inode(struct inode *inode)
1859 ++{
1860 ++ struct inode *loweri = inode->i_private;
1861 ++
1862 ++ clear_inode(inode);
1863 ++
1864 ++ if (loweri)
1865 ++ iput(loweri);
1866 ++}
1867 ++
1868 ++static void shiftfs_put_super(struct super_block *sb)
1869 ++{
1870 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1871 ++
1872 ++ if (sbinfo) {
1873 ++ mntput(sbinfo->mnt);
1874 ++ put_cred(sbinfo->creator_cred);
1875 ++ kfree(sbinfo);
1876 ++ }
1877 ++}
1878 ++
1879 ++static const struct xattr_handler shiftfs_xattr_handler = {
1880 ++ .prefix = "",
1881 ++ .get = shiftfs_xattr_get,
1882 ++ .set = shiftfs_xattr_set,
1883 ++};
1884 ++
1885 ++const struct xattr_handler *shiftfs_xattr_handlers[] = { /* NULL-terminated table assigned to sb->s_xattr in shiftfs_fill_super() */
1886 ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1887 ++ &shiftfs_posix_acl_access_xattr_handler, /* POSIX access ACL */
1888 ++ &shiftfs_posix_acl_default_xattr_handler, /* POSIX default ACL */
1889 ++#endif
1890 ++ &shiftfs_xattr_handler, /* empty-prefix handler, listed after the ACL handlers */
1891 ++ NULL
1892 ++};
1893 ++
/* True iff every bit set in @new_flags is also set in @old_flags. */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & old_flags) == new_flags;
}
1901 ++
1902 ++static int shiftfs_super_check_flags(unsigned long old_flags,
1903 ++ unsigned long new_flags)
1904 ++{
1905 ++ if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1906 ++ return -EPERM;
1907 ++
1908 ++ if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1909 ++ return -EPERM;
1910 ++
1911 ++ if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1912 ++ return -EPERM;
1913 ++
1914 ++ if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1915 ++ return -EPERM;
1916 ++
1917 ++ if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1918 ++ return -EPERM;
1919 ++
1920 ++ if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1921 ++ return -EPERM;
1922 ++
1923 ++ if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1924 ++ return -EPERM;
1925 ++
1926 ++ return 0;
1927 ++}
1928 ++
1929 ++static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1930 ++{
1931 ++ int err;
1932 ++ struct shiftfs_super_info new = {};
1933 ++ struct shiftfs_super_info *info = sb->s_fs_info;
1934 ++
1935 ++ err = shiftfs_parse_mount_options(&new, data);
1936 ++ if (err)
1937 ++ return err;
1938 ++
1939 ++ err = shiftfs_super_check_flags(sb->s_flags, *flags);
1940 ++ if (err)
1941 ++ return err;
1942 ++
1943 ++ /* Mark mount option cannot be changed. */
1944 ++ if (info->mark || (info->mark != new.mark))
1945 ++ return -EPERM;
1946 ++
1947 ++ if (info->passthrough != new.passthrough) {
1948 ++ /* Don't allow exceeding passthrough options of mark mount. */
1949 ++ if (!passthrough_is_subset(info->passthrough_mark,
1950 ++ info->passthrough))
1951 ++ return -EPERM;
1952 ++
1953 ++ info->passthrough = new.passthrough;
1954 ++ }
1955 ++
1956 ++ return 0;
1957 ++}
1958 ++
1959 ++static const struct super_operations shiftfs_super_ops = {
1960 ++ .put_super = shiftfs_put_super,
1961 ++ .show_options = shiftfs_show_options,
1962 ++ .statfs = shiftfs_statfs,
1963 ++ .remount_fs = shiftfs_remount,
1964 ++ .evict_inode = shiftfs_evict_inode,
1965 ++};
1966 ++
1967 ++struct shiftfs_data { /* argument bundle handed from shiftfs_mount() to shiftfs_fill_super() */
1968 ++ void *data; /* raw mount option data, given to shiftfs_parse_mount_options() */
1969 ++ const char *path; /* mount source (dev_name); resolved with kern_path() */
1970 ++};
1971 ++
1972 ++static void shiftfs_super_force_flags(struct super_block *sb,
1973 ++ unsigned long lower_flags)
1974 ++{
1975 ++ sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1976 ++ SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1977 ++
1978 ++ if (!(lower_flags & SB_POSIXACL))
1979 ++ sb->s_flags &= ~SB_POSIXACL;
1980 ++}
1981 ++
1982 ++static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1983 ++ int silent)
1984 ++{
1985 ++ int err;
1986 ++ struct path path = {};
1987 ++ struct shiftfs_super_info *sbinfo_mp;
1988 ++ char *name = NULL;
1989 ++ struct inode *inode = NULL;
1990 ++ struct dentry *dentry = NULL;
1991 ++ struct shiftfs_data *data = raw_data;
1992 ++ struct shiftfs_super_info *sbinfo = NULL;
1993 ++
1994 ++ if (!data->path)
1995 ++ return -EINVAL;
1996 ++
1997 ++ sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1998 ++ if (!sb->s_fs_info)
1999 ++ return -ENOMEM;
2000 ++ sbinfo = sb->s_fs_info;
2001 ++
2002 ++ err = shiftfs_parse_mount_options(sbinfo, data->data);
2003 ++ if (err)
2004 ++ return err;
2005 ++
2006 ++ /* to mount a mark, must be userns admin */
2007 ++ if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2008 ++ return -EPERM;
2009 ++
2010 ++ name = kstrdup(data->path, GFP_KERNEL);
2011 ++ if (!name)
2012 ++ return -ENOMEM;
2013 ++
2014 ++ err = kern_path(name, LOOKUP_FOLLOW, &path);
2015 ++ if (err)
2016 ++ goto out_free_name;
2017 ++
2018 ++ if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
2019 ++ err = -ENOTDIR;
2020 ++ goto out_put_path;
2021 ++ }
2022 ++
2023 ++ sb->s_flags |= SB_POSIXACL;
2024 ++
2025 ++ if (sbinfo->mark) {
2026 ++ struct cred *cred_tmp;
2027 ++ struct super_block *lower_sb = path.mnt->mnt_sb;
2028 ++
2029 ++ /* to mark a mount point, must root wrt lower s_user_ns */
2030 ++ if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2031 ++ err = -EPERM;
2032 ++ goto out_put_path;
2033 ++ }
2034 ++
2035 ++ /*
2036 ++ * this part is visible unshifted, so make sure no
2037 ++ * executables that could be used to give suid
2038 ++ * privileges
2039 ++ */
2040 ++ sb->s_iflags = SB_I_NOEXEC;
2041 ++
2042 ++ shiftfs_super_force_flags(sb, lower_sb->s_flags);
2043 ++
2044 ++ /*
2045 ++ * Handle nesting of shiftfs mounts by referring this mark
2046 ++ * mount back to the original mark mount. This is more
2047 ++ * efficient and alleviates concerns about stack depth.
2048 ++ */
2049 ++ if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2050 ++ sbinfo_mp = lower_sb->s_fs_info;
2051 ++
2052 ++ /* Doesn't make sense to mark a mark mount */
2053 ++ if (sbinfo_mp->mark) {
2054 ++ err = -EINVAL;
2055 ++ goto out_put_path;
2056 ++ }
2057 ++
2058 ++ if (!passthrough_is_subset(sbinfo_mp->passthrough,
2059 ++ sbinfo->passthrough)) {
2060 ++ err = -EPERM;
2061 ++ goto out_put_path;
2062 ++ }
2063 ++
2064 ++ sbinfo->mnt = mntget(sbinfo_mp->mnt);
2065 ++ dentry = dget(path.dentry->d_fsdata);
2066 ++ /*
2067 ++ * Copy up the passthrough mount options from the
2068 ++ * parent mark mountpoint.
2069 ++ */
2070 ++ sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
2071 ++ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2072 ++ } else {
2073 ++ sbinfo->mnt = mntget(path.mnt);
2074 ++ dentry = dget(path.dentry);
2075 ++ /*
2076 ++ * For a new mark passthrough_mark and passthrough
2077 ++ * are identical.
2078 ++ */
2079 ++ sbinfo->passthrough_mark = sbinfo->passthrough;
2080 ++
2081 ++ cred_tmp = prepare_creds();
2082 ++ if (!cred_tmp) {
2083 ++ err = -ENOMEM;
2084 ++ goto out_put_path;
2085 ++ }
2086 ++ /* Don't override disk quota limits or use reserved space. */
2087 ++ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2088 ++ sbinfo->creator_cred = cred_tmp;
2089 ++ }
2090 ++ } else {
2091 ++ /*
2092 ++ * This leg executes if we're admin capable in the namespace,
2093 ++ * so be very careful.
2094 ++ */
2095 ++ err = -EPERM;
2096 ++ if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2097 ++ goto out_put_path;
2098 ++
2099 ++ sbinfo_mp = path.dentry->d_sb->s_fs_info;
2100 ++ if (!sbinfo_mp->mark)
2101 ++ goto out_put_path;
2102 ++
2103 ++ if (!passthrough_is_subset(sbinfo_mp->passthrough,
2104 ++ sbinfo->passthrough))
2105 ++ goto out_put_path;
2106 ++
2107 ++ sbinfo->mnt = mntget(sbinfo_mp->mnt);
2108 ++ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2109 ++ dentry = dget(path.dentry->d_fsdata);
2110 ++ /*
2111 ++ * Copy up passthrough settings from mark mountpoint so we can
2112 ++ * verify when the overlay wants to remount with different
2113 ++ * passthrough settings.
2114 ++ */
2115 ++ sbinfo->passthrough_mark = sbinfo_mp->passthrough;
2116 ++ shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2117 ++ }
2118 ++
2119 ++ sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2120 ++ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2121 ++ printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2122 ++ err = -EINVAL;
2123 ++ goto out_put_path;
2124 ++ }
2125 ++
2126 ++ inode = new_inode(sb);
2127 ++ if (!inode) {
2128 ++ err = -ENOMEM;
2129 ++ goto out_put_path;
2130 ++ }
2131 ++ shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2132 ++
2133 ++ ihold(dentry->d_inode);
2134 ++ inode->i_private = dentry->d_inode;
2135 ++
2136 ++ sb->s_magic = SHIFTFS_MAGIC;
2137 ++ sb->s_maxbytes = MAX_LFS_FILESIZE;
2138 ++ sb->s_op = &shiftfs_super_ops;
2139 ++ sb->s_xattr = shiftfs_xattr_handlers;
2140 ++ sb->s_d_op = &shiftfs_dentry_ops;
2141 ++ sb->s_root = d_make_root(inode);
2142 ++ if (!sb->s_root) {
2143 ++ err = -ENOMEM;
2144 ++ goto out_put_path;
2145 ++ }
2146 ++
2147 ++ sb->s_root->d_fsdata = dentry;
2148 ++ sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2149 ++ shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
2150 ++
2151 ++ dentry = NULL;
2152 ++ err = 0;
2153 ++
2154 ++out_put_path:
2155 ++ path_put(&path);
2156 ++
2157 ++out_free_name:
2158 ++ kfree(name);
2159 ++
2160 ++ dput(dentry);
2161 ++
2162 ++ return err;
2163 ++}
2164 ++
2165 ++static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2166 ++ int flags, const char *dev_name, void *data) /* installed as shiftfs_type.mount */
2167 ++{
2168 ++ struct shiftfs_data d = { data, dev_name }; /* bundle mount data and device name into one argument */
2169 ++
2170 ++ return mount_nodev(fs_type, flags, &d, shiftfs_fill_super); /* delegate to mount_nodev(), passing &d and shiftfs_fill_super */
2171 ++}
2172 ++
2173 ++static struct file_system_type shiftfs_type = { /* type descriptor passed to register_filesystem()/unregister_filesystem() */
2174 ++ .owner = THIS_MODULE,
2175 ++ .name = "shiftfs", /* same string as MODULE_ALIAS_FS() at the end of this file */
2176 ++ .mount = shiftfs_mount, /* mount entry point defined just above */
2177 ++ .kill_sb = kill_anon_super, /* NOTE(review): presumably the teardown counterpart of mount_nodev() - confirm */
2178 ++ .fs_flags = FS_USERNS_MOUNT, /* NOTE(review): presumably permits mounting from a user namespace - confirm in linux/fs.h */
2179 ++};
2180 ++
2181 ++static int __init shiftfs_init(void) /* module entry point, wired up by module_init() below */
2182 ++{
2183 ++ return register_filesystem(&shiftfs_type); /* result of the registration is returned unchanged */
2184 ++}
2185 ++
2186 ++static void __exit shiftfs_exit(void) /* module exit point, wired up by module_exit() below */
2187 ++{
2188 ++ unregister_filesystem(&shiftfs_type); /* undo the registration done in shiftfs_init() */
2189 ++}
2190 ++
2191 ++MODULE_ALIAS_FS("shiftfs"); /* alias string matches shiftfs_type.name */
2192 ++MODULE_AUTHOR("James Bottomley");
2193 ++MODULE_AUTHOR("Seth Forshee <seth.forshee@×××××××××.com>");
2194 ++MODULE_AUTHOR("Christian Brauner <christian.brauner@××××××.com>");
2195 ++MODULE_DESCRIPTION("id shifting filesystem");
2196 ++MODULE_LICENSE("GPL v2");
2197 ++module_init(shiftfs_init) /* shiftfs_init() registers the filesystem type */
2198 ++module_exit(shiftfs_exit) /* shiftfs_exit() unregisters the filesystem type */
2199 +--- a/include/uapi/linux/magic.h 2021-01-06 19:08:45.234777659 -0500
2200 ++++ b/include/uapi/linux/magic.h 2021-01-06 19:09:53.900375394 -0500
2201 +@@ -96,4 +96,6 @@
2202 + #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
2203 + #define Z3FOLD_MAGIC 0x33
2204 +
2205 ++#define SHIFTFS_MAGIC 0x6a656a62 /* ASCII "jejb", most significant byte first */
2206 ++
2207 + #endif /* __LINUX_MAGIC_H__ */
2208 +--- a/fs/Makefile 2021-01-06 19:10:56.009918778 -0500
2209 ++++ b/fs/Makefile 2021-01-06 19:11:55.632442564 -0500
2210 +@@ -132,3 +132,4 @@ obj-$(CONFIG_CEPH_FS) += ceph/
2211 + obj-$(CONFIG_PSTORE) += pstore/
2212 + obj-$(CONFIG_EFIVAR_FS) += efivarfs/
2213 + obj-$(CONFIG_EROFS_FS) += erofs/
2214 ++obj-$(CONFIG_SHIFT_FS) += shiftfs.o
2215 +--- a/fs/Kconfig 2021-01-06 19:14:17.709697891 -0500
2216 ++++ b/fs/Kconfig 2021-01-06 19:15:23.413281282 -0500
2217 +@@ -122,6 +122,24 @@ source "fs/autofs/Kconfig"
2218 + source "fs/fuse/Kconfig"
2219 + source "fs/overlayfs/Kconfig"
2220 +
2221 ++config SHIFT_FS
2222 ++ tristate "UID/GID shifting overlay filesystem for containers"
2223 ++ help
2224 ++ This filesystem can overlay any mounted filesystem and shift
2225 ++ the uid/gid that its files appear to have. The idea is that
2226 ++ unprivileged containers can use this technique to mount
2227 ++ their root volumes.
2228 ++
2229 ++config SHIFT_FS_POSIX_ACL
2230 ++ bool "shiftfs POSIX Access Control Lists"
2231 ++ depends on SHIFT_FS
2232 ++ select FS_POSIX_ACL
2233 ++ help
2234 ++ POSIX Access Control Lists (ACLs) support permissions for users and
2235 ++ groups beyond the owner/group/world scheme.
2236 ++
2237 ++ If you don't know what Access Control Lists are, say N.
2238 ++
2239 + menu "Caches"
2240 +
2241 + source "fs/fscache/Kconfig"