Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.4 commit in: /
Date: Sat, 27 Feb 2021 14:16:45
Message-Id: 1614435377.c9c705b2a925c8412c6775246c3e9dcbf30a092a.mpagano@gentoo
1 commit: c9c705b2a925c8412c6775246c3e9dcbf30a092a
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Sat Feb 27 14:16:17 2021 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Sat Feb 27 14:16:17 2021 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=c9c705b2
7
8 UID/GID shifting overlay filesystem for containers
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 +
13 5000_shifts-ubuntu-20.04.patch | 2203 ++++++++++++++++++++++++++++++++++++++++
14 2 files changed, 2207 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index aab74c6..6a18e82 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -475,6 +475,10 @@ Patch: 4567_distro-Gentoo-Kconfig.patch
21 From: Tom Wijsman <TomWij@g.o>
22 Desc: Add Gentoo Linux support config settings and defaults.
23
24 +Patch: 5000_shifts-ubuntu-20.04.patch
25 +From: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/focal
26 +Desc: UID/GID shifting overlay filesystem for containers
27 +
28 Patch: 5011_enable-cpu-optimizations-for-gcc8.patch
29 From: https://github.com/graysky2/kernel_gcc_patch/
30 Desc: Kernel patch for >= gccv8 enables kernel >= v4.13 optimizations for additional CPUs.
31
32 diff --git a/5000_shifts-ubuntu-20.04.patch b/5000_shifts-ubuntu-20.04.patch
33 new file mode 100644
34 index 0000000..a12bfe9
35 --- /dev/null
36 +++ b/5000_shifts-ubuntu-20.04.patch
37 @@ -0,0 +1,2203 @@
38 +--- /dev/null 2021-01-08 13:33:13.190303432 -0500
39 ++++ b/fs/shiftfs.c 2021-01-08 19:02:40.000000000 -0500
40 +@@ -0,0 +1,2157 @@
41 ++#include <linux/btrfs.h>
42 ++#include <linux/capability.h>
43 ++#include <linux/cred.h>
44 ++#include <linux/mount.h>
45 ++#include <linux/fdtable.h>
46 ++#include <linux/file.h>
47 ++#include <linux/fs.h>
48 ++#include <linux/namei.h>
49 ++#include <linux/module.h>
50 ++#include <linux/kernel.h>
51 ++#include <linux/magic.h>
52 ++#include <linux/parser.h>
53 ++#include <linux/security.h>
54 ++#include <linux/seq_file.h>
55 ++#include <linux/statfs.h>
56 ++#include <linux/slab.h>
57 ++#include <linux/user_namespace.h>
58 ++#include <linux/uidgid.h>
59 ++#include <linux/xattr.h>
60 ++#include <linux/posix_acl.h>
61 ++#include <linux/posix_acl_xattr.h>
62 ++#include <linux/uio.h>
63 ++#include <linux/fiemap.h>
64 ++
65 ++struct shiftfs_super_info {
66 ++ struct vfsmount *mnt;
67 ++ struct user_namespace *userns;
68 ++ /* creds of process who created the super block */
69 ++ const struct cred *creator_cred;
70 ++ bool mark;
71 ++ unsigned int passthrough;
72 ++ unsigned int passthrough_mark;
73 ++};
74 ++
75 ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
76 ++ umode_t mode, dev_t dev, struct dentry *dentry);
77 ++
78 ++#define SHIFTFS_PASSTHROUGH_NONE 0
79 ++#define SHIFTFS_PASSTHROUGH_STAT 1
80 ++#define SHIFTFS_PASSTHROUGH_IOCTL 2
81 ++#define SHIFTFS_PASSTHROUGH_ALL \
82 ++ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
83 ++
84 ++static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
85 ++{
86 ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
87 ++ return false;
88 ++
89 ++ return true;
90 ++}
91 ++
92 ++static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
93 ++{
94 ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
95 ++ return false;
96 ++
97 ++ return true;
98 ++}
99 ++
100 ++enum {
101 ++ OPT_MARK,
102 ++ OPT_PASSTHROUGH,
103 ++ OPT_LAST,
104 ++};
105 ++
106 ++/* global filesystem options */
107 ++static const match_table_t tokens = {
108 ++ { OPT_MARK, "mark" },
109 ++ { OPT_PASSTHROUGH, "passthrough=%u" },
110 ++ { OPT_LAST, NULL }
111 ++};
112 ++
113 ++static const struct cred *shiftfs_override_creds(const struct super_block *sb)
114 ++{
115 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
116 ++
117 ++ return override_creds(sbinfo->creator_cred);
118 ++}
119 ++
120 ++static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
121 ++ struct cred *newcred)
122 ++{
123 ++ revert_creds(oldcred);
124 ++ put_cred(newcred);
125 ++}
126 ++
127 ++static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
128 ++ kuid_t kuid)
129 ++{
130 ++ uid_t uid = from_kuid(from, kuid);
131 ++ return make_kuid(to, uid);
132 ++}
133 ++
134 ++static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
135 ++ kgid_t kgid)
136 ++{
137 ++ gid_t gid = from_kgid(from, kgid);
138 ++ return make_kgid(to, gid);
139 ++}
140 ++
141 ++static int shiftfs_override_object_creds(const struct super_block *sb,
142 ++ const struct cred **oldcred,
143 ++ struct cred **newcred,
144 ++ struct dentry *dentry, umode_t mode,
145 ++ bool hardlink)
146 ++{
147 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
148 ++ kuid_t fsuid = current_fsuid();
149 ++ kgid_t fsgid = current_fsgid();
150 ++
151 ++ *oldcred = shiftfs_override_creds(sb);
152 ++
153 ++ *newcred = prepare_creds();
154 ++ if (!*newcred) {
155 ++ revert_creds(*oldcred);
156 ++ return -ENOMEM;
157 ++ }
158 ++
159 ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
160 ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
161 ++
162 ++ if (!hardlink) {
163 ++ int err = security_dentry_create_files_as(dentry, mode,
164 ++ &dentry->d_name,
165 ++ *oldcred, *newcred);
166 ++ if (err) {
167 ++ shiftfs_revert_object_creds(*oldcred, *newcred);
168 ++ return err;
169 ++ }
170 ++ }
171 ++
172 ++ put_cred(override_creds(*newcred));
173 ++ return 0;
174 ++}
175 ++
176 ++static void shiftfs_copyattr(struct inode *from, struct inode *to)
177 ++{
178 ++ struct user_namespace *from_ns = from->i_sb->s_user_ns;
179 ++ struct user_namespace *to_ns = to->i_sb->s_user_ns;
180 ++
181 ++ to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
182 ++ to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
183 ++ to->i_mode = from->i_mode;
184 ++ to->i_atime = from->i_atime;
185 ++ to->i_mtime = from->i_mtime;
186 ++ to->i_ctime = from->i_ctime;
187 ++ i_size_write(to, i_size_read(from));
188 ++}
189 ++
190 ++static void shiftfs_copyflags(struct inode *from, struct inode *to)
191 ++{
192 ++ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
193 ++
194 ++ inode_set_flags(to, from->i_flags & mask, mask);
195 ++}
196 ++
197 ++static void shiftfs_file_accessed(struct file *file)
198 ++{
199 ++ struct inode *upperi, *loweri;
200 ++
201 ++ if (file->f_flags & O_NOATIME)
202 ++ return;
203 ++
204 ++ upperi = file_inode(file);
205 ++ loweri = upperi->i_private;
206 ++
207 ++ if (!loweri)
208 ++ return;
209 ++
210 ++ upperi->i_mtime = loweri->i_mtime;
211 ++ upperi->i_ctime = loweri->i_ctime;
212 ++
213 ++ touch_atime(&file->f_path);
214 ++}
215 ++
216 ++static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
217 ++ char *options)
218 ++{
219 ++ char *p;
220 ++ substring_t args[MAX_OPT_ARGS];
221 ++
222 ++ sbinfo->mark = false;
223 ++ sbinfo->passthrough = 0;
224 ++
225 ++ while ((p = strsep(&options, ",")) != NULL) {
226 ++ int err, intarg, token;
227 ++
228 ++ if (!*p)
229 ++ continue;
230 ++
231 ++ token = match_token(p, tokens, args);
232 ++ switch (token) {
233 ++ case OPT_MARK:
234 ++ sbinfo->mark = true;
235 ++ break;
236 ++ case OPT_PASSTHROUGH:
237 ++ err = match_int(&args[0], &intarg);
238 ++ if (err)
239 ++ return err;
240 ++
241 ++ if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
242 ++ return -EINVAL;
243 ++
244 ++ sbinfo->passthrough = intarg;
245 ++ break;
246 ++ default:
247 ++ return -EINVAL;
248 ++ }
249 ++ }
250 ++
251 ++ return 0;
252 ++}
253 ++
254 ++static void shiftfs_d_release(struct dentry *dentry)
255 ++{
256 ++ struct dentry *lowerd = dentry->d_fsdata;
257 ++
258 ++ if (lowerd)
259 ++ dput(lowerd);
260 ++}
261 ++
262 ++static struct dentry *shiftfs_d_real(struct dentry *dentry,
263 ++ const struct inode *inode)
264 ++{
265 ++ struct dentry *lowerd = dentry->d_fsdata;
266 ++
267 ++ if (inode && d_inode(dentry) == inode)
268 ++ return dentry;
269 ++
270 ++ lowerd = d_real(lowerd, inode);
271 ++ if (lowerd && (!inode || inode == d_inode(lowerd)))
272 ++ return lowerd;
273 ++
274 ++ WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
275 ++ inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
276 ++ return dentry;
277 ++}
278 ++
279 ++static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
280 ++{
281 ++ int err = 1;
282 ++ struct dentry *lowerd = dentry->d_fsdata;
283 ++
284 ++ if (d_is_negative(lowerd) != d_is_negative(dentry))
285 ++ return 0;
286 ++
287 ++ if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
288 ++ err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
289 ++
290 ++ if (d_really_is_positive(dentry)) {
291 ++ struct inode *inode = d_inode(dentry);
292 ++ struct inode *loweri = d_inode(lowerd);
293 ++
294 ++ shiftfs_copyattr(loweri, inode);
295 ++ }
296 ++
297 ++ return err;
298 ++}
299 ++
300 ++static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
301 ++{
302 ++ int err = 1;
303 ++ struct dentry *lowerd = dentry->d_fsdata;
304 ++
305 ++ if (d_unhashed(lowerd) ||
306 ++ ((d_is_negative(lowerd) != d_is_negative(dentry))))
307 ++ return 0;
308 ++
309 ++ if (flags & LOOKUP_RCU)
310 ++ return -ECHILD;
311 ++
312 ++ if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
313 ++ err = lowerd->d_op->d_revalidate(lowerd, flags);
314 ++
315 ++ if (d_really_is_positive(dentry)) {
316 ++ struct inode *inode = d_inode(dentry);
317 ++ struct inode *loweri = d_inode(lowerd);
318 ++
319 ++ shiftfs_copyattr(loweri, inode);
320 ++ }
321 ++
322 ++ return err;
323 ++}
324 ++
325 ++static const struct dentry_operations shiftfs_dentry_ops = {
326 ++ .d_release = shiftfs_d_release,
327 ++ .d_real = shiftfs_d_real,
328 ++ .d_revalidate = shiftfs_d_revalidate,
329 ++ .d_weak_revalidate = shiftfs_d_weak_revalidate,
330 ++};
331 ++
332 ++static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
333 ++ struct delayed_call *done)
334 ++{
335 ++ const char *p;
336 ++ const struct cred *oldcred;
337 ++ struct dentry *lowerd;
338 ++
339 ++ /* RCU lookup not supported */
340 ++ if (!dentry)
341 ++ return ERR_PTR(-ECHILD);
342 ++
343 ++ lowerd = dentry->d_fsdata;
344 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
345 ++ p = vfs_get_link(lowerd, done);
346 ++ revert_creds(oldcred);
347 ++
348 ++ return p;
349 ++}
350 ++
351 ++static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
352 ++ const char *name, const void *value,
353 ++ size_t size, int flags)
354 ++{
355 ++ struct dentry *lowerd = dentry->d_fsdata;
356 ++ int err;
357 ++ const struct cred *oldcred;
358 ++
359 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
360 ++ err = vfs_setxattr(lowerd, name, value, size, flags);
361 ++ revert_creds(oldcred);
362 ++
363 ++ shiftfs_copyattr(lowerd->d_inode, inode);
364 ++
365 ++ return err;
366 ++}
367 ++
368 ++static int shiftfs_xattr_get(const struct xattr_handler *handler,
369 ++ struct dentry *dentry, struct inode *inode,
370 ++ const char *name, void *value, size_t size)
371 ++{
372 ++ struct dentry *lowerd = dentry->d_fsdata;
373 ++ int err;
374 ++ const struct cred *oldcred;
375 ++
376 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
377 ++ err = vfs_getxattr(lowerd, name, value, size);
378 ++ revert_creds(oldcred);
379 ++
380 ++ return err;
381 ++}
382 ++
383 ++static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
384 ++ size_t size)
385 ++{
386 ++ struct dentry *lowerd = dentry->d_fsdata;
387 ++ int err;
388 ++ const struct cred *oldcred;
389 ++
390 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
391 ++ err = vfs_listxattr(lowerd, list, size);
392 ++ revert_creds(oldcred);
393 ++
394 ++ return err;
395 ++}
396 ++
397 ++static int shiftfs_removexattr(struct dentry *dentry, const char *name)
398 ++{
399 ++ struct dentry *lowerd = dentry->d_fsdata;
400 ++ int err;
401 ++ const struct cred *oldcred;
402 ++
403 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
404 ++ err = vfs_removexattr(lowerd, name);
405 ++ revert_creds(oldcred);
406 ++
407 ++ /* update c/mtime */
408 ++ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
409 ++
410 ++ return err;
411 ++}
412 ++
413 ++static int shiftfs_xattr_set(const struct xattr_handler *handler,
414 ++ struct dentry *dentry, struct inode *inode,
415 ++ const char *name, const void *value, size_t size,
416 ++ int flags)
417 ++{
418 ++ if (!value)
419 ++ return shiftfs_removexattr(dentry, name);
420 ++ return shiftfs_setxattr(dentry, inode, name, value, size, flags);
421 ++}
422 ++
423 ++static int shiftfs_inode_test(struct inode *inode, void *data)
424 ++{
425 ++ return inode->i_private == data;
426 ++}
427 ++
428 ++static int shiftfs_inode_set(struct inode *inode, void *data)
429 ++{
430 ++ inode->i_private = data;
431 ++ return 0;
432 ++}
433 ++
434 ++static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
435 ++ umode_t mode, const char *symlink,
436 ++ struct dentry *hardlink, bool excl)
437 ++{
438 ++ int err;
439 ++ const struct cred *oldcred;
440 ++ struct cred *newcred;
441 ++ void *loweri_iop_ptr = NULL;
442 ++ umode_t modei = mode;
443 ++ struct super_block *dir_sb = diri->i_sb;
444 ++ struct dentry *lowerd_new = dentry->d_fsdata;
445 ++ struct inode *inode = NULL, *loweri_dir = diri->i_private;
446 ++ const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
447 ++ struct dentry *lowerd_link = NULL;
448 ++
449 ++ if (hardlink) {
450 ++ loweri_iop_ptr = loweri_dir_iop->link;
451 ++ } else {
452 ++ switch (mode & S_IFMT) {
453 ++ case S_IFDIR:
454 ++ loweri_iop_ptr = loweri_dir_iop->mkdir;
455 ++ break;
456 ++ case S_IFREG:
457 ++ loweri_iop_ptr = loweri_dir_iop->create;
458 ++ break;
459 ++ case S_IFLNK:
460 ++ loweri_iop_ptr = loweri_dir_iop->symlink;
461 ++ break;
462 ++ case S_IFSOCK:
463 ++ /* fall through */
464 ++ case S_IFIFO:
465 ++ loweri_iop_ptr = loweri_dir_iop->mknod;
466 ++ break;
467 ++ }
468 ++ }
469 ++ if (!loweri_iop_ptr) {
470 ++ err = -EINVAL;
471 ++ goto out_iput;
472 ++ }
473 ++
474 ++ inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
475 ++
476 ++ if (!hardlink) {
477 ++ inode = new_inode(dir_sb);
478 ++ if (!inode) {
479 ++ err = -ENOMEM;
480 ++ goto out_iput;
481 ++ }
482 ++
483 ++ /*
484 ++ * new_inode() will have added the new inode to the super
485 ++ * block's list of inodes. Further below we will call
486 ++ * inode_insert5() Which would perform the same operation again
487 ++ * thereby corrupting the list. To avoid this raise I_CREATING
488 ++ * in i_state which will cause inode_insert5() to skip this
489 ++ * step. I_CREATING will be cleared by d_instantiate_new()
490 ++ * below.
491 ++ */
492 ++ spin_lock(&inode->i_lock);
493 ++ inode->i_state |= I_CREATING;
494 ++ spin_unlock(&inode->i_lock);
495 ++
496 ++ inode_init_owner(inode, diri, mode);
497 ++ modei = inode->i_mode;
498 ++ }
499 ++
500 ++ err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
501 ++ dentry, modei, hardlink != NULL);
502 ++ if (err)
503 ++ goto out_iput;
504 ++
505 ++ if (hardlink) {
506 ++ lowerd_link = hardlink->d_fsdata;
507 ++ err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
508 ++ } else {
509 ++ switch (modei & S_IFMT) {
510 ++ case S_IFDIR:
511 ++ err = vfs_mkdir(loweri_dir, lowerd_new, modei);
512 ++ break;
513 ++ case S_IFREG:
514 ++ err = vfs_create(loweri_dir, lowerd_new, modei, excl);
515 ++ break;
516 ++ case S_IFLNK:
517 ++ err = vfs_symlink(loweri_dir, lowerd_new, symlink);
518 ++ break;
519 ++ case S_IFSOCK:
520 ++ /* fall through */
521 ++ case S_IFIFO:
522 ++ err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
523 ++ break;
524 ++ default:
525 ++ err = -EINVAL;
526 ++ break;
527 ++ }
528 ++ }
529 ++
530 ++ shiftfs_revert_object_creds(oldcred, newcred);
531 ++
532 ++ if (!err && WARN_ON(!lowerd_new->d_inode))
533 ++ err = -EIO;
534 ++ if (err)
535 ++ goto out_iput;
536 ++
537 ++ if (hardlink) {
538 ++ inode = d_inode(hardlink);
539 ++ ihold(inode);
540 ++
541 ++ /* copy up times from lower inode */
542 ++ shiftfs_copyattr(d_inode(lowerd_link), inode);
543 ++ set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
544 ++ d_instantiate(dentry, inode);
545 ++ } else {
546 ++ struct inode *inode_tmp;
547 ++ struct inode *loweri_new = d_inode(lowerd_new);
548 ++
549 ++ inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
550 ++ shiftfs_inode_test, shiftfs_inode_set,
551 ++ loweri_new);
552 ++ if (unlikely(inode_tmp != inode)) {
553 ++ pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
554 ++ iput(inode_tmp);
555 ++ err = -EINVAL;
556 ++ goto out_iput;
557 ++ }
558 ++
559 ++ ihold(loweri_new);
560 ++ shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
561 ++ 0, lowerd_new);
562 ++ d_instantiate_new(dentry, inode);
563 ++ }
564 ++
565 ++ shiftfs_copyattr(loweri_dir, diri);
566 ++ if (loweri_iop_ptr == loweri_dir_iop->mkdir)
567 ++ set_nlink(diri, loweri_dir->i_nlink);
568 ++
569 ++ inode = NULL;
570 ++
571 ++out_iput:
572 ++ iput(inode);
573 ++ inode_unlock(loweri_dir);
574 ++
575 ++ return err;
576 ++}
577 ++
578 ++static int shiftfs_create(struct inode *dir, struct dentry *dentry,
579 ++ umode_t mode, bool excl)
580 ++{
581 ++ mode |= S_IFREG;
582 ++
583 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
584 ++}
585 ++
586 ++static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
587 ++ umode_t mode)
588 ++{
589 ++ mode |= S_IFDIR;
590 ++
591 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
592 ++}
593 ++
594 ++static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
595 ++ struct dentry *dentry)
596 ++{
597 ++ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
598 ++}
599 ++
600 ++static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
601 ++ dev_t rdev)
602 ++{
603 ++ if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
604 ++ return -EPERM;
605 ++
606 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
607 ++}
608 ++
609 ++static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
610 ++ const char *symlink)
611 ++{
612 ++ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
613 ++}
614 ++
615 ++static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
616 ++{
617 ++ struct dentry *lowerd = dentry->d_fsdata;
618 ++ struct inode *loweri = dir->i_private;
619 ++ struct inode *inode = d_inode(dentry);
620 ++ int err;
621 ++ const struct cred *oldcred;
622 ++
623 ++ dget(lowerd);
624 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
625 ++ inode_lock_nested(loweri, I_MUTEX_PARENT);
626 ++ if (rmdir)
627 ++ err = vfs_rmdir(loweri, lowerd);
628 ++ else
629 ++ err = vfs_unlink(loweri, lowerd, NULL);
630 ++ revert_creds(oldcred);
631 ++
632 ++ if (!err) {
633 ++ d_drop(dentry);
634 ++
635 ++ if (rmdir)
636 ++ clear_nlink(inode);
637 ++ else
638 ++ drop_nlink(inode);
639 ++ }
640 ++ inode_unlock(loweri);
641 ++
642 ++ shiftfs_copyattr(loweri, dir);
643 ++ dput(lowerd);
644 ++
645 ++ return err;
646 ++}
647 ++
648 ++static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
649 ++{
650 ++ return shiftfs_rm(dir, dentry, false);
651 ++}
652 ++
653 ++static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
654 ++{
655 ++ return shiftfs_rm(dir, dentry, true);
656 ++}
657 ++
658 ++static int shiftfs_rename(struct inode *olddir, struct dentry *old,
659 ++ struct inode *newdir, struct dentry *new,
660 ++ unsigned int flags)
661 ++{
662 ++ struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
663 ++ *lowerd_dir_new = new->d_parent->d_fsdata,
664 ++ *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
665 ++ *trapd;
666 ++ struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
667 ++ *loweri_dir_new = lowerd_dir_new->d_inode;
668 ++ int err = -EINVAL;
669 ++ const struct cred *oldcred;
670 ++
671 ++ trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
672 ++
673 ++ if (trapd == lowerd_old || trapd == lowerd_new)
674 ++ goto out_unlock;
675 ++
676 ++ oldcred = shiftfs_override_creds(old->d_sb);
677 ++ err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
678 ++ NULL, flags);
679 ++ revert_creds(oldcred);
680 ++
681 ++ shiftfs_copyattr(loweri_dir_old, olddir);
682 ++ shiftfs_copyattr(loweri_dir_new, newdir);
683 ++
684 ++out_unlock:
685 ++ unlock_rename(lowerd_dir_new, lowerd_dir_old);
686 ++
687 ++ return err;
688 ++}
689 ++
690 ++static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
691 ++ unsigned int flags)
692 ++{
693 ++ struct dentry *new;
694 ++ struct inode *newi;
695 ++ const struct cred *oldcred;
696 ++ struct dentry *lowerd = dentry->d_parent->d_fsdata;
697 ++ struct inode *inode = NULL, *loweri = lowerd->d_inode;
698 ++
699 ++ inode_lock(loweri);
700 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
701 ++ new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
702 ++ revert_creds(oldcred);
703 ++ inode_unlock(loweri);
704 ++
705 ++ if (IS_ERR(new))
706 ++ return new;
707 ++
708 ++ dentry->d_fsdata = new;
709 ++
710 ++ newi = new->d_inode;
711 ++ if (!newi)
712 ++ goto out;
713 ++
714 ++ inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
715 ++ shiftfs_inode_test, shiftfs_inode_set, newi);
716 ++ if (!inode) {
717 ++ dput(new);
718 ++ return ERR_PTR(-ENOMEM);
719 ++ }
720 ++ if (inode->i_state & I_NEW) {
721 ++ /*
722 ++ * inode->i_private set by shiftfs_inode_set(), but we still
723 ++ * need to take a reference
724 ++ */
725 ++ ihold(newi);
726 ++ shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
727 ++ unlock_new_inode(inode);
728 ++ }
729 ++
730 ++out:
731 ++ return d_splice_alias(inode, dentry);
732 ++}
733 ++
734 ++static int shiftfs_permission(struct inode *inode, int mask)
735 ++{
736 ++ int err;
737 ++ const struct cred *oldcred;
738 ++ struct inode *loweri = inode->i_private;
739 ++
740 ++ if (!loweri) {
741 ++ WARN_ON(!(mask & MAY_NOT_BLOCK));
742 ++ return -ECHILD;
743 ++ }
744 ++
745 ++ err = generic_permission(inode, mask);
746 ++ if (err)
747 ++ return err;
748 ++
749 ++ oldcred = shiftfs_override_creds(inode->i_sb);
750 ++ err = inode_permission(loweri, mask);
751 ++ revert_creds(oldcred);
752 ++
753 ++ return err;
754 ++}
755 ++
756 ++static int shiftfs_fiemap(struct inode *inode,
757 ++ struct fiemap_extent_info *fieinfo, u64 start,
758 ++ u64 len)
759 ++{
760 ++ int err;
761 ++ const struct cred *oldcred;
762 ++ struct inode *loweri = inode->i_private;
763 ++
764 ++ if (!loweri->i_op->fiemap)
765 ++ return -EOPNOTSUPP;
766 ++
767 ++ oldcred = shiftfs_override_creds(inode->i_sb);
768 ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
769 ++ filemap_write_and_wait(loweri->i_mapping);
770 ++ err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
771 ++ revert_creds(oldcred);
772 ++
773 ++ return err;
774 ++}
775 ++
776 ++static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
777 ++ umode_t mode)
778 ++{
779 ++ int err;
780 ++ const struct cred *oldcred;
781 ++ struct dentry *lowerd = dentry->d_fsdata;
782 ++ struct inode *loweri = dir->i_private;
783 ++
784 ++ if (!loweri->i_op->tmpfile)
785 ++ return -EOPNOTSUPP;
786 ++
787 ++ oldcred = shiftfs_override_creds(dir->i_sb);
788 ++ err = loweri->i_op->tmpfile(loweri, lowerd, mode);
789 ++ revert_creds(oldcred);
790 ++
791 ++ return err;
792 ++}
793 ++
794 ++static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
795 ++{
796 ++ struct dentry *lowerd = dentry->d_fsdata;
797 ++ struct inode *loweri = lowerd->d_inode;
798 ++ struct iattr newattr;
799 ++ const struct cred *oldcred;
800 ++ struct super_block *sb = dentry->d_sb;
801 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
802 ++ int err;
803 ++
804 ++ err = setattr_prepare(dentry, attr);
805 ++ if (err)
806 ++ return err;
807 ++
808 ++ newattr = *attr;
809 ++ newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
810 ++ newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
811 ++
812 ++ /*
813 ++ * mode change is for clearing setuid/setgid bits. Allow lower fs
814 ++ * to interpret this in its own way.
815 ++ */
816 ++ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
817 ++ newattr.ia_valid &= ~ATTR_MODE;
818 ++
819 ++ inode_lock(loweri);
820 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
821 ++ err = notify_change(lowerd, &newattr, NULL);
822 ++ revert_creds(oldcred);
823 ++ inode_unlock(loweri);
824 ++
825 ++ shiftfs_copyattr(loweri, d_inode(dentry));
826 ++
827 ++ return err;
828 ++}
829 ++
830 ++static int shiftfs_getattr(const struct path *path, struct kstat *stat,
831 ++ u32 request_mask, unsigned int query_flags)
832 ++{
833 ++ struct inode *inode = path->dentry->d_inode;
834 ++ struct dentry *lowerd = path->dentry->d_fsdata;
835 ++ struct inode *loweri = lowerd->d_inode;
836 ++ struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
837 ++ struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
838 ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
839 ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
840 ++ const struct cred *oldcred;
841 ++ int err;
842 ++
843 ++ oldcred = shiftfs_override_creds(inode->i_sb);
844 ++ err = vfs_getattr(&newpath, stat, request_mask, query_flags);
845 ++ revert_creds(oldcred);
846 ++
847 ++ if (err)
848 ++ return err;
849 ++
850 ++ /* transform the underlying id */
851 ++ stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
852 ++ stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
853 ++ return 0;
854 ++}
855 ++
856 ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL
857 ++
858 ++static int
859 ++shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
860 ++ struct posix_acl *acl)
861 ++{
862 ++ int i;
863 ++
864 ++ for (i = 0; i < acl->a_count; i++) {
865 ++ struct posix_acl_entry *e = &acl->a_entries[i];
866 ++ switch(e->e_tag) {
867 ++ case ACL_USER:
868 ++ e->e_uid = shift_kuid(from, to, e->e_uid);
869 ++ if (!uid_valid(e->e_uid))
870 ++ return -EOVERFLOW;
871 ++ break;
872 ++ case ACL_GROUP:
873 ++ e->e_gid = shift_kgid(from, to, e->e_gid);
874 ++ if (!gid_valid(e->e_gid))
875 ++ return -EOVERFLOW;
876 ++ break;
877 ++ }
878 ++ }
879 ++ return 0;
880 ++}
881 ++
882 ++static void
883 ++shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
884 ++ void *value, size_t size)
885 ++{
886 ++ struct posix_acl_xattr_header *header = value;
887 ++ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
888 ++ int count;
889 ++ kuid_t kuid;
890 ++ kgid_t kgid;
891 ++
892 ++ if (!value)
893 ++ return;
894 ++ if (size < sizeof(struct posix_acl_xattr_header))
895 ++ return;
896 ++ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
897 ++ return;
898 ++
899 ++ count = posix_acl_xattr_count(size);
900 ++ if (count < 0)
901 ++ return;
902 ++ if (count == 0)
903 ++ return;
904 ++
905 ++ for (end = entry + count; entry != end; entry++) {
906 ++ switch(le16_to_cpu(entry->e_tag)) {
907 ++ case ACL_USER:
908 ++ kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
909 ++ kuid = shift_kuid(from, to, kuid);
910 ++ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
911 ++ break;
912 ++ case ACL_GROUP:
913 ++ kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
914 ++ kgid = shift_kgid(from, to, kgid);
915 ++ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
916 ++ break;
917 ++ default:
918 ++ break;
919 ++ }
920 ++ }
921 ++}
922 ++
923 ++static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
924 ++{
925 ++ struct inode *loweri = inode->i_private;
926 ++ const struct cred *oldcred;
927 ++ struct posix_acl *lower_acl, *acl = NULL;
928 ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
929 ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
930 ++ int size;
931 ++ int err;
932 ++
933 ++ if (!IS_POSIXACL(loweri))
934 ++ return NULL;
935 ++
936 ++ oldcred = shiftfs_override_creds(inode->i_sb);
937 ++ lower_acl = get_acl(loweri, type);
938 ++ revert_creds(oldcred);
939 ++
940 ++ if (lower_acl && !IS_ERR(lower_acl)) {
941 ++ /* XXX: export posix_acl_clone? */
942 ++ size = sizeof(struct posix_acl) +
943 ++ lower_acl->a_count * sizeof(struct posix_acl_entry);
944 ++ acl = kmemdup(lower_acl, size, GFP_KERNEL);
945 ++ posix_acl_release(lower_acl);
946 ++
947 ++ if (!acl)
948 ++ return ERR_PTR(-ENOMEM);
949 ++
950 ++ refcount_set(&acl->a_refcount, 1);
951 ++
952 ++ err = shift_acl_ids(from_ns, to_ns, acl);
953 ++ if (err) {
954 ++ kfree(acl);
955 ++ return ERR_PTR(err);
956 ++ }
957 ++ }
958 ++
959 ++ return acl;
960 ++}
961 ++
962 ++static int
963 ++shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
964 ++ struct dentry *dentry, struct inode *inode,
965 ++ const char *name, void *buffer, size_t size)
966 ++{
967 ++ struct inode *loweri = inode->i_private;
968 ++ int ret;
969 ++
970 ++ ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
971 ++ buffer, size);
972 ++ if (ret < 0)
973 ++ return ret;
974 ++
975 ++ inode_lock(loweri);
976 ++ shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
977 ++ buffer, size);
978 ++ inode_unlock(loweri);
979 ++ return ret;
980 ++}
981 ++
982 ++static int
983 ++shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
984 ++ struct dentry *dentry, struct inode *inode,
985 ++ const char *name, const void *value,
986 ++ size_t size, int flags)
987 ++{
988 ++ struct inode *loweri = inode->i_private;
989 ++ int err;
990 ++
991 ++ if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
992 ++ return -EOPNOTSUPP;
993 ++ if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
994 ++ return value ? -EACCES : 0;
995 ++ if (!inode_owner_or_capable(inode))
996 ++ return -EPERM;
997 ++
998 ++ if (value) {
999 ++ shift_acl_xattr_ids(inode->i_sb->s_user_ns,
1000 ++ loweri->i_sb->s_user_ns,
1001 ++ (void *)value, size);
1002 ++ err = shiftfs_setxattr(dentry, inode, handler->name, value,
1003 ++ size, flags);
1004 ++ } else {
1005 ++ err = shiftfs_removexattr(dentry, handler->name);
1006 ++ }
1007 ++
1008 ++ if (!err)
1009 ++ shiftfs_copyattr(loweri, inode);
1010 ++
1011 ++ return err;
1012 ++}
1013 ++
1014 ++static const struct xattr_handler
1015 ++shiftfs_posix_acl_access_xattr_handler = {
1016 ++ .name = XATTR_NAME_POSIX_ACL_ACCESS,
1017 ++ .flags = ACL_TYPE_ACCESS,
1018 ++ .get = shiftfs_posix_acl_xattr_get,
1019 ++ .set = shiftfs_posix_acl_xattr_set,
1020 ++};
1021 ++
1022 ++static const struct xattr_handler
1023 ++shiftfs_posix_acl_default_xattr_handler = {
1024 ++ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1025 ++ .flags = ACL_TYPE_DEFAULT,
1026 ++ .get = shiftfs_posix_acl_xattr_get,
1027 ++ .set = shiftfs_posix_acl_xattr_set,
1028 ++};
1029 ++
1030 ++#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
1031 ++
1032 ++#define shiftfs_get_acl NULL
1033 ++
1034 ++#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
1035 ++
1036 ++static const struct inode_operations shiftfs_dir_inode_operations = {
1037 ++ .lookup = shiftfs_lookup,
1038 ++ .mkdir = shiftfs_mkdir,
1039 ++ .symlink = shiftfs_symlink,
1040 ++ .unlink = shiftfs_unlink,
1041 ++ .rmdir = shiftfs_rmdir,
1042 ++ .rename = shiftfs_rename,
1043 ++ .link = shiftfs_link,
1044 ++ .setattr = shiftfs_setattr,
1045 ++ .create = shiftfs_create,
1046 ++ .mknod = shiftfs_mknod,
1047 ++ .permission = shiftfs_permission,
1048 ++ .getattr = shiftfs_getattr,
1049 ++ .listxattr = shiftfs_listxattr,
1050 ++ .get_acl = shiftfs_get_acl,
1051 ++};
1052 ++/* Inode operations for shiftfs regular files. */
1053 ++static const struct inode_operations shiftfs_file_inode_operations = {
1054 ++	.getattr	= shiftfs_getattr,
1055 ++	.setattr	= shiftfs_setattr,
1056 ++	.permission	= shiftfs_permission,
1057 ++	.get_acl	= shiftfs_get_acl,
1058 ++	.listxattr	= shiftfs_listxattr,
1059 ++	.fiemap		= shiftfs_fiemap,
1060 ++	.tmpfile	= shiftfs_tmpfile,
1061 ++};
1062 ++/* Inode operations for every inode type that is not a dir, symlink or regular file. */
1063 ++static const struct inode_operations shiftfs_special_inode_operations = {
1064 ++	.getattr	= shiftfs_getattr,
1065 ++	.setattr	= shiftfs_setattr,
1066 ++	.permission	= shiftfs_permission,
1067 ++	.get_acl	= shiftfs_get_acl,
1068 ++	.listxattr	= shiftfs_listxattr,
1069 ++};
1070 ++/* Inode operations for shiftfs symlinks. */
1071 ++static const struct inode_operations shiftfs_symlink_inode_operations = {
1072 ++	.get_link	= shiftfs_get_link,
1073 ++	.getattr	= shiftfs_getattr,
1074 ++	.setattr	= shiftfs_setattr,
1075 ++	.listxattr	= shiftfs_listxattr,
1076 ++};
1077 ++/* Open the lower dentry behind @file via open_with_fake_path(), passing the sb's creator_cred. */
1078 ++static struct file *shiftfs_open_realfile(const struct file *file,
1079 ++					  struct inode *realinode)
1080 ++{
1081 ++	struct super_block *sb = file_inode(file)->i_sb;
1082 ++	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1083 ++	struct path lowerpath = { .mnt = sbinfo->mnt,
1084 ++				  .dentry = file->f_path.dentry->d_fsdata };
1085 ++	const struct cred *saved;
1086 ++	struct file *lowerfile;
1087 ++
1088 ++	saved = shiftfs_override_creds(sb);
1089 ++	lowerfile = open_with_fake_path(&lowerpath, file->f_flags, realinode,
1090 ++					sbinfo->creator_cred);
1091 ++	revert_creds(saved);
1092 ++
1093 ++	return lowerfile;
1094 ++}
1095 ++
1096 ++#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1097 ++/* Validate @flags and apply their SHIFTFS_SETFL_MASK subset to @file; 0 or a negative errno. */
1098 ++static int shiftfs_change_flags(struct file *file, unsigned int flags)
1099 ++{
1100 ++	struct inode *host = file_inode(file);
1101 ++	int ret;
1102 ++
1103 ++	/* a change outside the settable mask means something is amiss */
1104 ++	if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1105 ++		return -EIO;
1106 ++
1107 ++	flags &= SHIFTFS_SETFL_MASK;
1108 ++
1109 ++	/* O_APPEND may not be toggled on an IS_APPEND() inode */
1110 ++	if (IS_APPEND(host) && ((file->f_flags ^ flags) & O_APPEND))
1111 ++		return -EPERM;
1112 ++
1113 ++	/* O_DIRECT needs a mapping that provides ->direct_IO */
1114 ++	if ((flags & O_DIRECT) &&
1115 ++	    (!file->f_mapping->a_ops || !file->f_mapping->a_ops->direct_IO))
1116 ++		return -EINVAL;
1117 ++
1118 ++	/* let the file's own ->check_flags, if any, veto the new flags */
1119 ++	ret = file->f_op->check_flags ? file->f_op->check_flags(flags) : 0;
1120 ++	if (ret)
1121 ++		return ret;
1122 ++
1123 ++	/* replace the settable bits under f_lock */
1124 ++	spin_lock(&file->f_lock);
1125 ++	file->f_flags = flags | (file->f_flags & ~SHIFTFS_SETFL_MASK);
1126 ++	spin_unlock(&file->f_lock);
1127 ++
1128 ++	return 0;
1129 ++}
1130 ++/* ->open for regular files: open the lower file and keep it in file->private_data. */
1131 ++static int shiftfs_open(struct inode *inode, struct file *file)
1132 ++{
1133 ++	struct file *lowerfile = shiftfs_open_realfile(file, inode->i_private);
1134 ++
1135 ++	if (IS_ERR(lowerfile))
1136 ++		return PTR_ERR(lowerfile);
1137 ++
1138 ++	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO,
1139 ++	 * so hand out the lower file's mapping. */
1140 ++	file->f_mapping = lowerfile->f_mapping;
1141 ++	file->private_data = lowerfile;
1142 ++
1143 ++	return 0;
1144 ++}
1145 ++/* ->open for directories: dentry_open() the lower dir with O_NOATIME added, keep it in private_data. */
1146 ++static int shiftfs_dir_open(struct inode *inode, struct file *file)
1147 ++{
1148 ++	struct dentry *dentry = file->f_path.dentry;
1149 ++	struct shiftfs_super_info *sbinfo = inode->i_sb->s_fs_info;
1150 ++	struct path lowerpath = { .mnt = sbinfo->mnt, .dentry = dentry->d_fsdata };
1151 ++	const struct cred *saved;
1152 ++	struct file *lowerdir;
1153 ++
1154 ++	saved = shiftfs_override_creds(dentry->d_sb);
1155 ++	lowerdir = dentry_open(&lowerpath, file->f_flags | O_NOATIME,
1156 ++			       sbinfo->creator_cred);
1157 ++	revert_creds(saved);
1158 ++
1159 ++	if (IS_ERR(lowerdir))
1160 ++		return PTR_ERR(lowerdir);
1161 ++
1162 ++	file->private_data = lowerdir;
1163 ++	return 0;
1164 ++}
1165 ++/* ->release: fput() the lower file kept in private_data, if there is one. Always returns 0. */
1166 ++static int shiftfs_release(struct inode *inode, struct file *file)
1167 ++{
1168 ++	struct file *lowerfile = file->private_data;
1169 ++
1170 ++	if (!lowerfile)
1171 ++		return 0;
1172 ++	fput(lowerfile);
1173 ++	return 0;
1174 ++}
1175 ++/* ->release for directories: identical to shiftfs_release(). */
1176 ++static int shiftfs_dir_release(struct inode *inode, struct file *file)
1177 ++{
1178 ++ return shiftfs_release(inode, file);
1179 ++}
1180 ++/* ->llseek for directories: seek on the lower directory file. */
1181 ++static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1182 ++{
1183 ++	/* private_data is the lower file stored by shiftfs_dir_open() */
1184 ++	return vfs_llseek((struct file *)file->private_data, offset,
1185 ++			  whence);
1186 ++}
1187 ++/* ->llseek for regular files: seek on @file using the lower inode's s_maxbytes and i_size. */
1188 ++static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
1189 ++{
1190 ++	struct inode *loweri = file_inode(file)->i_private;
1191 ++	loff_t maxbytes = loweri->i_sb->s_maxbytes;
1192 ++	loff_t eof = i_size_read(loweri);
1193 ++
1194 ++	return generic_file_llseek_size(file, offset, whence, maxbytes, eof);
1195 ++}
1196 ++
1197 ++/* XXX: Need to figure out what to do about atime updates, maybe other
1198 ++ * timestamps too ... ref. ovl_file_accessed() */
1199 ++/* Translate the IOCB_* bits of iocb->ki_flags into the corresponding RWF_* flags. */
1200 ++static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1201 ++{
1202 ++	const int kflags = iocb->ki_flags;
1203 ++	rwf_t rwf = 0;
1204 ++
1205 ++	/*
1206 ++	 * Only the NOWAIT, HIPRI, DSYNC and SYNC bits are carried over;
1207 ++	 * every other ki_flags bit is dropped.
1208 ++	 */
1209 ++	rwf |= (kflags & IOCB_NOWAIT) ? RWF_NOWAIT : 0;
1210 ++	rwf |= (kflags & IOCB_HIPRI) ? RWF_HIPRI : 0;
1211 ++	rwf |= (kflags & IOCB_DSYNC) ? RWF_DSYNC : 0;
1212 ++	rwf |= (kflags & IOCB_SYNC) ? RWF_SYNC : 0;
1213 ++
1214 ++	return rwf;
1215 ++}
1216 ++/* Fill @lowerfd with the lower file of shiftfs @file (flags = 0, no reference taken here). */
1217 ++static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1218 ++{
1219 ++	const struct file_operations *fops = file->f_op;
1220 ++	struct file *lowerfile;
1221 ++
1222 ++	/* reject files whose ->open is neither of the shiftfs open handlers */
1223 ++	if (!(fops->open == shiftfs_open || fops->open == shiftfs_dir_open))
1224 ++		return -EINVAL;
1225 ++
1226 ++	lowerfile = file->private_data;
1227 ++	lowerfd->file = lowerfile;
1228 ++	lowerfd->flags = 0;
1229 ++
1230 ++	/* flags set on @file that the lower file lacks must be propagated */
1231 ++	if (likely(!(file->f_flags & ~lowerfile->f_flags)))
1232 ++		return 0;
1233 ++	return shiftfs_change_flags(lowerfile, file->f_flags);
1234 ++}
1235 ++/* ->read_iter: forward the read to the lower file via vfs_iter_read() under overridden creds. */
1236 ++static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1237 ++{
1238 ++	struct file *upper = iocb->ki_filp;
1239 ++	const struct cred *saved;
1240 ++	struct fd lower;
1241 ++	ssize_t nread;
1242 ++
1243 ++	/* nothing to do for an empty iterator */
1244 ++	if (iov_iter_count(iter) == 0)
1245 ++		return 0;
1246 ++
1247 ++	nread = shiftfs_real_fdget(upper, &lower);
1248 ++	if (nread)
1249 ++		return nread;
1250 ++
1251 ++	saved = shiftfs_override_creds(upper->f_path.dentry->d_sb);
1252 ++	nread = vfs_iter_read(lower.file, iter, &iocb->ki_pos,
1253 ++			      shiftfs_iocb_to_rwf(iocb));
1254 ++	revert_creds(saved);
1255 ++
1256 ++	shiftfs_file_accessed(upper);
1257 ++	fdput(lower);
1258 ++	return nread;
1259 ++}
1260 ++/* ->write_iter: under inode_lock, refresh attrs, call file_remove_privs(), then write to the lower file. */
1261 ++static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1262 ++{
1263 ++ struct file *file = iocb->ki_filp;
1264 ++ struct inode *inode = file_inode(file);
1265 ++ struct fd lowerfd;
1266 ++ const struct cred *oldcred;
1267 ++ ssize_t ret;
1268 ++
1269 ++ if (!iov_iter_count(iter))
1270 ++ return 0;
1271 ++
1272 ++ inode_lock(inode);
1273 ++ /* Update mode */
1274 ++ shiftfs_copyattr(inode->i_private, inode);
1275 ++ ret = file_remove_privs(file);
1276 ++ if (ret)
1277 ++ goto out_unlock;
1278 ++
1279 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1280 ++ if (ret)
1281 ++ goto out_unlock;
1282 ++ /* the write itself goes to the lower file, under overridden creds */
1283 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1284 ++ file_start_write(lowerfd.file);
1285 ++ ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1286 ++ shiftfs_iocb_to_rwf(iocb));
1287 ++ file_end_write(lowerfd.file);
1288 ++ revert_creds(oldcred);
1289 ++
1290 ++ /* Update size */
1291 ++ shiftfs_copyattr(inode->i_private, inode);
1292 ++
1293 ++ fdput(lowerfd);
1294 ++
1295 ++out_unlock:
1296 ++ inode_unlock(inode);
1297 ++ return ret;
1298 ++}
1299 ++/* ->fsync: vfs_fsync_range() on the lower file under overridden creds. */
1300 ++static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1301 ++			 int datasync)
1302 ++{
1303 ++	struct super_block *sb = file->f_path.dentry->d_sb;
1304 ++	const struct cred *saved;
1305 ++	struct fd lower;
1306 ++	int err = shiftfs_real_fdget(file, &lower);
1307 ++
1308 ++	if (err)
1309 ++		return err;
1310 ++
1311 ++	saved = shiftfs_override_creds(sb);
1312 ++	err = vfs_fsync_range(lower.file, start, end, datasync);
1313 ++	revert_creds(saved);
1314 ++
1315 ++	fdput(lower);
1316 ++	return err;
1317 ++}
1318 ++/* ->mmap: swap the lower file into vma->vm_file and let call_mmap() map it. */
1319 ++static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1320 ++{
1321 ++ struct file *realfile = file->private_data;
1322 ++ const struct cred *oldcred;
1323 ++ int ret;
1324 ++ /* the lower file must provide ->mmap itself */
1325 ++ if (!realfile->f_op->mmap)
1326 ++ return -ENODEV;
1327 ++
1328 ++ if (WARN_ON(file != vma->vm_file))
1329 ++ return -EIO;
1330 ++ /* get_file() takes the reference that the vma holds on the lower file */
1331 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1332 ++ vma->vm_file = get_file(realfile);
1333 ++ ret = call_mmap(vma->vm_file, vma);
1334 ++ revert_creds(oldcred);
1335 ++
1336 ++ shiftfs_file_accessed(file);
1337 ++
1338 ++ if (ret) {
1339 ++ /*
1340 ++ * Drop refcount from new vm_file value and restore original
1341 ++ * vm_file value
1342 ++ */
1343 ++ vma->vm_file = file;
1344 ++ fput(realfile);
1345 ++ } else {
1346 ++ /* Drop refcount from previous vm_file value */
1347 ++ fput(file);
1348 ++ }
1349 ++
1350 ++ return ret;
1351 ++}
1352 ++/* ->fallocate: vfs_fallocate() on the lower file, then copy attributes back to the shiftfs inode. */
1353 ++static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1354 ++			      loff_t len)
1355 ++{
1356 ++	struct inode *upperi = file_inode(file);
1357 ++	const struct cred *saved;
1358 ++	struct fd lower;
1359 ++	int err;
1360 ++
1361 ++	err = shiftfs_real_fdget(file, &lower);
1362 ++	if (err)
1363 ++		return err;
1364 ++
1365 ++	saved = shiftfs_override_creds(file->f_path.dentry->d_sb);
1366 ++	err = vfs_fallocate(lower.file, mode, offset, len);
1367 ++	revert_creds(saved);
1368 ++
1369 ++	/* the lower size may have changed: refresh from the lower inode */
1370 ++	shiftfs_copyattr(upperi->i_private, upperi);
1371 ++
1372 ++	fdput(lower);
1373 ++
1374 ++	return err;
1375 ++}
1376 ++/* ->fadvise: vfs_fadvise() on the lower file under overridden creds. */
1377 ++static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1378 ++			   int advice)
1379 ++{
1380 ++	struct super_block *sb = file->f_path.dentry->d_sb;
1381 ++	const struct cred *saved;
1382 ++	struct fd lower;
1383 ++	int err = shiftfs_real_fdget(file, &lower);
1384 ++
1385 ++	if (err)
1386 ++		return err;
1387 ++
1388 ++	saved = shiftfs_override_creds(sb);
1389 ++	err = vfs_fadvise(lower.file, offset, len, advice);
1390 ++	revert_creds(saved);
1391 ++
1392 ++	fdput(lower);
1393 ++	return err;
1394 ++}
1395 ++/* Install capability-stripped creds with shifted fsuid/fsgid for an ioctl; 0 or -ENOMEM. */
1396 ++static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
1397 ++ const struct cred **oldcred,
1398 ++ struct cred **newcred)
1399 ++{
1400 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1401 ++ kuid_t fsuid = current_fsuid();
1402 ++ kgid_t fsgid = current_fsgid();
1403 ++
1404 ++ *oldcred = shiftfs_override_creds(sb);
1405 ++ /* work on a private copy so fsuid/fsgid and the cap sets can be edited */
1406 ++ *newcred = prepare_creds();
1407 ++ if (!*newcred) {
1408 ++ revert_creds(*oldcred);
1409 ++ return -ENOMEM;
1410 ++ }
1411 ++ /* fsuid/fsgid were sampled before the first override above */
1412 ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1413 ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
1414 ++
1415 ++ /* clear all caps to prevent bypassing capable() checks */
1416 ++ cap_clear((*newcred)->cap_bset);
1417 ++ cap_clear((*newcred)->cap_effective);
1418 ++ cap_clear((*newcred)->cap_inheritable);
1419 ++ cap_clear((*newcred)->cap_permitted);
1420 ++
1421 ++ if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1422 ++ kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1423 ++ /*
1424 ++ * Allow the root user in the container to remove subvolumes
1425 ++ * from other users.
1426 ++ */
1427 ++ if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1428 ++ cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1429 ++ }
1430 ++
1431 ++ put_cred(override_creds(*newcred));
1432 ++ return 0;
1433 ++}
1434 ++/* Counterpart of shiftfs_override_ioctl_creds(); defers to shiftfs_revert_object_creds(). */
1435 ++static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
1436 ++					      struct cred *newcred)
1437 ++{
1438 ++	shiftfs_revert_object_creds(oldcred, newcred);
1439 ++}
1440 ++/* True for BTRFS_IOC_SNAP_CREATE and BTRFS_IOC_SNAP_CREATE_V2, false otherwise. */
1441 ++static inline bool is_btrfs_snap_ioctl(int cmd)
1442 ++{
1443 ++	const bool is_v1 = cmd == BTRFS_IOC_SNAP_CREATE;
1444 ++	const bool is_v2 = cmd == BTRFS_IOC_SNAP_CREATE_V2;
1445 ++
1446 ++	return is_v1 || is_v2;
1447 ++}
1448 ++/* Undo fd_replace: copy @v1/@v2 back to @arg, close @fd, free both buffers; 0 or -EFAULT. */
1449 ++static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
1450 ++					  struct btrfs_ioctl_vol_args *v1,
1451 ++					  struct btrfs_ioctl_vol_args_v2 *v2)
1452 ++{
1453 ++	int ret;
1454 ++
1455 ++	if (!is_btrfs_snap_ioctl(cmd))
1456 ++		return 0;
1457 ++
1458 ++	if (cmd == BTRFS_IOC_SNAP_CREATE)
1459 ++		ret = copy_to_user(arg, v1, sizeof(*v1));
1460 ++	else
1461 ++		ret = copy_to_user(arg, v2, sizeof(*v2));
1462 ++
1463 ++	__close_fd(current->files, fd);
1464 ++	kfree(v1);
1465 ++	kfree(v2);
1466 ++	/* copy_to_user() returns the bytes left uncopied, not an errno */
1467 ++	return ret ? -EFAULT : 0;
1468 ++}
1469 ++/* Snapshot ioctls only: swap the shiftfs fd in the user's args for a new fd on the lower file. */
1470 ++static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1471 ++					  struct btrfs_ioctl_vol_args **b1,
1472 ++					  struct btrfs_ioctl_vol_args_v2 **b2,
1473 ++					  int *newfd)
1474 ++{
1475 ++	int oldfd, ret;
1476 ++	struct fd src;
1477 ++	struct fd lfd = {};
1478 ++	struct btrfs_ioctl_vol_args *v1 = NULL;
1479 ++	struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1480 ++
1481 ++	if (!is_btrfs_snap_ioctl(cmd))
1482 ++		return 0;
1483 ++
1484 ++	if (cmd == BTRFS_IOC_SNAP_CREATE) {
1485 ++		v1 = memdup_user(arg, sizeof(*v1));
1486 ++		if (IS_ERR(v1))
1487 ++			return PTR_ERR(v1);
1488 ++		oldfd = v1->fd;
1489 ++	} else {
1490 ++		v2 = memdup_user(arg, sizeof(*v2));
1491 ++		if (IS_ERR(v2))
1492 ++			return PTR_ERR(v2);
1493 ++		oldfd = v2->fd;
1494 ++	}
1495 ++
1496 ++	src = fdget(oldfd);
1497 ++	if (!src.file) {
1498 ++		ret = -EINVAL;
1499 ++		goto err_free;
1500 ++	}
1501 ++	ret = shiftfs_real_fdget(src.file, &lfd);
1502 ++	if (ret) {
1503 ++		fdput(src);
1504 ++		goto err_free;
1505 ++	}
1506 ++	/* shiftfs_real_fdget() takes no reference on lfd.file: take one to offset the
1507 ++	 * put done by __close_fd(); FDPUT_FPUT makes fdput(lfd) drop it instead. */
1508 ++	get_file(lfd.file);
1509 ++	lfd.flags |= FDPUT_FPUT;
1510 ++	fdput(src);
1511 ++
1512 ++	*newfd = get_unused_fd_flags(lfd.file->f_flags);
1513 ++	if (*newfd < 0) {
1514 ++		fdput(lfd);
1515 ++		ret = *newfd;
1516 ++		goto err_free;
1517 ++	}
1518 ++	fd_install(*newfd, lfd.file);
1519 ++	if (cmd == BTRFS_IOC_SNAP_CREATE) {
1520 ++		v1->fd = *newfd;
1521 ++		ret = copy_to_user(arg, v1, sizeof(*v1)) ? -EFAULT : 0;
1522 ++		v1->fd = oldfd;
1523 ++	} else {
1524 ++		v2->fd = *newfd;
1525 ++		ret = copy_to_user(arg, v2, sizeof(*v2)) ? -EFAULT : 0;
1526 ++		v2->fd = oldfd;
1527 ++	}
1528 ++	if (ret) {	/* restore closes *newfd and frees v1/v2: publish nothing */
1529 ++		shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
1530 ++		return ret;
1531 ++	}
1532 ++	*b1 = v1;	/* hand the buffers to the caller only on success */
1533 ++	*b2 = v2;
1534 ++	return 0;
1535 ++err_free:
1536 ++	kfree(v1);
1537 ++	kfree(v2);
1538 ++	return ret;
1539 ++}
1540 ++/* Run ioctl @cmd on the lower file with ioctl creds, then refresh the shiftfs inode. */
1541 ++static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1542 ++ unsigned long arg)
1543 ++{
1544 ++ struct fd lowerfd;
1545 ++ struct cred *newcred;
1546 ++ const struct cred *oldcred;
1547 ++ int newfd = -EBADF;
1548 ++ long err = 0, ret = 0;
1549 ++ void __user *argp = (void __user *)arg;
1550 ++ struct super_block *sb = file->f_path.dentry->d_sb;
1551 ++ struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1552 ++ struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1553 ++ /* returns 0 without touching anything unless cmd is a btrfs snapshot ioctl */
1554 ++ ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1555 ++ &newfd);
1556 ++ if (ret < 0)
1557 ++ return ret;
1558 ++
1559 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1560 ++ if (ret)
1561 ++ goto out_restore;
1562 ++
1563 ++ ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
1564 ++ if (ret)
1565 ++ goto out_fdput;
1566 ++
1567 ++ ret = vfs_ioctl(lowerfd.file, cmd, arg);
1568 ++
1569 ++ shiftfs_revert_ioctl_creds(oldcred, newcred);
1570 ++ /* copy attributes and flags from the lower inode to the shiftfs inode */
1571 ++ shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1572 ++ shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1573 ++
1574 ++out_fdput:
1575 ++ fdput(lowerfd);
1576 ++
1577 ++out_restore:
1578 ++ err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1579 ++ btrfs_v1, btrfs_v2);
1580 ++ if (!ret)
1581 ++ ret = err;
1582 ++
1583 ++ return ret;
1584 ++}
1585 ++/* Decide whether ioctl @flag (with user argument @arg) is one of the whitelisted btrfs ioctls. */
1586 ++static bool in_ioctl_whitelist(int flag, unsigned long arg)
1587 ++{
1588 ++	void __user *uarg = (void __user *)arg;
1589 ++	u64 subvol_flags = 0;
1590 ++
1591 ++	switch (flag) {
1592 ++	/* accepted without looking at the argument */
1593 ++	case BTRFS_IOC_FS_INFO:
1594 ++	case BTRFS_IOC_SNAP_CREATE:
1595 ++	case BTRFS_IOC_SNAP_CREATE_V2:
1596 ++	case BTRFS_IOC_SNAP_DESTROY:
1597 ++	case BTRFS_IOC_SUBVOL_CREATE:
1598 ++	case BTRFS_IOC_SUBVOL_CREATE_V2:
1599 ++	case BTRFS_IOC_SUBVOL_GETFLAGS:
1600 ++		return true;
1601 ++
1602 ++	case BTRFS_IOC_SUBVOL_SETFLAGS:
1603 ++		/*
1604 ++		 * Only a flags word with no bit other than
1605 ++		 * BTRFS_SUBVOL_RDONLY set is accepted.
1606 ++		 */
1607 ++		if (copy_from_user(&subvol_flags, uarg, sizeof(subvol_flags)))
1608 ++			return false;
1609 ++		if (subvol_flags & ~BTRFS_SUBVOL_RDONLY)
1610 ++			return false;
1611 ++		return true;
1612 ++
1613 ++	default:
1614 ++		/* anything else is refused */
1615 ++		return false;
1616 ++	}
1617 ++}
1618 ++/* ->unlocked_ioctl: always forward FS_IOC_{GETVERSION,GETFLAGS,SETFLAGS}; gate everything else. */
1619 ++static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1620 ++			  unsigned long arg)
1621 ++{
1622 ++	struct shiftfs_super_info *sbinfo = file->f_path.dentry->d_sb->s_fs_info;
1623 ++
1624 ++	switch (cmd) {
1625 ++	case FS_IOC_GETVERSION:
1626 ++	case FS_IOC_GETFLAGS:
1627 ++	case FS_IOC_SETFLAGS:
1628 ++		break;
1629 ++	default:
1630 ++		/* must be whitelisted and shiftfs_passthrough_ioctls() must agree */
1631 ++		if (!(in_ioctl_whitelist(cmd, arg) &&
1632 ++		      shiftfs_passthrough_ioctls(sbinfo)))
1633 ++			return -ENOTTY;
1634 ++	}
1635 ++	return shiftfs_real_ioctl(file, cmd, arg);
1636 ++}
1637 ++/* ->compat_ioctl: always forward FS_IOC32_{GETVERSION,GETFLAGS,SETFLAGS}; gate everything else. */
1638 ++static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1639 ++				 unsigned long arg)
1640 ++{
1641 ++	struct shiftfs_super_info *sbinfo = file->f_path.dentry->d_sb->s_fs_info;
1642 ++
1643 ++	switch (cmd) {
1644 ++	case FS_IOC32_GETVERSION:
1645 ++	case FS_IOC32_GETFLAGS:
1646 ++	case FS_IOC32_SETFLAGS:
1647 ++		break;
1648 ++	default:
1649 ++		/* must be whitelisted and shiftfs_passthrough_ioctls() must agree */
1650 ++		if (!(in_ioctl_whitelist(cmd, arg) &&
1651 ++		      shiftfs_passthrough_ioctls(sbinfo)))
1652 ++			return -ENOIOCTLCMD;
1653 ++	}
1654 ++	return shiftfs_real_ioctl(file, cmd, arg);
1655 ++}
1656 ++/* Selects which VFS helper shiftfs_copyfile() calls on the lower files. */
1657 ++enum shiftfs_copyop {
1658 ++ SHIFTFS_COPY, /* vfs_copy_file_range() */
1659 ++ SHIFTFS_CLONE, /* vfs_clone_file_range() */
1660 ++ SHIFTFS_DEDUPE, /* vfs_dedupe_file_range_one() */
1661 ++};
1662 ++/* Common backend for copy/clone/dedupe: run @op between the lower files of @file_in and @file_out. */
1663 ++static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1664 ++ struct file *file_out, loff_t pos_out, u64 len,
1665 ++ unsigned int flags, enum shiftfs_copyop op)
1666 ++{
1667 ++ ssize_t ret;
1668 ++ struct fd real_in, real_out;
1669 ++ const struct cred *oldcred;
1670 ++ struct inode *inode_out = file_inode(file_out);
1671 ++ struct inode *loweri = inode_out->i_private;
1672 ++
1673 ++ ret = shiftfs_real_fdget(file_out, &real_out);
1674 ++ if (ret)
1675 ++ return ret;
1676 ++
1677 ++ ret = shiftfs_real_fdget(file_in, &real_in);
1678 ++ if (ret) {
1679 ++ fdput(real_out);
1680 ++ return ret;
1681 ++ }
1682 ++ /* creds are overridden using the superblock of the output inode */
1683 ++ oldcred = shiftfs_override_creds(inode_out->i_sb);
1684 ++ switch (op) {
1685 ++ case SHIFTFS_COPY:
1686 ++ ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1687 ++ pos_out, len, flags);
1688 ++ break;
1689 ++
1690 ++ case SHIFTFS_CLONE:
1691 ++ ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1692 ++ pos_out, len, flags);
1693 ++ break;
1694 ++
1695 ++ case SHIFTFS_DEDUPE:
1696 ++ ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1697 ++ real_out.file, pos_out, len,
1698 ++ flags);
1699 ++ break;
1700 ++ }
1701 ++ revert_creds(oldcred);
1702 ++
1703 ++ /* Update size */
1704 ++ shiftfs_copyattr(loweri, inode_out);
1705 ++
1706 ++ fdput(real_in);
1707 ++ fdput(real_out);
1708 ++
1709 ++ return ret;
1710 ++}
1711 ++/* ->copy_file_range: shiftfs_copyfile() with SHIFTFS_COPY. */
1712 ++static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1713 ++ struct file *file_out, loff_t pos_out,
1714 ++ size_t len, unsigned int flags)
1715 ++{
1716 ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1717 ++ SHIFTFS_COPY);
1718 ++}
1719 ++/* ->remap_file_range: dispatch to shiftfs_copyfile() as a dedupe or a clone. */
1720 ++static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1721 ++				       struct file *file_out, loff_t pos_out,
1722 ++				       loff_t len, unsigned int remap_flags)
1723 ++{
1724 ++	const unsigned int supported = REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY;
1725 ++	enum shiftfs_copyop op = SHIFTFS_CLONE;
1726 ++
1727 ++	/* refuse any remap flag outside the supported set */
1728 ++	if (remap_flags & ~supported)
1729 ++		return -EINVAL;
1730 ++
1731 ++	if (remap_flags & REMAP_FILE_DEDUP)
1732 ++		op = SHIFTFS_DEDUPE;
1733 ++
1734 ++	return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1735 ++				remap_flags, op);
1736 ++}
1737 ++/* ->iterate_shared: iterate_dir() on the lower directory file under overridden creds. */
1738 ++static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1739 ++{
1740 ++	struct file *lowerdir = file->private_data;
1741 ++	const struct cred *saved;
1742 ++	int ret;
1743 ++
1744 ++	saved = shiftfs_override_creds(file->f_path.dentry->d_sb);
1745 ++	ret = iterate_dir(lowerdir, ctx);
1746 ++	revert_creds(saved);
1747 ++
1748 ++	return ret;
1749 ++}
1750 ++/* File operations for shiftfs regular files (entries in alphabetical order). */
1751 ++const struct file_operations shiftfs_file_operations = {
1752 ++	.compat_ioctl		= shiftfs_compat_ioctl,
1753 ++	.copy_file_range	= shiftfs_copy_file_range,
1754 ++	.fadvise		= shiftfs_fadvise,
1755 ++	.fallocate		= shiftfs_fallocate,
1756 ++	.fsync			= shiftfs_fsync,
1757 ++	.llseek			= shiftfs_file_llseek,
1758 ++	.mmap			= shiftfs_mmap,
1759 ++	.open			= shiftfs_open,
1760 ++	.read_iter		= shiftfs_read_iter,
1761 ++	.release		= shiftfs_release,
1762 ++	.remap_file_range	= shiftfs_remap_file_range,
1763 ++	.unlocked_ioctl		= shiftfs_ioctl,
1764 ++	.write_iter		= shiftfs_write_iter,
1765 ++};
1766 ++/* File operations for shiftfs directories. */
1767 ++const struct file_operations shiftfs_dir_operations = {
1768 ++	.open		= shiftfs_dir_open,
1769 ++	.release	= shiftfs_dir_release,
1770 ++	.llseek		= shiftfs_dir_llseek,
1771 ++	.read		= generic_read_dir,
1772 ++	.iterate_shared	= shiftfs_iterate_shared,
1773 ++	.fsync		= shiftfs_fsync,
1774 ++	.unlocked_ioctl	= shiftfs_ioctl,
1775 ++	.compat_ioctl	= shiftfs_compat_ioctl,
1776 ++};
1777 ++/* Address space operations installed on the mapping of shiftfs regular-file inodes. */
1778 ++static const struct address_space_operations shiftfs_aops = {
1779 ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1780 ++ .direct_IO = noop_direct_IO,
1781 ++};
1782 ++/* Initialise a new shiftfs inode: number, type-specific ops and, if @dentry is given, lower attributes. */
1783 ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1784 ++ umode_t mode, dev_t dev, struct dentry *dentry)
1785 ++{
1786 ++ struct inode *loweri;
1787 ++
1788 ++ inode->i_ino = ino;
1789 ++ inode->i_flags |= S_NOCMTIME;
1790 ++
1791 ++ mode &= S_IFMT;
1792 ++ inode->i_mode = mode;
1793 ++ switch (mode & S_IFMT) {
1794 ++ case S_IFDIR:
1795 ++ inode->i_op = &shiftfs_dir_inode_operations;
1796 ++ inode->i_fop = &shiftfs_dir_operations;
1797 ++ break;
1798 ++ case S_IFLNK:
1799 ++ inode->i_op = &shiftfs_symlink_inode_operations;
1800 ++ break;
1801 ++ case S_IFREG:
1802 ++ inode->i_op = &shiftfs_file_inode_operations;
1803 ++ inode->i_fop = &shiftfs_file_operations;
1804 ++ inode->i_mapping->a_ops = &shiftfs_aops;
1805 ++ break;
1806 ++ default:
1807 ++ inode->i_op = &shiftfs_special_inode_operations;
1808 ++ init_special_inode(inode, mode, dev);
1809 ++ break;
1810 ++ }
1811 ++
1812 ++ if (!dentry)
1813 ++ return;
1814 ++ /* @dentry is the lower dentry: mirror its inode's state into ours */
1815 ++ loweri = dentry->d_inode;
1816 ++ if (!loweri->i_op->get_link)
1817 ++ inode->i_opflags |= IOP_NOFOLLOW;
1818 ++
1819 ++ shiftfs_copyattr(loweri, inode);
1820 ++ shiftfs_copyflags(loweri, inode);
1821 ++ set_nlink(inode, loweri->i_nlink);
1822 ++}
1823 ++/* ->show_options: emit "mark" and "passthrough=<n>" for the options that are set. */
1824 ++static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1825 ++{
1826 ++	const struct shiftfs_super_info *info = dentry->d_sb->s_fs_info;
1827 ++
1828 ++	/* option without a value */
1829 ++	if (info->mark)
1830 ++		seq_show_option(m, "mark", NULL);
1831 ++
1832 ++	/* only shown when non-zero */
1833 ++	if (info->passthrough)
1834 ++		seq_printf(m, ",passthrough=%u", info->passthrough);
1835 ++	return 0;
1836 ++}
1837 ++/* ->statfs: vfs_statfs() on the lower root; f_type becomes s_magic unless shiftfs_passthrough_statfs(). */
1838 ++static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1839 ++{
1840 ++	struct super_block *sb = dentry->d_sb;
1841 ++	struct shiftfs_super_info *info = sb->s_fs_info;
1842 ++	struct path lowerroot = {
1843 ++		.mnt = info->mnt,
1844 ++		.dentry = sb->s_root->d_fsdata,
1845 ++	};
1846 ++	int ret = vfs_statfs(&lowerroot, buf);
1847 ++
1848 ++	if (ret)
1849 ++		return ret;
1850 ++
1851 ++	/* report our own magic when statfs passthrough is not in effect */
1852 ++	if (!shiftfs_passthrough_statfs(info))
1853 ++		buf->f_type = sb->s_magic;
1854 ++	return 0;
1855 ++}
1856 ++/* ->evict_inode: clear the shiftfs inode, then iput() the lower inode kept in i_private. */
1857 ++static void shiftfs_evict_inode(struct inode *inode)
1858 ++{
1859 ++	struct inode *lower = inode->i_private;
1860 ++
1861 ++	clear_inode(inode);
1862 ++	if (!lower)
1863 ++		return;
1864 ++	iput(lower);
1865 ++}
1866 ++/* ->put_super: drop the lower mount and creator creds, then free the sb info. */
1867 ++static void shiftfs_put_super(struct super_block *sb)
1868 ++{
1869 ++	struct shiftfs_super_info *info = sb->s_fs_info;
1870 ++
1871 ++	if (!info)
1872 ++		return;
1873 ++	mntput(info->mnt);
1874 ++	put_cred(info->creator_cred);
1875 ++	kfree(info);	/* NOTE(review): info->userns is not put here -- confirm */
1876 ++}
1877 ++/* Generic xattr handler, registered with an empty name prefix. */
1878 ++static const struct xattr_handler shiftfs_xattr_handler = {
1879 ++	.get	= shiftfs_xattr_get,
1880 ++	.set	= shiftfs_xattr_set,
1881 ++	.prefix	= "",
1882 ++};
1883 ++/* NULL-terminated handler list installed as sb->s_xattr; ACL handlers precede the generic one. */
1884 ++const struct xattr_handler *shiftfs_xattr_handlers[] = {
1885 ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1886 ++ &shiftfs_posix_acl_access_xattr_handler,
1887 ++ &shiftfs_posix_acl_default_xattr_handler,
1888 ++#endif
1889 ++ &shiftfs_xattr_handler,
1890 ++ NULL
1891 ++};
1892 ++/* True when every bit set in @new_flags is also set in @old_flags. */
1893 ++static inline bool passthrough_is_subset(int old_flags, int new_flags)
1894 ++{
1895 ++	/* bits requested by @new_flags that @old_flags does not have */
1896 ++	const int extra = new_flags & ~old_flags;
1897 ++
1898 ++	return extra == 0;
1899 ++}
1900 ++/* Remount check: restrictive sb flags may not be cleared, SB_POSIXACL may not be added; 0 or -EPERM. */
1901 ++static int shiftfs_super_check_flags(unsigned long old_flags,
1902 ++				     unsigned long new_flags)
1903 ++{
1904 ++	/*
1905 ++	 * Restrictive flags: once one of these is set on the
1906 ++	 * superblock, a remount may keep it but must not clear
1907 ++	 * it again.
1908 ++	 */
1909 ++	const unsigned long sticky = SB_RDONLY | SB_NOSUID | SB_NODEV |
1910 ++				     SB_NOEXEC | SB_NOATIME | SB_NODIRATIME;
1911 ++	const unsigned long dropped = old_flags & ~new_flags;
1912 ++	const unsigned long added = new_flags & ~old_flags;
1913 ++
1914 ++	/* clearing any restrictive flag is refused */
1915 ++	if (dropped & sticky)
1916 ++		return -EPERM;
1917 ++
1918 ++	/*
1919 ++	 * SB_POSIXACL works the other way round: it may be kept or
1920 ++	 * dropped, but never newly enabled.
1921 ++	 */
1922 ++	if (added & SB_POSIXACL)
1923 ++		return -EPERM;
1924 ++
1925 ++	return 0;
1926 ++}
1927 ++/* ->remount_fs: validate the new options and sb flags, then update info->passthrough. */
1928 ++static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1929 ++{
1930 ++	int err;
1931 ++	struct shiftfs_super_info new = {};
1932 ++	struct shiftfs_super_info *info = sb->s_fs_info;
1933 ++
1934 ++	err = shiftfs_parse_mount_options(&new, data);
1935 ++	if (err)
1936 ++		return err;
1937 ++
1938 ++	err = shiftfs_super_check_flags(sb->s_flags, *flags);
1939 ++	if (err)
1940 ++		return err;
1941 ++
1942 ++	/* A mark mount is never remounted and "mark" cannot be added later. */
1943 ++	if (info->mark || (info->mark != new.mark))
1944 ++		return -EPERM;
1945 ++
1946 ++	if (info->passthrough != new.passthrough) {
1947 ++		/* The requested options must not exceed those of the mark mount. */
1948 ++		if (!passthrough_is_subset(info->passthrough_mark,
1949 ++					   new.passthrough))
1950 ++			return -EPERM;
1951 ++
1952 ++		info->passthrough = new.passthrough;
1953 ++	}
1954 ++
1955 ++	return 0;
1956 ++}
1957 ++/* Superblock operations installed by shiftfs_fill_super(). */
1958 ++static const struct super_operations shiftfs_super_ops = {
1959 ++	.evict_inode	= shiftfs_evict_inode,
1960 ++	.put_super	= shiftfs_put_super,
1961 ++	.remount_fs	= shiftfs_remount,
1962 ++	.show_options	= shiftfs_show_options,
1963 ++	.statfs		= shiftfs_statfs,
1964 ++};
1965 ++/* Argument bundle passed from shiftfs_mount() to shiftfs_fill_super(); initialised positionally. */
1966 ++struct shiftfs_data {
1967 ++ void *data; /* mount data, handed to shiftfs_parse_mount_options() */
1968 ++ const char *path; /* dev_name of the mount, resolved with kern_path() */
1969 ++};
1970 ++/* Force the lower sb's restrictive flags onto @sb; drop SB_POSIXACL if the lower sb lacks it. */
1971 ++static void shiftfs_super_force_flags(struct super_block *sb,
1972 ++				      unsigned long lower_flags)
1973 ++{
1974 ++	/* restrictive flags set below are inherited here as well */
1975 ++	sb->s_flags |= (SB_RDONLY | SB_NOSUID | SB_NODEV | SB_NOEXEC |
1976 ++			SB_NOATIME | SB_NODIRATIME) & lower_flags;
1977 ++	if ((lower_flags & SB_POSIXACL) == 0)
1978 ++		sb->s_flags &= ~SB_POSIXACL;
1979 ++}
1980 ++/* Fill a new shiftfs superblock over the directory named by data->path; 0 or a negative errno. */
1981 ++static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1982 ++			      int silent)
1983 ++{
1984 ++	int err;
1985 ++	struct path path = {};
1986 ++	struct shiftfs_super_info *sbinfo_mp;
1987 ++	char *name = NULL;
1988 ++	struct inode *inode = NULL;
1989 ++	struct dentry *dentry = NULL;
1990 ++	struct shiftfs_data *data = raw_data;
1991 ++	struct shiftfs_super_info *sbinfo = NULL;
1992 ++
1993 ++	if (!data->path)
1994 ++		return -EINVAL;
1995 ++
1996 ++	sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1997 ++	if (!sb->s_fs_info)
1998 ++		return -ENOMEM;
1999 ++	sbinfo = sb->s_fs_info;
2000 ++
2001 ++	err = shiftfs_parse_mount_options(sbinfo, data->data);
2002 ++	if (err)
2003 ++		goto out_free_name;
2004 ++
2005 ++	err = -EPERM;	/* a non-mark mount requires userns admin */
2006 ++	if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2007 ++		goto out_free_name;
2008 ++	err = -ENOMEM;
2009 ++	name = kstrdup(data->path, GFP_KERNEL);
2010 ++	if (!name)
2011 ++		goto out_free_name;
2012 ++
2013 ++	err = kern_path(name, LOOKUP_FOLLOW, &path);
2014 ++	if (err)
2015 ++		goto out_free_name;
2016 ++
2017 ++	if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
2018 ++		err = -ENOTDIR;
2019 ++		goto out_put_path;
2020 ++	}
2021 ++
2022 ++	sb->s_flags |= SB_POSIXACL;
2023 ++
2024 ++	if (sbinfo->mark) {
2025 ++		struct cred *cred_tmp;
2026 ++		struct super_block *lower_sb = path.mnt->mnt_sb;
2027 ++
2028 ++		/* to mark a mount point, must be root wrt lower s_user_ns */
2029 ++		if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2030 ++			err = -EPERM;
2031 ++			goto out_put_path;
2032 ++		}
2033 ++
2034 ++		/*
2035 ++		 * this part is visible unshifted, so forbid executables that
2036 ++		 * could be used to give suid privileges (|= keeps other bits)
2037 ++		 */
2038 ++		sb->s_iflags |= SB_I_NOEXEC;
2039 ++
2040 ++		shiftfs_super_force_flags(sb, lower_sb->s_flags);
2041 ++
2042 ++		/*
2043 ++		 * Handle nesting of shiftfs mounts by referring this mark mount
2044 ++		 * back to the original mark mount: more efficient, and it
2045 ++		 * alleviates concerns about stack depth.
2046 ++		 */
2047 ++		if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2048 ++			sbinfo_mp = lower_sb->s_fs_info;
2049 ++
2050 ++			/* Doesn't make sense to mark a mark mount */
2051 ++			if (sbinfo_mp->mark) {
2052 ++				err = -EINVAL;
2053 ++				goto out_put_path;
2054 ++			}
2055 ++
2056 ++			if (!passthrough_is_subset(sbinfo_mp->passthrough,
2057 ++						   sbinfo->passthrough)) {
2058 ++				err = -EPERM;
2059 ++				goto out_put_path;
2060 ++			}
2061 ++
2062 ++			sbinfo->mnt = mntget(sbinfo_mp->mnt);
2063 ++			dentry = dget(path.dentry->d_fsdata);
2064 ++			/*
2065 ++			 * Copy up the passthrough mount options from the
2066 ++			 * parent mark mountpoint.
2067 ++			 */
2068 ++			sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
2069 ++			sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2070 ++		} else {
2071 ++			sbinfo->mnt = mntget(path.mnt);
2072 ++			dentry = dget(path.dentry);
2073 ++			/* For a new mark passthrough_mark equals passthrough. */
2074 ++			sbinfo->passthrough_mark = sbinfo->passthrough;
2075 ++
2076 ++			cred_tmp = prepare_creds();
2077 ++			if (!cred_tmp) {
2078 ++				err = -ENOMEM;
2079 ++				goto out_put_path;
2080 ++			}
2081 ++			/* Don't override disk quota limits or use reserved space. */
2082 ++			cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2083 ++			sbinfo->creator_cred = cred_tmp;
2084 ++		}
2085 ++	} else {
2086 ++		/*
2087 ++		 * This leg executes if we're admin capable in the namespace,
2088 ++		 * so be very careful.
2089 ++		 */
2090 ++		err = -EPERM;
2091 ++		if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2092 ++			goto out_put_path;
2093 ++
2094 ++		sbinfo_mp = path.dentry->d_sb->s_fs_info;
2095 ++		if (!sbinfo_mp->mark)
2096 ++			goto out_put_path;
2097 ++
2098 ++		if (!passthrough_is_subset(sbinfo_mp->passthrough,
2099 ++					   sbinfo->passthrough))
2100 ++			goto out_put_path;
2101 ++
2102 ++		sbinfo->mnt = mntget(sbinfo_mp->mnt);
2103 ++		sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2104 ++		dentry = dget(path.dentry->d_fsdata);
2105 ++		/*
2106 ++		 * Copy up passthrough settings from mark mountpoint so we can
2107 ++		 * verify when the overlay wants to remount with different
2108 ++		 * passthrough settings.
2109 ++		 */
2110 ++		sbinfo->passthrough_mark = sbinfo_mp->passthrough;
2111 ++		shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2112 ++	}
2113 ++
2114 ++	sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2115 ++	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2116 ++		printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2117 ++		err = -EINVAL;
2118 ++		goto out_put_path;
2119 ++	}
2120 ++
2121 ++	inode = new_inode(sb);
2122 ++	if (!inode) {
2123 ++		err = -ENOMEM;
2124 ++		goto out_put_path;
2125 ++	}
2126 ++	shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2127 ++
2128 ++	ihold(dentry->d_inode);
2129 ++	inode->i_private = dentry->d_inode;
2130 ++
2131 ++	sb->s_magic = SHIFTFS_MAGIC;
2132 ++	sb->s_maxbytes = MAX_LFS_FILESIZE;
2133 ++	sb->s_op = &shiftfs_super_ops;
2134 ++	sb->s_xattr = shiftfs_xattr_handlers;
2135 ++	sb->s_d_op = &shiftfs_dentry_ops;
2136 ++	sb->s_root = d_make_root(inode);
2137 ++	if (!sb->s_root) {
2138 ++		err = -ENOMEM;
2139 ++		goto out_put_path;
2140 ++	}
2141 ++
2142 ++	sb->s_root->d_fsdata = dentry;
2143 ++	sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2144 ++	shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
2145 ++
2146 ++	dentry = NULL;
2147 ++	err = 0;
2148 ++
2149 ++out_put_path:
2150 ++	path_put(&path);
2151 ++
2152 ++out_free_name:
2153 ++	kfree(name);
2154 ++	dput(dentry);
2155 ++	/* ->put_super only runs once s_root is set: release sbinfo here */
2156 ++	if (err) {
2157 ++		mntput(sbinfo->mnt);
2158 ++		put_cred(sbinfo->creator_cred);
2159 ++		kfree(sbinfo);
2160 ++		sb->s_fs_info = NULL;
2161 ++	}
2162 ++	return err;
2163 ++}
2163 ++
2164 ++static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2165 ++ int flags, const char *dev_name, void *data)
2166 ++{ /* .mount handler of shiftfs_type: mounts via mount_nodev() with shiftfs_fill_super as the fill callback */
2167 ++ struct shiftfs_data d = { data, dev_name }; /* pack the mount data and dev_name into a single argument */
2168 ++
2169 ++ return mount_nodev(fs_type, flags, &d, shiftfs_fill_super); /* &d is the data pointer passed on to shiftfs_fill_super */
2170 ++}
2171 ++
2172 ++static struct file_system_type shiftfs_type = { /* filesystem type (un)registered by shiftfs_init()/shiftfs_exit() */
2173 ++ .owner = THIS_MODULE,
2174 ++ .name = "shiftfs", /* name the filesystem type is registered under */
2175 ++ .mount = shiftfs_mount,
2176 ++ .kill_sb = kill_anon_super, /* NOTE(review): presumably pairs with the device-less superblock from mount_nodev() - confirm */
2177 ++ .fs_flags = FS_USERNS_MOUNT, /* fs.h: "Can be mounted by userns root" */
2178 ++};
2179 ++
2180 ++static int __init shiftfs_init(void)
2181 ++{ /* module init hook: registers the shiftfs filesystem type */
2182 ++ return register_filesystem(&shiftfs_type); /* result is returned unchanged as the init status */
2183 ++}
2184 ++
2185 ++static void __exit shiftfs_exit(void)
2186 ++{ /* module exit hook: undoes the registration done in shiftfs_init() */
2187 ++ unregister_filesystem(&shiftfs_type); /* return value is not checked */
2188 ++}
2189 ++
2190 ++MODULE_ALIAS_FS("shiftfs"); /* NOTE(review): presumably adds the "fs-shiftfs" alias used for autoloading - confirm against linux/fs.h */
2191 ++MODULE_AUTHOR("James Bottomley");
2192 ++MODULE_AUTHOR("Seth Forshee <seth.forshee@×××××××××.com>");
2193 ++MODULE_AUTHOR("Christian Brauner <christian.brauner@××××××.com>");
2194 ++MODULE_DESCRIPTION("id shifting filesystem");
2195 ++MODULE_LICENSE("GPL v2");
2196 ++module_init(shiftfs_init) /* entry point: registers the filesystem type */
2197 ++module_exit(shiftfs_exit) /* exit point: unregisters the filesystem type */
2198 +--- a/include/uapi/linux/magic.h 2021-01-06 19:08:45.234777659 -0500
2199 ++++ b/include/uapi/linux/magic.h 2021-01-06 19:09:53.900375394 -0500
2200 +@@ -96,4 +96,6 @@
2201 + #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
2202 + #define Z3FOLD_MAGIC 0x33
2203 +
2204 ++#define SHIFTFS_MAGIC 0x6a656a62
2205 ++
2206 + #endif /* __LINUX_MAGIC_H__ */
2207 +--- a/fs/Makefile 2021-02-27 09:04:17.727589780 -0500
2208 ++++ b/fs/Makefile 2021-02-27 09:06:07.620915657 -0500
2209 +@@ -132,3 +132,4 @@ obj-$(CONFIG_CEPH_FS) += ceph/
2210 + obj-$(CONFIG_PSTORE) += pstore/
2211 + obj-$(CONFIG_EFIVAR_FS) += efivarfs/
2212 + obj-$(CONFIG_EROFS_FS) += erofs/
2213 ++obj-$(CONFIG_SHIFT_FS) += shiftfs.o
2214 +--- a/fs/Kconfig 2021-01-06 19:14:17.709697891 -0500
2215 ++++ b/fs/Kconfig 2021-01-06 19:15:23.413281282 -0500
2216 +@@ -122,6 +122,24 @@ source "fs/autofs/Kconfig"
2217 + source "fs/fuse/Kconfig"
2218 + source "fs/overlayfs/Kconfig"
2219 +
2220 ++config SHIFT_FS
2221 ++ tristate "UID/GID shifting overlay filesystem for containers"
2222 ++ help
2223 ++ This filesystem can overlay any mounted filesystem and shift
2224 ++ the uid/gid that its files appear to be owned by. The idea
2225 ++ is that unprivileged containers can use this technique to
2226 ++ mount their root volumes.
2227 ++
2228 ++config SHIFT_FS_POSIX_ACL
2229 ++ bool "shiftfs POSIX Access Control Lists"
2230 ++ depends on SHIFT_FS
2231 ++ select FS_POSIX_ACL
2232 ++ help
2233 ++ POSIX Access Control Lists (ACLs) support permissions for users and
2234 ++ groups beyond the owner/group/world scheme.
2235 ++
2236 ++ If you don't know what Access Control Lists are, say N.
2237 ++
2238 + menu "Caches"
2239 +
2240 + source "fs/fscache/Kconfig"