Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.10 commit in: /
Date: Sat, 09 Jan 2021 00:14:53
Message-Id: 1610151265.52fcea92547bfbe61325bee94a23e801d4a80453.mpagano@gentoo
1 commit: 52fcea92547bfbe61325bee94a23e801d4a80453
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Sat Jan 9 00:14:25 2021 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Sat Jan 9 00:14:25 2021 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=52fcea92
7
8 Add support for shiftfs
9
10 UID/GID shifting overlay filesystem for containers
11
12 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
13
14 0000_README | 4 +
15 5000_shifts-ubuntu-20.04.patch | 2203 ++++++++++++++++++++++++++++++++++++++++
16 2 files changed, 2207 insertions(+)
17
18 diff --git a/0000_README b/0000_README
19 index 53642e2..2fb3d39 100644
20 --- a/0000_README
21 +++ b/0000_README
22 @@ -87,6 +87,10 @@ Patch: 4567_distro-Gentoo-Kconfig.patch
23 From: Tom Wijsman <TomWij@g.o>
24 Desc: Add Gentoo Linux support config settings and defaults.
25
26 +Patch: 5000_shifts-ubuntu-20.04.patch
27 +From: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/focal
28 +Desc: UID/GID shifting overlay filesystem for containers
29 +
30 Patch: 5013_enable-cpu-optimizations-for-gcc10.patch
31 From: https://github.com/graysky2/kernel_gcc_patch/
32 Desc: Kernel patch enables gcc = v10.1+ optimizations for additional CPUs.
33
34 diff --git a/5000_shifts-ubuntu-20.04.patch b/5000_shifts-ubuntu-20.04.patch
35 new file mode 100644
36 index 0000000..665fc66
37 --- /dev/null
38 +++ b/5000_shifts-ubuntu-20.04.patch
39 @@ -0,0 +1,2203 @@
40 +--- /dev/null 2021-01-08 13:33:13.190303432 -0500
41 ++++ b/fs/shiftfs.c 2021-01-08 19:02:40.000000000 -0500
42 +@@ -0,0 +1,2157 @@
43 ++#include <linux/btrfs.h>
44 ++#include <linux/capability.h>
45 ++#include <linux/cred.h>
46 ++#include <linux/mount.h>
47 ++#include <linux/fdtable.h>
48 ++#include <linux/file.h>
49 ++#include <linux/fs.h>
50 ++#include <linux/namei.h>
51 ++#include <linux/module.h>
52 ++#include <linux/kernel.h>
53 ++#include <linux/magic.h>
54 ++#include <linux/parser.h>
55 ++#include <linux/security.h>
56 ++#include <linux/seq_file.h>
57 ++#include <linux/statfs.h>
58 ++#include <linux/slab.h>
59 ++#include <linux/user_namespace.h>
60 ++#include <linux/uidgid.h>
61 ++#include <linux/xattr.h>
62 ++#include <linux/posix_acl.h>
63 ++#include <linux/posix_acl_xattr.h>
64 ++#include <linux/uio.h>
65 ++#include <linux/fiemap.h>
66 ++
67 ++struct shiftfs_super_info {
68 ++ struct vfsmount *mnt;
69 ++ struct user_namespace *userns;
70 ++ /* creds of process who created the super block */
71 ++ const struct cred *creator_cred;
72 ++ bool mark;
73 ++ unsigned int passthrough;
74 ++ unsigned int passthrough_mark;
75 ++};
76 ++
77 ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
78 ++ umode_t mode, dev_t dev, struct dentry *dentry);
79 ++
80 ++#define SHIFTFS_PASSTHROUGH_NONE 0
81 ++#define SHIFTFS_PASSTHROUGH_STAT 1
82 ++#define SHIFTFS_PASSTHROUGH_IOCTL 2
83 ++#define SHIFTFS_PASSTHROUGH_ALL \
84 ++ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
85 ++
86 ++static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
87 ++{
88 ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
89 ++ return false;
90 ++
91 ++ return true;
92 ++}
93 ++
94 ++static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
95 ++{
96 ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
97 ++ return false;
98 ++
99 ++ return true;
100 ++}
101 ++
102 ++enum {
103 ++ OPT_MARK,
104 ++ OPT_PASSTHROUGH,
105 ++ OPT_LAST,
106 ++};
107 ++
108 ++/* global filesystem options */
109 ++static const match_table_t tokens = {
110 ++ { OPT_MARK, "mark" },
111 ++ { OPT_PASSTHROUGH, "passthrough=%u" },
112 ++ { OPT_LAST, NULL }
113 ++};
114 ++
115 ++static const struct cred *shiftfs_override_creds(const struct super_block *sb)
116 ++{
117 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
118 ++
119 ++ return override_creds(sbinfo->creator_cred);
120 ++}
121 ++
122 ++static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
123 ++ struct cred *newcred)
124 ++{
125 ++ revert_creds(oldcred);
126 ++ put_cred(newcred);
127 ++}
128 ++
129 ++static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
130 ++ kuid_t kuid)
131 ++{
132 ++ uid_t uid = from_kuid(from, kuid);
133 ++ return make_kuid(to, uid);
134 ++}
135 ++
136 ++static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
137 ++ kgid_t kgid)
138 ++{
139 ++ gid_t gid = from_kgid(from, kgid);
140 ++ return make_kgid(to, gid);
141 ++}
142 ++
143 ++static int shiftfs_override_object_creds(const struct super_block *sb,
144 ++ const struct cred **oldcred,
145 ++ struct cred **newcred,
146 ++ struct dentry *dentry, umode_t mode,
147 ++ bool hardlink)
148 ++{
149 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
150 ++ kuid_t fsuid = current_fsuid();
151 ++ kgid_t fsgid = current_fsgid();
152 ++
153 ++ *oldcred = shiftfs_override_creds(sb);
154 ++
155 ++ *newcred = prepare_creds();
156 ++ if (!*newcred) {
157 ++ revert_creds(*oldcred);
158 ++ return -ENOMEM;
159 ++ }
160 ++
161 ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
162 ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
163 ++
164 ++ if (!hardlink) {
165 ++ int err = security_dentry_create_files_as(dentry, mode,
166 ++ &dentry->d_name,
167 ++ *oldcred, *newcred);
168 ++ if (err) {
169 ++ shiftfs_revert_object_creds(*oldcred, *newcred);
170 ++ return err;
171 ++ }
172 ++ }
173 ++
174 ++ put_cred(override_creds(*newcred));
175 ++ return 0;
176 ++}
177 ++
178 ++static void shiftfs_copyattr(struct inode *from, struct inode *to)
179 ++{
180 ++ struct user_namespace *from_ns = from->i_sb->s_user_ns;
181 ++ struct user_namespace *to_ns = to->i_sb->s_user_ns;
182 ++
183 ++ to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
184 ++ to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
185 ++ to->i_mode = from->i_mode;
186 ++ to->i_atime = from->i_atime;
187 ++ to->i_mtime = from->i_mtime;
188 ++ to->i_ctime = from->i_ctime;
189 ++ i_size_write(to, i_size_read(from));
190 ++}
191 ++
192 ++static void shiftfs_copyflags(struct inode *from, struct inode *to)
193 ++{
194 ++ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
195 ++
196 ++ inode_set_flags(to, from->i_flags & mask, mask);
197 ++}
198 ++
199 ++static void shiftfs_file_accessed(struct file *file)
200 ++{
201 ++ struct inode *upperi, *loweri;
202 ++
203 ++ if (file->f_flags & O_NOATIME)
204 ++ return;
205 ++
206 ++ upperi = file_inode(file);
207 ++ loweri = upperi->i_private;
208 ++
209 ++ if (!loweri)
210 ++ return;
211 ++
212 ++ upperi->i_mtime = loweri->i_mtime;
213 ++ upperi->i_ctime = loweri->i_ctime;
214 ++
215 ++ touch_atime(&file->f_path);
216 ++}
217 ++
218 ++static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
219 ++ char *options)
220 ++{
221 ++ char *p;
222 ++ substring_t args[MAX_OPT_ARGS];
223 ++
224 ++ sbinfo->mark = false;
225 ++ sbinfo->passthrough = 0;
226 ++
227 ++ while ((p = strsep(&options, ",")) != NULL) {
228 ++ int err, intarg, token;
229 ++
230 ++ if (!*p)
231 ++ continue;
232 ++
233 ++ token = match_token(p, tokens, args);
234 ++ switch (token) {
235 ++ case OPT_MARK:
236 ++ sbinfo->mark = true;
237 ++ break;
238 ++ case OPT_PASSTHROUGH:
239 ++ err = match_int(&args[0], &intarg);
240 ++ if (err)
241 ++ return err;
242 ++
243 ++ if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
244 ++ return -EINVAL;
245 ++
246 ++ sbinfo->passthrough = intarg;
247 ++ break;
248 ++ default:
249 ++ return -EINVAL;
250 ++ }
251 ++ }
252 ++
253 ++ return 0;
254 ++}
255 ++
256 ++static void shiftfs_d_release(struct dentry *dentry)
257 ++{
258 ++ struct dentry *lowerd = dentry->d_fsdata;
259 ++
260 ++ if (lowerd)
261 ++ dput(lowerd);
262 ++}
263 ++
264 ++static struct dentry *shiftfs_d_real(struct dentry *dentry,
265 ++ const struct inode *inode)
266 ++{
267 ++ struct dentry *lowerd = dentry->d_fsdata;
268 ++
269 ++ if (inode && d_inode(dentry) == inode)
270 ++ return dentry;
271 ++
272 ++ lowerd = d_real(lowerd, inode);
273 ++ if (lowerd && (!inode || inode == d_inode(lowerd)))
274 ++ return lowerd;
275 ++
276 ++ WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
277 ++ inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
278 ++ return dentry;
279 ++}
280 ++
281 ++static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
282 ++{
283 ++ int err = 1;
284 ++ struct dentry *lowerd = dentry->d_fsdata;
285 ++
286 ++ if (d_is_negative(lowerd) != d_is_negative(dentry))
287 ++ return 0;
288 ++
289 ++ if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
290 ++ err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
291 ++
292 ++ if (d_really_is_positive(dentry)) {
293 ++ struct inode *inode = d_inode(dentry);
294 ++ struct inode *loweri = d_inode(lowerd);
295 ++
296 ++ shiftfs_copyattr(loweri, inode);
297 ++ }
298 ++
299 ++ return err;
300 ++}
301 ++
302 ++static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
303 ++{
304 ++ int err = 1;
305 ++ struct dentry *lowerd = dentry->d_fsdata;
306 ++
307 ++ if (d_unhashed(lowerd) ||
308 ++ ((d_is_negative(lowerd) != d_is_negative(dentry))))
309 ++ return 0;
310 ++
311 ++ if (flags & LOOKUP_RCU)
312 ++ return -ECHILD;
313 ++
314 ++ if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
315 ++ err = lowerd->d_op->d_revalidate(lowerd, flags);
316 ++
317 ++ if (d_really_is_positive(dentry)) {
318 ++ struct inode *inode = d_inode(dentry);
319 ++ struct inode *loweri = d_inode(lowerd);
320 ++
321 ++ shiftfs_copyattr(loweri, inode);
322 ++ }
323 ++
324 ++ return err;
325 ++}
326 ++
327 ++static const struct dentry_operations shiftfs_dentry_ops = {
328 ++ .d_release = shiftfs_d_release,
329 ++ .d_real = shiftfs_d_real,
330 ++ .d_revalidate = shiftfs_d_revalidate,
331 ++ .d_weak_revalidate = shiftfs_d_weak_revalidate,
332 ++};
333 ++
334 ++static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
335 ++ struct delayed_call *done)
336 ++{
337 ++ const char *p;
338 ++ const struct cred *oldcred;
339 ++ struct dentry *lowerd;
340 ++
341 ++ /* RCU lookup not supported */
342 ++ if (!dentry)
343 ++ return ERR_PTR(-ECHILD);
344 ++
345 ++ lowerd = dentry->d_fsdata;
346 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
347 ++ p = vfs_get_link(lowerd, done);
348 ++ revert_creds(oldcred);
349 ++
350 ++ return p;
351 ++}
352 ++
353 ++static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
354 ++ const char *name, const void *value,
355 ++ size_t size, int flags)
356 ++{
357 ++ struct dentry *lowerd = dentry->d_fsdata;
358 ++ int err;
359 ++ const struct cred *oldcred;
360 ++
361 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
362 ++ err = vfs_setxattr(lowerd, name, value, size, flags);
363 ++ revert_creds(oldcred);
364 ++
365 ++ shiftfs_copyattr(lowerd->d_inode, inode);
366 ++
367 ++ return err;
368 ++}
369 ++
370 ++static int shiftfs_xattr_get(const struct xattr_handler *handler,
371 ++ struct dentry *dentry, struct inode *inode,
372 ++ const char *name, void *value, size_t size)
373 ++{
374 ++ struct dentry *lowerd = dentry->d_fsdata;
375 ++ int err;
376 ++ const struct cred *oldcred;
377 ++
378 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
379 ++ err = vfs_getxattr(lowerd, name, value, size);
380 ++ revert_creds(oldcred);
381 ++
382 ++ return err;
383 ++}
384 ++
385 ++static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
386 ++ size_t size)
387 ++{
388 ++ struct dentry *lowerd = dentry->d_fsdata;
389 ++ int err;
390 ++ const struct cred *oldcred;
391 ++
392 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
393 ++ err = vfs_listxattr(lowerd, list, size);
394 ++ revert_creds(oldcred);
395 ++
396 ++ return err;
397 ++}
398 ++
399 ++static int shiftfs_removexattr(struct dentry *dentry, const char *name)
400 ++{
401 ++ struct dentry *lowerd = dentry->d_fsdata;
402 ++ int err;
403 ++ const struct cred *oldcred;
404 ++
405 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
406 ++ err = vfs_removexattr(lowerd, name);
407 ++ revert_creds(oldcred);
408 ++
409 ++ /* update c/mtime */
410 ++ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
411 ++
412 ++ return err;
413 ++}
414 ++
415 ++static int shiftfs_xattr_set(const struct xattr_handler *handler,
416 ++ struct dentry *dentry, struct inode *inode,
417 ++ const char *name, const void *value, size_t size,
418 ++ int flags)
419 ++{
420 ++ if (!value)
421 ++ return shiftfs_removexattr(dentry, name);
422 ++ return shiftfs_setxattr(dentry, inode, name, value, size, flags);
423 ++}
424 ++
425 ++static int shiftfs_inode_test(struct inode *inode, void *data)
426 ++{
427 ++ return inode->i_private == data;
428 ++}
429 ++
430 ++static int shiftfs_inode_set(struct inode *inode, void *data)
431 ++{
432 ++ inode->i_private = data;
433 ++ return 0;
434 ++}
435 ++
436 ++static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
437 ++ umode_t mode, const char *symlink,
438 ++ struct dentry *hardlink, bool excl)
439 ++{
440 ++ int err;
441 ++ const struct cred *oldcred;
442 ++ struct cred *newcred;
443 ++ void *loweri_iop_ptr = NULL;
444 ++ umode_t modei = mode;
445 ++ struct super_block *dir_sb = diri->i_sb;
446 ++ struct dentry *lowerd_new = dentry->d_fsdata;
447 ++ struct inode *inode = NULL, *loweri_dir = diri->i_private;
448 ++ const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
449 ++ struct dentry *lowerd_link = NULL;
450 ++
451 ++ if (hardlink) {
452 ++ loweri_iop_ptr = loweri_dir_iop->link;
453 ++ } else {
454 ++ switch (mode & S_IFMT) {
455 ++ case S_IFDIR:
456 ++ loweri_iop_ptr = loweri_dir_iop->mkdir;
457 ++ break;
458 ++ case S_IFREG:
459 ++ loweri_iop_ptr = loweri_dir_iop->create;
460 ++ break;
461 ++ case S_IFLNK:
462 ++ loweri_iop_ptr = loweri_dir_iop->symlink;
463 ++ break;
464 ++ case S_IFSOCK:
465 ++ /* fall through */
466 ++ case S_IFIFO:
467 ++ loweri_iop_ptr = loweri_dir_iop->mknod;
468 ++ break;
469 ++ }
470 ++ }
471 ++ if (!loweri_iop_ptr) {
472 ++ err = -EINVAL;
473 ++ goto out_iput;
474 ++ }
475 ++
476 ++ inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
477 ++
478 ++ if (!hardlink) {
479 ++ inode = new_inode(dir_sb);
480 ++ if (!inode) {
481 ++ err = -ENOMEM;
482 ++ goto out_iput;
483 ++ }
484 ++
485 ++ /*
486 ++ * new_inode() will have added the new inode to the super
487 ++ * block's list of inodes. Further below we will call
488 ++ * inode_insert5(), which would perform the same operation again,
489 ++ * thereby corrupting the list. To avoid this, raise I_CREATING
490 ++ * in i_state, which will cause inode_insert5() to skip this
491 ++ * step. I_CREATING will be cleared by d_instantiate_new()
492 ++ * below.
493 ++ */
494 ++ spin_lock(&inode->i_lock);
495 ++ inode->i_state |= I_CREATING;
496 ++ spin_unlock(&inode->i_lock);
497 ++
498 ++ inode_init_owner(inode, diri, mode);
499 ++ modei = inode->i_mode;
500 ++ }
501 ++
502 ++ err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
503 ++ dentry, modei, hardlink != NULL);
504 ++ if (err)
505 ++ goto out_iput;
506 ++
507 ++ if (hardlink) {
508 ++ lowerd_link = hardlink->d_fsdata;
509 ++ err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
510 ++ } else {
511 ++ switch (modei & S_IFMT) {
512 ++ case S_IFDIR:
513 ++ err = vfs_mkdir(loweri_dir, lowerd_new, modei);
514 ++ break;
515 ++ case S_IFREG:
516 ++ err = vfs_create(loweri_dir, lowerd_new, modei, excl);
517 ++ break;
518 ++ case S_IFLNK:
519 ++ err = vfs_symlink(loweri_dir, lowerd_new, symlink);
520 ++ break;
521 ++ case S_IFSOCK:
522 ++ /* fall through */
523 ++ case S_IFIFO:
524 ++ err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
525 ++ break;
526 ++ default:
527 ++ err = -EINVAL;
528 ++ break;
529 ++ }
530 ++ }
531 ++
532 ++ shiftfs_revert_object_creds(oldcred, newcred);
533 ++
534 ++ if (!err && WARN_ON(!lowerd_new->d_inode))
535 ++ err = -EIO;
536 ++ if (err)
537 ++ goto out_iput;
538 ++
539 ++ if (hardlink) {
540 ++ inode = d_inode(hardlink);
541 ++ ihold(inode);
542 ++
543 ++ /* copy up times from lower inode */
544 ++ shiftfs_copyattr(d_inode(lowerd_link), inode);
545 ++ set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
546 ++ d_instantiate(dentry, inode);
547 ++ } else {
548 ++ struct inode *inode_tmp;
549 ++ struct inode *loweri_new = d_inode(lowerd_new);
550 ++
551 ++ inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
552 ++ shiftfs_inode_test, shiftfs_inode_set,
553 ++ loweri_new);
554 ++ if (unlikely(inode_tmp != inode)) {
555 ++ pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
556 ++ iput(inode_tmp);
557 ++ err = -EINVAL;
558 ++ goto out_iput;
559 ++ }
560 ++
561 ++ ihold(loweri_new);
562 ++ shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
563 ++ 0, lowerd_new);
564 ++ d_instantiate_new(dentry, inode);
565 ++ }
566 ++
567 ++ shiftfs_copyattr(loweri_dir, diri);
568 ++ if (loweri_iop_ptr == loweri_dir_iop->mkdir)
569 ++ set_nlink(diri, loweri_dir->i_nlink);
570 ++
571 ++ inode = NULL;
572 ++
573 ++out_iput:
574 ++ iput(inode);
575 ++ inode_unlock(loweri_dir);
576 ++
577 ++ return err;
578 ++}
579 ++
580 ++static int shiftfs_create(struct inode *dir, struct dentry *dentry,
581 ++ umode_t mode, bool excl)
582 ++{
583 ++ mode |= S_IFREG;
584 ++
585 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
586 ++}
587 ++
588 ++static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
589 ++ umode_t mode)
590 ++{
591 ++ mode |= S_IFDIR;
592 ++
593 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
594 ++}
595 ++
596 ++static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
597 ++ struct dentry *dentry)
598 ++{
599 ++ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
600 ++}
601 ++
602 ++static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
603 ++ dev_t rdev)
604 ++{
605 ++ if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
606 ++ return -EPERM;
607 ++
608 ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
609 ++}
610 ++
611 ++static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
612 ++ const char *symlink)
613 ++{
614 ++ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
615 ++}
616 ++
617 ++static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
618 ++{
619 ++ struct dentry *lowerd = dentry->d_fsdata;
620 ++ struct inode *loweri = dir->i_private;
621 ++ struct inode *inode = d_inode(dentry);
622 ++ int err;
623 ++ const struct cred *oldcred;
624 ++
625 ++ dget(lowerd);
626 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
627 ++ inode_lock_nested(loweri, I_MUTEX_PARENT);
628 ++ if (rmdir)
629 ++ err = vfs_rmdir(loweri, lowerd);
630 ++ else
631 ++ err = vfs_unlink(loweri, lowerd, NULL);
632 ++ revert_creds(oldcred);
633 ++
634 ++ if (!err) {
635 ++ d_drop(dentry);
636 ++
637 ++ if (rmdir)
638 ++ clear_nlink(inode);
639 ++ else
640 ++ drop_nlink(inode);
641 ++ }
642 ++ inode_unlock(loweri);
643 ++
644 ++ shiftfs_copyattr(loweri, dir);
645 ++ dput(lowerd);
646 ++
647 ++ return err;
648 ++}
649 ++
650 ++static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
651 ++{
652 ++ return shiftfs_rm(dir, dentry, false);
653 ++}
654 ++
655 ++static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
656 ++{
657 ++ return shiftfs_rm(dir, dentry, true);
658 ++}
659 ++
660 ++static int shiftfs_rename(struct inode *olddir, struct dentry *old,
661 ++ struct inode *newdir, struct dentry *new,
662 ++ unsigned int flags)
663 ++{
664 ++ struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
665 ++ *lowerd_dir_new = new->d_parent->d_fsdata,
666 ++ *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
667 ++ *trapd;
668 ++ struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
669 ++ *loweri_dir_new = lowerd_dir_new->d_inode;
670 ++ int err = -EINVAL;
671 ++ const struct cred *oldcred;
672 ++
673 ++ trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
674 ++
675 ++ if (trapd == lowerd_old || trapd == lowerd_new)
676 ++ goto out_unlock;
677 ++
678 ++ oldcred = shiftfs_override_creds(old->d_sb);
679 ++ err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
680 ++ NULL, flags);
681 ++ revert_creds(oldcred);
682 ++
683 ++ shiftfs_copyattr(loweri_dir_old, olddir);
684 ++ shiftfs_copyattr(loweri_dir_new, newdir);
685 ++
686 ++out_unlock:
687 ++ unlock_rename(lowerd_dir_new, lowerd_dir_old);
688 ++
689 ++ return err;
690 ++}
691 ++
692 ++static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
693 ++ unsigned int flags)
694 ++{
695 ++ struct dentry *new;
696 ++ struct inode *newi;
697 ++ const struct cred *oldcred;
698 ++ struct dentry *lowerd = dentry->d_parent->d_fsdata;
699 ++ struct inode *inode = NULL, *loweri = lowerd->d_inode;
700 ++
701 ++ inode_lock(loweri);
702 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
703 ++ new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
704 ++ revert_creds(oldcred);
705 ++ inode_unlock(loweri);
706 ++
707 ++ if (IS_ERR(new))
708 ++ return new;
709 ++
710 ++ dentry->d_fsdata = new;
711 ++
712 ++ newi = new->d_inode;
713 ++ if (!newi)
714 ++ goto out;
715 ++
716 ++ inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
717 ++ shiftfs_inode_test, shiftfs_inode_set, newi);
718 ++ if (!inode) {
719 ++ dput(new);
720 ++ return ERR_PTR(-ENOMEM);
721 ++ }
722 ++ if (inode->i_state & I_NEW) {
723 ++ /*
724 ++ * inode->i_private set by shiftfs_inode_set(), but we still
725 ++ * need to take a reference
726 ++ */
727 ++ ihold(newi);
728 ++ shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
729 ++ unlock_new_inode(inode);
730 ++ }
731 ++
732 ++out:
733 ++ return d_splice_alias(inode, dentry);
734 ++}
735 ++
736 ++static int shiftfs_permission(struct inode *inode, int mask)
737 ++{
738 ++ int err;
739 ++ const struct cred *oldcred;
740 ++ struct inode *loweri = inode->i_private;
741 ++
742 ++ if (!loweri) {
743 ++ WARN_ON(!(mask & MAY_NOT_BLOCK));
744 ++ return -ECHILD;
745 ++ }
746 ++
747 ++ err = generic_permission(inode, mask);
748 ++ if (err)
749 ++ return err;
750 ++
751 ++ oldcred = shiftfs_override_creds(inode->i_sb);
752 ++ err = inode_permission(loweri, mask);
753 ++ revert_creds(oldcred);
754 ++
755 ++ return err;
756 ++}
757 ++
758 ++static int shiftfs_fiemap(struct inode *inode,
759 ++ struct fiemap_extent_info *fieinfo, u64 start,
760 ++ u64 len)
761 ++{
762 ++ int err;
763 ++ const struct cred *oldcred;
764 ++ struct inode *loweri = inode->i_private;
765 ++
766 ++ if (!loweri->i_op->fiemap)
767 ++ return -EOPNOTSUPP;
768 ++
769 ++ oldcred = shiftfs_override_creds(inode->i_sb);
770 ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
771 ++ filemap_write_and_wait(loweri->i_mapping);
772 ++ err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
773 ++ revert_creds(oldcred);
774 ++
775 ++ return err;
776 ++}
777 ++
778 ++static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
779 ++ umode_t mode)
780 ++{
781 ++ int err;
782 ++ const struct cred *oldcred;
783 ++ struct dentry *lowerd = dentry->d_fsdata;
784 ++ struct inode *loweri = dir->i_private;
785 ++
786 ++ if (!loweri->i_op->tmpfile)
787 ++ return -EOPNOTSUPP;
788 ++
789 ++ oldcred = shiftfs_override_creds(dir->i_sb);
790 ++ err = loweri->i_op->tmpfile(loweri, lowerd, mode);
791 ++ revert_creds(oldcred);
792 ++
793 ++ return err;
794 ++}
795 ++
796 ++static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
797 ++{
798 ++ struct dentry *lowerd = dentry->d_fsdata;
799 ++ struct inode *loweri = lowerd->d_inode;
800 ++ struct iattr newattr;
801 ++ const struct cred *oldcred;
802 ++ struct super_block *sb = dentry->d_sb;
803 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
804 ++ int err;
805 ++
806 ++ err = setattr_prepare(dentry, attr);
807 ++ if (err)
808 ++ return err;
809 ++
810 ++ newattr = *attr;
811 ++ newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
812 ++ newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
813 ++
814 ++ /*
815 ++ * mode change is for clearing setuid/setgid bits. Allow lower fs
816 ++ * to interpret this in its own way.
817 ++ */
818 ++ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
819 ++ newattr.ia_valid &= ~ATTR_MODE;
820 ++
821 ++ inode_lock(loweri);
822 ++ oldcred = shiftfs_override_creds(dentry->d_sb);
823 ++ err = notify_change(lowerd, &newattr, NULL);
824 ++ revert_creds(oldcred);
825 ++ inode_unlock(loweri);
826 ++
827 ++ shiftfs_copyattr(loweri, d_inode(dentry));
828 ++
829 ++ return err;
830 ++}
831 ++
832 ++static int shiftfs_getattr(const struct path *path, struct kstat *stat,
833 ++ u32 request_mask, unsigned int query_flags)
834 ++{
835 ++ struct inode *inode = path->dentry->d_inode;
836 ++ struct dentry *lowerd = path->dentry->d_fsdata;
837 ++ struct inode *loweri = lowerd->d_inode;
838 ++ struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
839 ++ struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
840 ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
841 ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
842 ++ const struct cred *oldcred;
843 ++ int err;
844 ++
845 ++ oldcred = shiftfs_override_creds(inode->i_sb);
846 ++ err = vfs_getattr(&newpath, stat, request_mask, query_flags);
847 ++ revert_creds(oldcred);
848 ++
849 ++ if (err)
850 ++ return err;
851 ++
852 ++ /* transform the underlying id */
853 ++ stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
854 ++ stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
855 ++ return 0;
856 ++}
857 ++
858 ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL
859 ++
860 ++static int
861 ++shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
862 ++ struct posix_acl *acl)
863 ++{
864 ++ int i;
865 ++
866 ++ for (i = 0; i < acl->a_count; i++) {
867 ++ struct posix_acl_entry *e = &acl->a_entries[i];
868 ++ switch(e->e_tag) {
869 ++ case ACL_USER:
870 ++ e->e_uid = shift_kuid(from, to, e->e_uid);
871 ++ if (!uid_valid(e->e_uid))
872 ++ return -EOVERFLOW;
873 ++ break;
874 ++ case ACL_GROUP:
875 ++ e->e_gid = shift_kgid(from, to, e->e_gid);
876 ++ if (!gid_valid(e->e_gid))
877 ++ return -EOVERFLOW;
878 ++ break;
879 ++ }
880 ++ }
881 ++ return 0;
882 ++}
883 ++
884 ++static void
885 ++shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
886 ++ void *value, size_t size)
887 ++{
888 ++ struct posix_acl_xattr_header *header = value;
889 ++ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
890 ++ int count;
891 ++ kuid_t kuid;
892 ++ kgid_t kgid;
893 ++
894 ++ if (!value)
895 ++ return;
896 ++ if (size < sizeof(struct posix_acl_xattr_header))
897 ++ return;
898 ++ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
899 ++ return;
900 ++
901 ++ count = posix_acl_xattr_count(size);
902 ++ if (count < 0)
903 ++ return;
904 ++ if (count == 0)
905 ++ return;
906 ++
907 ++ for (end = entry + count; entry != end; entry++) {
908 ++ switch(le16_to_cpu(entry->e_tag)) {
909 ++ case ACL_USER:
910 ++ kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
911 ++ kuid = shift_kuid(from, to, kuid);
912 ++ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
913 ++ break;
914 ++ case ACL_GROUP:
915 ++ kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
916 ++ kgid = shift_kgid(from, to, kgid);
917 ++ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
918 ++ break;
919 ++ default:
920 ++ break;
921 ++ }
922 ++ }
923 ++}
924 ++
925 ++static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
926 ++{
927 ++ struct inode *loweri = inode->i_private;
928 ++ const struct cred *oldcred;
929 ++ struct posix_acl *lower_acl, *acl = NULL;
930 ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
931 ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns;
932 ++ int size;
933 ++ int err;
934 ++
935 ++ if (!IS_POSIXACL(loweri))
936 ++ return NULL;
937 ++
938 ++ oldcred = shiftfs_override_creds(inode->i_sb);
939 ++ lower_acl = get_acl(loweri, type);
940 ++ revert_creds(oldcred);
941 ++
942 ++ if (lower_acl && !IS_ERR(lower_acl)) {
943 ++ /* XXX: export posix_acl_clone? */
944 ++ size = sizeof(struct posix_acl) +
945 ++ lower_acl->a_count * sizeof(struct posix_acl_entry);
946 ++ acl = kmemdup(lower_acl, size, GFP_KERNEL);
947 ++ posix_acl_release(lower_acl);
948 ++
949 ++ if (!acl)
950 ++ return ERR_PTR(-ENOMEM);
951 ++
952 ++ refcount_set(&acl->a_refcount, 1);
953 ++
954 ++ err = shift_acl_ids(from_ns, to_ns, acl);
955 ++ if (err) {
956 ++ kfree(acl);
957 ++ return ERR_PTR(err);
958 ++ }
959 ++ }
960 ++
961 ++ return acl;
962 ++}
963 ++
964 ++static int
965 ++shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
966 ++ struct dentry *dentry, struct inode *inode,
967 ++ const char *name, void *buffer, size_t size)
968 ++{
969 ++ struct inode *loweri = inode->i_private;
970 ++ int ret;
971 ++
972 ++ ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
973 ++ buffer, size);
974 ++ if (ret < 0)
975 ++ return ret;
976 ++
977 ++ inode_lock(loweri);
978 ++ shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
979 ++ buffer, size);
980 ++ inode_unlock(loweri);
981 ++ return ret;
982 ++}
983 ++
984 ++static int
985 ++shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
986 ++ struct dentry *dentry, struct inode *inode,
987 ++ const char *name, const void *value,
988 ++ size_t size, int flags)
989 ++{
990 ++ struct inode *loweri = inode->i_private;
991 ++ int err;
992 ++
993 ++ if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
994 ++ return -EOPNOTSUPP;
995 ++ if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
996 ++ return value ? -EACCES : 0;
997 ++ if (!inode_owner_or_capable(inode))
998 ++ return -EPERM;
999 ++
1000 ++ if (value) {
1001 ++ shift_acl_xattr_ids(inode->i_sb->s_user_ns,
1002 ++ loweri->i_sb->s_user_ns,
1003 ++ (void *)value, size);
1004 ++ err = shiftfs_setxattr(dentry, inode, handler->name, value,
1005 ++ size, flags);
1006 ++ } else {
1007 ++ err = shiftfs_removexattr(dentry, handler->name);
1008 ++ }
1009 ++
1010 ++ if (!err)
1011 ++ shiftfs_copyattr(loweri, inode);
1012 ++
1013 ++ return err;
1014 ++}
1015 ++
1016 ++static const struct xattr_handler
1017 ++shiftfs_posix_acl_access_xattr_handler = {
1018 ++ .name = XATTR_NAME_POSIX_ACL_ACCESS,
1019 ++ .flags = ACL_TYPE_ACCESS,
1020 ++ .get = shiftfs_posix_acl_xattr_get,
1021 ++ .set = shiftfs_posix_acl_xattr_set,
1022 ++};
1023 ++
1024 ++static const struct xattr_handler
1025 ++shiftfs_posix_acl_default_xattr_handler = {
1026 ++ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1027 ++ .flags = ACL_TYPE_DEFAULT,
1028 ++ .get = shiftfs_posix_acl_xattr_get,
1029 ++ .set = shiftfs_posix_acl_xattr_set,
1030 ++};
1031 ++
1032 ++#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
1033 ++
1034 ++#define shiftfs_get_acl NULL
1035 ++
1036 ++#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
1037 ++
1038 ++static const struct inode_operations shiftfs_dir_inode_operations = {
1039 ++ .lookup = shiftfs_lookup,
1040 ++ .mkdir = shiftfs_mkdir,
1041 ++ .symlink = shiftfs_symlink,
1042 ++ .unlink = shiftfs_unlink,
1043 ++ .rmdir = shiftfs_rmdir,
1044 ++ .rename = shiftfs_rename,
1045 ++ .link = shiftfs_link,
1046 ++ .setattr = shiftfs_setattr,
1047 ++ .create = shiftfs_create,
1048 ++ .mknod = shiftfs_mknod,
1049 ++ .permission = shiftfs_permission,
1050 ++ .getattr = shiftfs_getattr,
1051 ++ .listxattr = shiftfs_listxattr,
1052 ++ .get_acl = shiftfs_get_acl,
1053 ++};
1054 ++/* Inode operations installed on regular files (S_IFREG case of shiftfs_fill_inode()). */
1055 ++static const struct inode_operations shiftfs_file_inode_operations = {
1056 ++ .fiemap = shiftfs_fiemap,
1057 ++ .getattr = shiftfs_getattr,
1058 ++ .get_acl = shiftfs_get_acl,
1059 ++ .listxattr = shiftfs_listxattr,
1060 ++ .permission = shiftfs_permission,
1061 ++ .setattr = shiftfs_setattr,
1062 ++ .tmpfile = shiftfs_tmpfile,
1063 ++};
1064 ++/* Inode operations for everything that is not a directory, symlink or regular file (default case of shiftfs_fill_inode()). */
1065 ++static const struct inode_operations shiftfs_special_inode_operations = {
1066 ++ .getattr = shiftfs_getattr,
1067 ++ .get_acl = shiftfs_get_acl,
1068 ++ .listxattr = shiftfs_listxattr,
1069 ++ .permission = shiftfs_permission,
1070 ++ .setattr = shiftfs_setattr,
1071 ++};
1072 ++/* Inode operations installed on symlinks (S_IFLNK case of shiftfs_fill_inode()); no .permission or .get_acl entry. */
1073 ++static const struct inode_operations shiftfs_symlink_inode_operations = {
1074 ++ .getattr = shiftfs_getattr,
1075 ++ .get_link = shiftfs_get_link,
1076 ++ .listxattr = shiftfs_listxattr,
1077 ++ .setattr = shiftfs_setattr,
1078 ++};
1079 ++/* Open the lower dentry behind @file on the lower mount (info->mnt) with info->creator_cred; returns open_with_fake_path()'s result, which callers test with IS_ERR(). */
1080 ++static struct file *shiftfs_open_realfile(const struct file *file,
1081 ++ struct inode *realinode)
1082 ++{
1083 ++ struct file *realfile;
1084 ++ const struct cred *old_cred;
1085 ++ struct inode *inode = file_inode(file);
1086 ++ struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1087 ++ struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1088 ++ struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1089 ++
1090 ++ old_cred = shiftfs_override_creds(inode->i_sb);
1091 ++ realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1092 ++ info->creator_cred);
1093 ++ revert_creds(old_cred);
1094 ++
1095 ++ return realfile;
1096 ++}
1097 ++/* File status flags that shiftfs_change_flags() is allowed to alter on an already-open file. */
1098 ++#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1099 ++/* Set the SHIFTFS_SETFL_MASK bits of @file->f_flags to those in @flags; returns 0, -EIO, -EPERM, -EINVAL or the error from ->check_flags(). */
1100 ++static int shiftfs_change_flags(struct file *file, unsigned int flags)
1101 ++{
1102 ++ struct inode *inode = file_inode(file);
1103 ++ int err;
1104 ++
1105 ++ /* if some flag changed that cannot be changed then something's amiss */
1106 ++ if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1107 ++ return -EIO;
1108 ++
1109 ++ flags &= SHIFTFS_SETFL_MASK;
1110 ++
1111 ++ if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) /* O_APPEND may not be toggled on an append-only inode */
1112 ++ return -EPERM;
1113 ++
1114 ++ if (flags & O_DIRECT) {
1115 ++ if (!file->f_mapping->a_ops ||
1116 ++ !file->f_mapping->a_ops->direct_IO)
1117 ++ return -EINVAL;
1118 ++ }
1119 ++
1120 ++ if (file->f_op->check_flags) {
1121 ++ err = file->f_op->check_flags(flags);
1122 ++ if (err)
1123 ++ return err;
1124 ++ }
1125 ++
1126 ++ spin_lock(&file->f_lock);
1127 ++ file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1128 ++ spin_unlock(&file->f_lock);
1129 ++
1130 ++ return 0;
1131 ++}
1132 ++/* ->open for regular files: open the lower file, keep it in file->private_data and point f_mapping at the lower mapping. */
1133 ++static int shiftfs_open(struct inode *inode, struct file *file)
1134 ++{
1135 ++ struct file *realfile;
1136 ++
1137 ++ realfile = shiftfs_open_realfile(file, inode->i_private);
1138 ++ if (IS_ERR(realfile))
1139 ++ return PTR_ERR(realfile);
1140 ++
1141 ++ file->private_data = realfile;
1142 ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1143 ++ file->f_mapping = realfile->f_mapping;
1144 ++
1145 ++ return 0;
1146 ++}
1147 ++/* ->open for directories: dentry_open() the lower directory with O_NOATIME added and keep the result in file->private_data. */
1148 ++static int shiftfs_dir_open(struct inode *inode, struct file *file)
1149 ++{
1150 ++ struct file *realfile;
1151 ++ const struct cred *oldcred;
1152 ++ struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1153 ++ struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1154 ++ struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1155 ++
1156 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1157 ++ realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1158 ++ info->creator_cred);
1159 ++ revert_creds(oldcred);
1160 ++ if (IS_ERR(realfile))
1161 ++ return PTR_ERR(realfile);
1162 ++
1163 ++ file->private_data = realfile;
1164 ++
1165 ++ return 0;
1166 ++}
1167 ++/* ->release: drop the reference on the lower file stored in file->private_data, if one is set; always returns 0. */
1168 ++static int shiftfs_release(struct inode *inode, struct file *file)
1169 ++{
1170 ++ struct file *realfile = file->private_data;
1171 ++
1172 ++ if (realfile)
1173 ++ fput(realfile);
1174 ++
1175 ++ return 0;
1176 ++}
1177 ++/* ->release for directories: identical to shiftfs_release(). */
1178 ++static int shiftfs_dir_release(struct inode *inode, struct file *file)
1179 ++{
1180 ++ return shiftfs_release(inode, file);
1181 ++}
1182 ++/* ->llseek for directories: seek the lower directory file kept in file->private_data. */
1183 ++static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1184 ++{
1185 ++ struct file *realfile = file->private_data;
1186 ++
1187 ++ return vfs_llseek(realfile, offset, whence);
1188 ++}
1189 ++/* ->llseek for regular files: generic seek on the shiftfs file itself, bounded by the lower superblock's s_maxbytes and the lower inode's size. */
1190 ++static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
1191 ++{
1192 ++ struct inode *realinode = file_inode(file)->i_private;
1193 ++
1194 ++ return generic_file_llseek_size(file, offset, whence,
1195 ++ realinode->i_sb->s_maxbytes,
1196 ++ i_size_read(realinode));
1197 ++}
1198 ++
1199 ++/* XXX: Need to figure out what to do about atime updates, maybe other
1200 ++ * timestamps too ... ref. ovl_file_accessed() */
1201 ++/* Translate the IOCB_NOWAIT/HIPRI/DSYNC/SYNC bits of iocb->ki_flags into the matching RWF_* flags; other bits are dropped. */
1202 ++static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1203 ++{
1204 ++ int ifl = iocb->ki_flags;
1205 ++ rwf_t flags = 0;
1206 ++
1207 ++ if (ifl & IOCB_NOWAIT)
1208 ++ flags |= RWF_NOWAIT;
1209 ++ if (ifl & IOCB_HIPRI)
1210 ++ flags |= RWF_HIPRI;
1211 ++ if (ifl & IOCB_DSYNC)
1212 ++ flags |= RWF_DSYNC;
1213 ++ if (ifl & IOCB_SYNC)
1214 ++ flags |= RWF_SYNC;
1215 ++
1216 ++ return flags;
1217 ++}
1218 ++/* Fill @lowerfd with the lower file of shiftfs @file (flags = 0, no reference taken) and resync its status flags if @file has bits the lower file lacks; -EINVAL if @file was not opened by shiftfs. */
1219 ++static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1220 ++{
1221 ++ struct file *realfile;
1222 ++
1223 ++ if (file->f_op->open != shiftfs_open &&
1224 ++ file->f_op->open != shiftfs_dir_open)
1225 ++ return -EINVAL;
1226 ++
1227 ++ realfile = file->private_data;
1228 ++ lowerfd->flags = 0;
1229 ++ lowerfd->file = realfile;
1230 ++
1231 ++ /* Did the flags change since open? */
1232 ++ if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1233 ++ return shiftfs_change_flags(lowerfd->file, file->f_flags);
1234 ++
1235 ++ return 0;
1236 ++}
1237 ++/* ->read_iter: read from the lower file at iocb->ki_pos under the credentials from shiftfs_override_creds(); zero-length reads return 0 immediately. */
1238 ++static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1239 ++{
1240 ++ struct file *file = iocb->ki_filp;
1241 ++ struct fd lowerfd;
1242 ++ const struct cred *oldcred;
1243 ++ ssize_t ret;
1244 ++
1245 ++ if (!iov_iter_count(iter))
1246 ++ return 0;
1247 ++
1248 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1249 ++ if (ret)
1250 ++ return ret;
1251 ++
1252 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1253 ++ ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1254 ++ shiftfs_iocb_to_rwf(iocb));
1255 ++ revert_creds(oldcred);
1256 ++
1257 ++ shiftfs_file_accessed(file);
1258 ++
1259 ++ fdput(lowerfd);
1260 ++ return ret;
1261 ++}
1262 ++/* ->write_iter: under inode_lock(), refresh attributes from the lower inode, strip privileges, write to the lower file with overridden credentials, then copy the attributes back. */
1263 ++static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1264 ++{
1265 ++ struct file *file = iocb->ki_filp;
1266 ++ struct inode *inode = file_inode(file);
1267 ++ struct fd lowerfd;
1268 ++ const struct cred *oldcred;
1269 ++ ssize_t ret;
1270 ++
1271 ++ if (!iov_iter_count(iter))
1272 ++ return 0;
1273 ++
1274 ++ inode_lock(inode);
1275 ++ /* Update mode */
1276 ++ shiftfs_copyattr(inode->i_private, inode);
1277 ++ ret = file_remove_privs(file);
1278 ++ if (ret)
1279 ++ goto out_unlock;
1280 ++
1281 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1282 ++ if (ret)
1283 ++ goto out_unlock;
1284 ++
1285 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1286 ++ file_start_write(lowerfd.file);
1287 ++ ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1288 ++ shiftfs_iocb_to_rwf(iocb));
1289 ++ file_end_write(lowerfd.file);
1290 ++ revert_creds(oldcred);
1291 ++
1292 ++ /* Update size */
1293 ++ shiftfs_copyattr(inode->i_private, inode);
1294 ++
1295 ++ fdput(lowerfd);
1296 ++
1297 ++out_unlock:
1298 ++ inode_unlock(inode);
1299 ++ return ret;
1300 ++}
1301 ++/* ->fsync (files and directories): forward the range to vfs_fsync_range() on the lower file under overridden credentials. */
1302 ++static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1303 ++ int datasync)
1304 ++{
1305 ++ struct fd lowerfd;
1306 ++ const struct cred *oldcred;
1307 ++ int ret;
1308 ++
1309 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1310 ++ if (ret)
1311 ++ return ret;
1312 ++
1313 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1314 ++ ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1315 ++ revert_creds(oldcred);
1316 ++
1317 ++ fdput(lowerfd);
1318 ++ return ret;
1319 ++}
1320 ++/* ->mmap: map the lower file by swapping vma->vm_file to it before calling its ->mmap; -ENODEV if the lower file cannot be mapped, -EIO if @vma does not belong to @file. */
1321 ++static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1322 ++{
1323 ++ struct file *realfile = file->private_data;
1324 ++ const struct cred *oldcred;
1325 ++ int ret;
1326 ++
1327 ++ if (!realfile->f_op->mmap)
1328 ++ return -ENODEV;
1329 ++
1330 ++ if (WARN_ON(file != vma->vm_file))
1331 ++ return -EIO;
1332 ++
1333 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1334 ++ vma->vm_file = get_file(realfile);
1335 ++ ret = call_mmap(vma->vm_file, vma);
1336 ++ revert_creds(oldcred);
1337 ++
1338 ++ shiftfs_file_accessed(file);
1339 ++
1340 ++ if (ret) {
1341 ++ /*
1342 ++ * Drop refcount from new vm_file value and restore original
1343 ++ * vm_file value
1344 ++ */
1345 ++ vma->vm_file = file;
1346 ++ fput(realfile);
1347 ++ } else {
1348 ++ /* Drop refcount from previous vm_file value */
1349 ++ fput(file);
1350 ++ }
1351 ++
1352 ++ return ret;
1353 ++}
1354 ++/* ->fallocate: run vfs_fallocate() on the lower file under overridden credentials, then refresh the shiftfs inode's attributes whether or not it succeeded. */
1355 ++static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1356 ++ loff_t len)
1357 ++{
1358 ++ struct inode *inode = file_inode(file);
1359 ++ struct inode *loweri = inode->i_private;
1360 ++ struct fd lowerfd;
1361 ++ const struct cred *oldcred;
1362 ++ int ret;
1363 ++
1364 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1365 ++ if (ret)
1366 ++ return ret;
1367 ++
1368 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1369 ++ ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1370 ++ revert_creds(oldcred);
1371 ++
1372 ++ /* Update size */
1373 ++ shiftfs_copyattr(loweri, inode);
1374 ++
1375 ++ fdput(lowerfd);
1376 ++ return ret;
1377 ++}
1378 ++/* ->fadvise: forward the advice to vfs_fadvise() on the lower file under overridden credentials. */
1379 ++static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1380 ++ int advice)
1381 ++{
1382 ++ struct fd lowerfd;
1383 ++ const struct cred *oldcred;
1384 ++ int ret;
1385 ++
1386 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1387 ++ if (ret)
1388 ++ return ret;
1389 ++
1390 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1391 ++ ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1392 ++ revert_creds(oldcred);
1393 ++
1394 ++ fdput(lowerfd);
1395 ++ return ret;
1396 ++}
1397 ++/* Install credentials for a passthrough ioctl: on top of shiftfs_override_creds(), use the caller's fsuid/fsgid shifted by shift_kuid()/shift_kgid() and no capabilities (CAP_DAC_OVERRIDE only for BTRFS_IOC_SNAP_DESTROY by the sb namespace's uid 0). Returns 0 or -ENOMEM. */
1398 ++static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
1399 ++ const struct cred **oldcred,
1400 ++ struct cred **newcred)
1401 ++{
1402 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1403 ++ kuid_t fsuid = current_fsuid();
1404 ++ kgid_t fsgid = current_fsgid();
1405 ++
1406 ++ *oldcred = shiftfs_override_creds(sb);
1407 ++
1408 ++ *newcred = prepare_creds();
1409 ++ if (!*newcred) {
1410 ++ revert_creds(*oldcred);
1411 ++ return -ENOMEM;
1412 ++ }
1413 ++
1414 ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1415 ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
1416 ++
1417 ++ /* clear all caps to prevent bypassing capable() checks */
1418 ++ cap_clear((*newcred)->cap_bset);
1419 ++ cap_clear((*newcred)->cap_effective);
1420 ++ cap_clear((*newcred)->cap_inheritable);
1421 ++ cap_clear((*newcred)->cap_permitted);
1422 ++
1423 ++ if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1424 ++ kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1425 ++ /*
1426 ++ * Allow the root user in the container to remove subvolumes
1427 ++ * from other users.
1428 ++ */
1429 ++ if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1430 ++ cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1431 ++ }
1432 ++
1433 ++ put_cred(override_creds(*newcred));
1434 ++ return 0;
1435 ++}
1436 ++/* Counterpart of shiftfs_override_ioctl_creds(); simply delegates to shiftfs_revert_object_creds(). */
1437 ++static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
1438 ++ struct cred *newcred)
1439 ++{
1440 ++ return shiftfs_revert_object_creds(oldcred, newcred);
1441 ++}
1442 ++/* True for BTRFS_IOC_SNAP_CREATE and BTRFS_IOC_SNAP_CREATE_V2, the two ioctls whose argument carries a file descriptor that shiftfs rewrites. */
1443 ++static inline bool is_btrfs_snap_ioctl(int cmd)
1444 ++{
1445 ++ if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1446 ++ return true;
1447 ++
1448 ++ return false;
1449 ++}
1450 ++/* Undo shiftfs_btrfs_ioctl_fd_replace() for snapshot ioctls: copy the saved args back to userspace, close the temporary @fd and free both kernel copies. Returns 0 or -EFAULT. */
1451 ++static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
1452 ++ struct btrfs_ioctl_vol_args *v1,
1453 ++ struct btrfs_ioctl_vol_args_v2 *v2)
1454 ++{
1455 ++ int ret;
1456 ++
1457 ++ if (!is_btrfs_snap_ioctl(cmd))
1458 ++ return 0;
1459 ++
1460 ++ if (cmd == BTRFS_IOC_SNAP_CREATE)
1461 ++ ret = copy_to_user(arg, v1, sizeof(*v1));
1462 ++ else
1463 ++ ret = copy_to_user(arg, v2, sizeof(*v2));
1464 ++
1465 ++ __close_fd(current->files, fd);
1466 ++ kfree(v1);
1467 ++ kfree(v2);
1468 ++ /* copy_to_user() returns the number of bytes NOT copied, not an errno */
1469 ++ return ret ? -EFAULT : 0;
1470 ++}
1471 ++
1472 ++static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1473 ++ struct btrfs_ioctl_vol_args **b1,
1474 ++ struct btrfs_ioctl_vol_args_v2 **b2,
1475 ++ int *newfd)
1476 ++{
1477 ++ int oldfd, ret;
1478 ++ struct fd src;
1479 ++ struct fd lfd = {};
1480 ++ struct btrfs_ioctl_vol_args *v1 = NULL;
1481 ++ struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1482 ++
1483 ++ if (!is_btrfs_snap_ioctl(cmd))
1484 ++ return 0;
1485 ++
1486 ++ if (cmd == BTRFS_IOC_SNAP_CREATE) {
1487 ++ v1 = memdup_user(arg, sizeof(*v1));
1488 ++ if (IS_ERR(v1))
1489 ++ return PTR_ERR(v1);
1490 ++ oldfd = v1->fd;
1491 ++ *b1 = v1;
1492 ++ } else {
1493 ++ v2 = memdup_user(arg, sizeof(*v2));
1494 ++ if (IS_ERR(v2))
1495 ++ return PTR_ERR(v2);
1496 ++ oldfd = v2->fd;
1497 ++ *b2 = v2;
1498 ++ }
1499 ++
1500 ++ src = fdget(oldfd);
1501 ++ if (!src.file)
1502 ++ return -EINVAL;
1503 ++
1504 ++ ret = shiftfs_real_fdget(src.file, &lfd);
1505 ++ if (ret) {
1506 ++ fdput(src);
1507 ++ return ret;
1508 ++ }
1509 ++
1510 ++ /*
1511 ++ * shiftfs_real_fdget() does not take a reference to lfd.file, so
1512 ++ * take a reference here to offset the one which will be put by
1513 ++ * __close_fd(), and make sure that reference is put on fdput(lfd).
1514 ++ */
1515 ++ get_file(lfd.file);
1516 ++ lfd.flags |= FDPUT_FPUT;
1517 ++ fdput(src);
1518 ++
1519 ++ *newfd = get_unused_fd_flags(lfd.file->f_flags);
1520 ++ if (*newfd < 0) {
1521 ++ fdput(lfd);
1522 ++ return *newfd;
1523 ++ }
1524 ++
1525 ++ fd_install(*newfd, lfd.file);
1526 ++
1527 ++ if (cmd == BTRFS_IOC_SNAP_CREATE) {
1528 ++ v1->fd = *newfd;
1529 ++ ret = copy_to_user(arg, v1, sizeof(*v1));
1530 ++ v1->fd = oldfd;
1531 ++ } else {
1532 ++ v2->fd = *newfd;
1533 ++ ret = copy_to_user(arg, v2, sizeof(*v2));
1534 ++ v2->fd = oldfd;
1535 ++ }
1536 ++
1537 ++ if (ret)
1538 ++ shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
1539 ++
1540 ++ return ret;
1541 ++}
1542 ++/* Forward an ioctl to the lower file: rewrite the fd inside btrfs snapshot args, switch to the restricted ioctl credentials, call vfs_ioctl(), then refresh attributes and flags and undo the fd rewrite. */
1543 ++static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1544 ++ unsigned long arg)
1545 ++{
1546 ++ struct fd lowerfd;
1547 ++ struct cred *newcred;
1548 ++ const struct cred *oldcred;
1549 ++ int newfd = -EBADF;
1550 ++ long err = 0, ret = 0;
1551 ++ void __user *argp = (void __user *)arg;
1552 ++ struct super_block *sb = file->f_path.dentry->d_sb;
1553 ++ struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1554 ++ struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1555 ++
1556 ++ ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1557 ++ &newfd);
1558 ++ if (ret < 0)
1559 ++ return ret;
1560 ++
1561 ++ ret = shiftfs_real_fdget(file, &lowerfd);
1562 ++ if (ret)
1563 ++ goto out_restore;
1564 ++
1565 ++ ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
1566 ++ if (ret)
1567 ++ goto out_fdput;
1568 ++
1569 ++ ret = vfs_ioctl(lowerfd.file, cmd, arg);
1570 ++
1571 ++ shiftfs_revert_ioctl_creds(oldcred, newcred);
1572 ++
1573 ++ shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1574 ++ shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1575 ++
1576 ++out_fdput:
1577 ++ fdput(lowerfd);
1578 ++
1579 ++out_restore:
1580 ++ err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1581 ++ btrfs_v1, btrfs_v2);
1582 ++ if (!ret) /* a restore failure is reported only when the ioctl itself succeeded */
1583 ++ ret = err;
1584 ++
1585 ++ return ret;
1586 ++}
1587 ++/* True for the btrfs ioctls shiftfs will pass through; BTRFS_IOC_SUBVOL_SETFLAGS qualifies only if the flags read from userspace contain nothing besides BTRFS_SUBVOL_RDONLY. */
1588 ++static bool in_ioctl_whitelist(int flag, unsigned long arg)
1589 ++{
1590 ++ void __user *argp = (void __user *)arg;
1591 ++ u64 flags = 0;
1592 ++
1593 ++ switch (flag) {
1594 ++ case BTRFS_IOC_FS_INFO:
1595 ++ return true;
1596 ++ case BTRFS_IOC_SNAP_CREATE:
1597 ++ return true;
1598 ++ case BTRFS_IOC_SNAP_CREATE_V2:
1599 ++ return true;
1600 ++ case BTRFS_IOC_SUBVOL_CREATE:
1601 ++ return true;
1602 ++ case BTRFS_IOC_SUBVOL_CREATE_V2:
1603 ++ return true;
1604 ++ case BTRFS_IOC_SUBVOL_GETFLAGS:
1605 ++ return true;
1606 ++ case BTRFS_IOC_SUBVOL_SETFLAGS:
1607 ++ if (copy_from_user(&flags, argp, sizeof(flags)))
1608 ++ return false;
1609 ++
1610 ++ if (flags & ~BTRFS_SUBVOL_RDONLY)
1611 ++ return false;
1612 ++
1613 ++ return true;
1614 ++ case BTRFS_IOC_SNAP_DESTROY:
1615 ++ return true;
1616 ++ }
1617 ++
1618 ++ return false;
1619 ++}
1620 ++/* ->unlocked_ioctl: FS_IOC_GETVERSION/GETFLAGS/SETFLAGS are always forwarded; any other command must be whitelisted and the mount must enable ioctl passthrough, else -ENOTTY. */
1621 ++static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1622 ++ unsigned long arg)
1623 ++{
1624 ++ switch (cmd) {
1625 ++ case FS_IOC_GETVERSION:
1626 ++ /* fall through */
1627 ++ case FS_IOC_GETFLAGS:
1628 ++ /* fall through */
1629 ++ case FS_IOC_SETFLAGS:
1630 ++ break;
1631 ++ default:
1632 ++ if (!in_ioctl_whitelist(cmd, arg) ||
1633 ++ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1634 ++ return -ENOTTY;
1635 ++ }
1636 ++
1637 ++ return shiftfs_real_ioctl(file, cmd, arg);
1638 ++}
1639 ++/* ->compat_ioctl: same policy as shiftfs_ioctl() but keyed on the FS_IOC32_* numbers, and rejected commands get -ENOIOCTLCMD instead of -ENOTTY. */
1640 ++static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1641 ++ unsigned long arg)
1642 ++{
1643 ++ switch (cmd) {
1644 ++ case FS_IOC32_GETVERSION:
1645 ++ /* fall through */
1646 ++ case FS_IOC32_GETFLAGS:
1647 ++ /* fall through */
1648 ++ case FS_IOC32_SETFLAGS:
1649 ++ break;
1650 ++ default:
1651 ++ if (!in_ioctl_whitelist(cmd, arg) ||
1652 ++ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1653 ++ return -ENOIOCTLCMD;
1654 ++ }
1655 ++
1656 ++ return shiftfs_real_ioctl(file, cmd, arg);
1657 ++}
1658 ++/* Selects which VFS helper shiftfs_copyfile() calls: copy_file_range, clone_file_range or dedupe_file_range_one. */
1659 ++enum shiftfs_copyop {
1660 ++ SHIFTFS_COPY,
1661 ++ SHIFTFS_CLONE,
1662 ++ SHIFTFS_DEDUPE,
1663 ++};
1664 ++/* Common backend for copy/clone/dedupe: resolve both shiftfs files to their lower files, run the VFS helper selected by @op under overridden credentials, then refresh the destination inode's attributes. */
1665 ++static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1666 ++ struct file *file_out, loff_t pos_out, u64 len,
1667 ++ unsigned int flags, enum shiftfs_copyop op)
1668 ++{
1669 ++ ssize_t ret;
1670 ++ struct fd real_in, real_out;
1671 ++ const struct cred *oldcred;
1672 ++ struct inode *inode_out = file_inode(file_out);
1673 ++ struct inode *loweri = inode_out->i_private;
1674 ++
1675 ++ ret = shiftfs_real_fdget(file_out, &real_out);
1676 ++ if (ret)
1677 ++ return ret;
1678 ++
1679 ++ ret = shiftfs_real_fdget(file_in, &real_in);
1680 ++ if (ret) {
1681 ++ fdput(real_out);
1682 ++ return ret;
1683 ++ }
1684 ++
1685 ++ oldcred = shiftfs_override_creds(inode_out->i_sb);
1686 ++ switch (op) {
1687 ++ case SHIFTFS_COPY:
1688 ++ ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1689 ++ pos_out, len, flags);
1690 ++ break;
1691 ++
1692 ++ case SHIFTFS_CLONE:
1693 ++ ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1694 ++ pos_out, len, flags);
1695 ++ break;
1696 ++
1697 ++ case SHIFTFS_DEDUPE:
1698 ++ ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1699 ++ real_out.file, pos_out, len,
1700 ++ flags);
1701 ++ break;
1702 ++ }
1703 ++ revert_creds(oldcred);
1704 ++
1705 ++ /* Update size */
1706 ++ shiftfs_copyattr(loweri, inode_out);
1707 ++
1708 ++ fdput(real_in);
1709 ++ fdput(real_out);
1710 ++
1711 ++ return ret;
1712 ++}
1713 ++/* ->copy_file_range: shiftfs_copyfile() with SHIFTFS_COPY. */
1714 ++static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1715 ++ struct file *file_out, loff_t pos_out,
1716 ++ size_t len, unsigned int flags)
1717 ++{
1718 ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1719 ++ SHIFTFS_COPY);
1720 ++}
1721 ++/* ->remap_file_range: -EINVAL for flags other than REMAP_FILE_DEDUP/REMAP_FILE_ADVISORY; otherwise dedupe when REMAP_FILE_DEDUP is set, clone when it is not. */
1722 ++static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1723 ++ struct file *file_out, loff_t pos_out,
1724 ++ loff_t len, unsigned int remap_flags)
1725 ++{
1726 ++ enum shiftfs_copyop op;
1727 ++
1728 ++ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1729 ++ return -EINVAL;
1730 ++
1731 ++ if (remap_flags & REMAP_FILE_DEDUP)
1732 ++ op = SHIFTFS_DEDUPE;
1733 ++ else
1734 ++ op = SHIFTFS_CLONE;
1735 ++
1736 ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1737 ++ remap_flags, op);
1738 ++}
1739 ++/* ->iterate_shared: run iterate_dir() on the lower directory file under overridden credentials and return its result. */
1740 ++static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1741 ++{
1742 ++ const struct cred *oldcred;
1743 ++ int err = -ENOTDIR;
1744 ++ struct file *realfile = file->private_data;
1745 ++
1746 ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1747 ++ err = iterate_dir(realfile, ctx);
1748 ++ revert_creds(oldcred);
1749 ++
1750 ++ return err;
1751 ++}
1752 ++/* File operations installed on regular files by shiftfs_fill_inode(). */
1753 ++const struct file_operations shiftfs_file_operations = {
1754 ++ .open = shiftfs_open,
1755 ++ .release = shiftfs_release,
1756 ++ .llseek = shiftfs_file_llseek,
1757 ++ .read_iter = shiftfs_read_iter,
1758 ++ .write_iter = shiftfs_write_iter,
1759 ++ .fsync = shiftfs_fsync,
1760 ++ .mmap = shiftfs_mmap,
1761 ++ .fallocate = shiftfs_fallocate,
1762 ++ .fadvise = shiftfs_fadvise,
1763 ++ .unlocked_ioctl = shiftfs_ioctl,
1764 ++ .compat_ioctl = shiftfs_compat_ioctl,
1765 ++ .copy_file_range = shiftfs_copy_file_range,
1766 ++ .remap_file_range = shiftfs_remap_file_range,
1767 ++};
1768 ++/* File operations installed on directories by shiftfs_fill_inode(); fsync and the ioctl entry points are shared with regular files. */
1769 ++const struct file_operations shiftfs_dir_operations = {
1770 ++ .open = shiftfs_dir_open,
1771 ++ .release = shiftfs_dir_release,
1772 ++ .compat_ioctl = shiftfs_compat_ioctl,
1773 ++ .fsync = shiftfs_fsync,
1774 ++ .iterate_shared = shiftfs_iterate_shared,
1775 ++ .llseek = shiftfs_dir_llseek,
1776 ++ .read = generic_read_dir,
1777 ++ .unlocked_ioctl = shiftfs_ioctl,
1778 ++};
1779 ++/* Address-space operations set on regular-file mappings by shiftfs_fill_inode(); only ->direct_IO is provided, as a no-op. */
1780 ++static const struct address_space_operations shiftfs_aops = {
1781 ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1782 ++ .direct_IO = noop_direct_IO,
1783 ++};
1784 ++/* Initialise a shiftfs inode: set i_ino and S_NOCMTIME, keep only the file-type bits of @mode, pick the operation tables for that type and, if a lower @dentry is given, copy attributes, flags and link count from its inode. */
1785 ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1786 ++ umode_t mode, dev_t dev, struct dentry *dentry)
1787 ++{
1788 ++ struct inode *loweri;
1789 ++
1790 ++ inode->i_ino = ino;
1791 ++ inode->i_flags |= S_NOCMTIME;
1792 ++
1793 ++ mode &= S_IFMT;
1794 ++ inode->i_mode = mode;
1795 ++ switch (mode & S_IFMT) {
1796 ++ case S_IFDIR:
1797 ++ inode->i_op = &shiftfs_dir_inode_operations;
1798 ++ inode->i_fop = &shiftfs_dir_operations;
1799 ++ break;
1800 ++ case S_IFLNK:
1801 ++ inode->i_op = &shiftfs_symlink_inode_operations;
1802 ++ break;
1803 ++ case S_IFREG:
1804 ++ inode->i_op = &shiftfs_file_inode_operations;
1805 ++ inode->i_fop = &shiftfs_file_operations;
1806 ++ inode->i_mapping->a_ops = &shiftfs_aops;
1807 ++ break;
1808 ++ default:
1809 ++ inode->i_op = &shiftfs_special_inode_operations;
1810 ++ init_special_inode(inode, mode, dev);
1811 ++ break;
1812 ++ }
1813 ++
1814 ++ if (!dentry)
1815 ++ return;
1816 ++
1817 ++ loweri = dentry->d_inode;
1818 ++ if (!loweri->i_op->get_link) /* lower inode has no ->get_link: mark ours IOP_NOFOLLOW */
1819 ++ inode->i_opflags |= IOP_NOFOLLOW;
1820 ++
1821 ++ shiftfs_copyattr(loweri, inode);
1822 ++ shiftfs_copyflags(loweri, inode);
1823 ++ set_nlink(inode, loweri->i_nlink);
1824 ++}
1825 ++/* ->show_options: emit "mark" when the mark option is set and "passthrough=<n>" when passthrough is non-zero. */
1826 ++static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1827 ++{
1828 ++ struct super_block *sb = dentry->d_sb;
1829 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1830 ++
1831 ++ if (sbinfo->mark)
1832 ++ seq_show_option(m, "mark", NULL);
1833 ++
1834 ++ if (sbinfo->passthrough)
1835 ++ seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1836 ++
1837 ++ return 0;
1838 ++}
1839 ++/* ->statfs: report vfs_statfs() of the lower root, replacing f_type with shiftfs's own magic unless statfs passthrough is enabled. */
1840 ++static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1841 ++{
1842 ++ struct super_block *sb = dentry->d_sb;
1843 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1844 ++ struct dentry *root = sb->s_root;
1845 ++ struct dentry *realroot = root->d_fsdata;
1846 ++ struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1847 ++ int err;
1848 ++
1849 ++ err = vfs_statfs(&realpath, buf);
1850 ++ if (err)
1851 ++ return err;
1852 ++
1853 ++ if (!shiftfs_passthrough_statfs(sbinfo))
1854 ++ buf->f_type = sb->s_magic;
1855 ++
1856 ++ return 0;
1857 ++}
1858 ++/* ->evict_inode: clear the shiftfs inode, then drop the reference on the lower inode kept in i_private (if any). */
1859 ++static void shiftfs_evict_inode(struct inode *inode)
1860 ++{
1861 ++ struct inode *loweri = inode->i_private;
1862 ++
1863 ++ clear_inode(inode);
1864 ++
1865 ++ if (loweri)
1866 ++ iput(loweri);
1867 ++}
1868 ++/* ->put_super: release what shiftfs_fill_super() attached to sb->s_fs_info -- the lower mount, the creator credentials, the lower user namespace reference and the info structure itself. */
1869 ++static void shiftfs_put_super(struct super_block *sb)
1870 ++{
1871 ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1872 ++ if (sbinfo) {
1873 ++ mntput(sbinfo->mnt);
1874 ++ put_cred(sbinfo->creator_cred);
1875 ++ put_user_ns(sbinfo->userns); /* pairs with get_user_ns() in shiftfs_fill_super() */
1876 ++ kfree(sbinfo);
1877 ++ }
1878 ++}
1879 ++/* Catch-all xattr handler (empty prefix) backed by shiftfs_xattr_get()/shiftfs_xattr_set(). */
1880 ++static const struct xattr_handler shiftfs_xattr_handler = {
1881 ++ .prefix = "",
1882 ++ .get = shiftfs_xattr_get,
1883 ++ .set = shiftfs_xattr_set,
1884 ++};
1885 ++/* NULL-terminated handler list installed as sb->s_xattr; the ACL handlers (when configured) precede the catch-all handler. */
1886 ++const struct xattr_handler *shiftfs_xattr_handlers[] = {
1887 ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1888 ++ &shiftfs_posix_acl_access_xattr_handler,
1889 ++ &shiftfs_posix_acl_default_xattr_handler,
1890 ++#endif
1891 ++ &shiftfs_xattr_handler,
1892 ++ NULL
1893 ++};
1894 ++/* True when every bit set in @new_flags is also set in @old_flags. */
1895 ++static inline bool passthrough_is_subset(int old_flags, int new_flags)
1896 ++{
1897 ++ if ((new_flags & old_flags) != new_flags)
1898 ++ return false;
1899 ++
1900 ++ return true;
1901 ++}
1902 ++/* Remount flag check: none of SB_RDONLY/NOSUID/NODEV/NOEXEC/NOATIME/NODIRATIME may be cleared and SB_POSIXACL may not be newly set; returns -EPERM on violation, else 0. */
1903 ++static int shiftfs_super_check_flags(unsigned long old_flags,
1904 ++ unsigned long new_flags)
1905 ++{
1906 ++ if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1907 ++ return -EPERM;
1908 ++
1909 ++ if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1910 ++ return -EPERM;
1911 ++
1912 ++ if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1913 ++ return -EPERM;
1914 ++
1915 ++ if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1916 ++ return -EPERM;
1917 ++
1918 ++ if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1919 ++ return -EPERM;
1920 ++
1921 ++ if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1922 ++ return -EPERM;
1923 ++
1924 ++ if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1925 ++ return -EPERM;
1926 ++
1927 ++ return 0;
1928 ++}
1929 ++
1930 ++static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1931 ++{
1932 ++ int err;
1933 ++ struct shiftfs_super_info new = {};
1934 ++ struct shiftfs_super_info *info = sb->s_fs_info;
1935 ++
1936 ++ err = shiftfs_parse_mount_options(&new, data);
1937 ++ if (err)
1938 ++ return err;
1939 ++
1940 ++ err = shiftfs_super_check_flags(sb->s_flags, *flags);
1941 ++ if (err)
1942 ++ return err;
1943 ++
1944 ++ /* Mark mount option cannot be changed. */
1945 ++ if (info->mark || (info->mark != new.mark))
1946 ++ return -EPERM;
1947 ++
1948 ++ if (info->passthrough != new.passthrough) {
1949 ++ /* Don't allow exceeding passthrough options of mark mount. */
1950 ++ if (!passthrough_is_subset(info->passthrough_mark,
1951 ++ info->passthrough))
1952 ++ return -EPERM;
1953 ++
1954 ++ info->passthrough = new.passthrough;
1955 ++ }
1956 ++
1957 ++ return 0;
1958 ++}
1959 ++/* Superblock operations installed as sb->s_op by shiftfs_fill_super(). */
1960 ++static const struct super_operations shiftfs_super_ops = {
1961 ++ .put_super = shiftfs_put_super,
1962 ++ .show_options = shiftfs_show_options,
1963 ++ .statfs = shiftfs_statfs,
1964 ++ .remount_fs = shiftfs_remount,
1965 ++ .evict_inode = shiftfs_evict_inode,
1966 ++};
1967 ++/* Argument bundle handed from shiftfs_mount() to shiftfs_fill_super(): the raw mount option data and the source path (dev_name). */
1968 ++struct shiftfs_data {
1969 ++ void *data;
1970 ++ const char *path;
1971 ++};
1972 ++/* Inherit SB_RDONLY/NOSUID/NODEV/NOEXEC/NOATIME/NODIRATIME from @lower_flags and clear SB_POSIXACL on @sb when the lower flags lack it. */
1973 ++static void shiftfs_super_force_flags(struct super_block *sb,
1974 ++ unsigned long lower_flags)
1975 ++{
1976 ++ sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1977 ++ SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1978 ++
1979 ++ if (!(lower_flags & SB_POSIXACL))
1980 ++ sb->s_flags &= ~SB_POSIXACL;
1981 ++}
1982 ++/* Fill a shiftfs superblock from the struct shiftfs_data in @raw_data (option string and source path). With "mark" the source directory is marked as shiftable (requires CAP_SYS_ADMIN in the lower sb's user namespace); without it the source must be an existing shiftfs mark mount. Returns 0 or a negative errno; on failure everything attached to sb->s_fs_info is released again. */
1983 ++static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1984 ++ int silent)
1985 ++{
1986 ++ int err;
1987 ++ struct path path = {};
1988 ++ struct shiftfs_super_info *sbinfo_mp;
1989 ++ char *name = NULL;
1990 ++ struct inode *inode = NULL;
1991 ++ struct dentry *dentry = NULL;
1992 ++ struct shiftfs_data *data = raw_data;
1993 ++ struct shiftfs_super_info *sbinfo = NULL;
1994 ++
1995 ++ if (!data->path)
1996 ++ return -EINVAL;
1997 ++
1998 ++ sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1999 ++ if (!sb->s_fs_info)
2000 ++ return -ENOMEM;
2001 ++ sbinfo = sb->s_fs_info;
2002 ++
2003 ++ err = shiftfs_parse_mount_options(sbinfo, data->data);
2004 ++ if (err)
2005 ++ goto out_free_name;
2006 ++
2007 ++ /* to mount on top of a mark, must be userns admin */
2008 ++ err = -EPERM;
2009 ++ if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2010 ++ goto out_free_name;
2011 ++
2012 ++ err = -ENOMEM;
2013 ++ name = kstrdup(data->path, GFP_KERNEL);
2014 ++ if (!name)
2015 ++ goto out_free_name;
2016 ++
2017 ++ err = kern_path(name, LOOKUP_FOLLOW, &path);
2018 ++ if (err)
2019 ++ goto out_free_name;
2020 ++
2021 ++ if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
2022 ++ err = -ENOTDIR;
2023 ++ goto out_put_path;
2024 ++ }
2025 ++
2026 ++ sb->s_flags |= SB_POSIXACL;
2027 ++
2028 ++ if (sbinfo->mark) {
2029 ++ struct cred *cred_tmp;
2030 ++ struct super_block *lower_sb = path.mnt->mnt_sb;
2031 ++
2032 ++ /* to mark a mount point, must be root wrt lower s_user_ns */
2033 ++ if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2034 ++ err = -EPERM;
2035 ++ goto out_put_path;
2036 ++ }
2037 ++
2038 ++ /*
2039 ++ * this part is visible unshifted, so make sure no
2040 ++ * executables that could be used to give suid
2041 ++ * privileges
2042 ++ */
2043 ++ sb->s_iflags |= SB_I_NOEXEC; /* OR it in: do not clobber s_iflags bits that are already set */
2044 ++
2045 ++ shiftfs_super_force_flags(sb, lower_sb->s_flags);
2046 ++
2047 ++ /*
2048 ++ * Handle nesting of shiftfs mounts by referring this mark
2049 ++ * mount back to the original mark mount. This is more
2050 ++ * efficient and alleviates concerns about stack depth.
2051 ++ */
2052 ++ if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2053 ++ sbinfo_mp = lower_sb->s_fs_info;
2054 ++
2055 ++ /* Doesn't make sense to mark a mark mount */
2056 ++ if (sbinfo_mp->mark) {
2057 ++ err = -EINVAL;
2058 ++ goto out_put_path;
2059 ++ }
2060 ++
2061 ++ if (!passthrough_is_subset(sbinfo_mp->passthrough,
2062 ++ sbinfo->passthrough)) {
2063 ++ err = -EPERM;
2064 ++ goto out_put_path;
2065 ++ }
2066 ++
2067 ++ sbinfo->mnt = mntget(sbinfo_mp->mnt);
2068 ++ dentry = dget(path.dentry->d_fsdata);
2069 ++ /* Copy up the passthrough mount options from the parent mark mountpoint. */
2070 ++ sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
2071 ++ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2072 ++ } else {
2073 ++ sbinfo->mnt = mntget(path.mnt);
2074 ++ dentry = dget(path.dentry);
2075 ++ /* For a new mark passthrough_mark and passthrough are identical. */
2076 ++ sbinfo->passthrough_mark = sbinfo->passthrough;
2077 ++
2078 ++ cred_tmp = prepare_creds();
2079 ++ if (!cred_tmp) {
2080 ++ err = -ENOMEM;
2081 ++ goto out_put_path;
2082 ++ }
2083 ++ /* Don't override disk quota limits or use reserved space. */
2084 ++ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2085 ++ sbinfo->creator_cred = cred_tmp;
2086 ++ }
2087 ++ } else {
2088 ++ /*
2089 ++ * This leg executes if we're admin capable in the namespace,
2090 ++ * so be very careful.
2091 ++ */
2092 ++ err = -EPERM;
2093 ++ if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2094 ++ goto out_put_path;
2095 ++
2096 ++ sbinfo_mp = path.dentry->d_sb->s_fs_info;
2097 ++ if (!sbinfo_mp->mark)
2098 ++ goto out_put_path;
2099 ++
2100 ++ if (!passthrough_is_subset(sbinfo_mp->passthrough,
2101 ++ sbinfo->passthrough))
2102 ++ goto out_put_path;
2103 ++
2104 ++ sbinfo->mnt = mntget(sbinfo_mp->mnt);
2105 ++ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2106 ++ dentry = dget(path.dentry->d_fsdata);
2107 ++ /*
2108 ++ * Copy up passthrough settings from mark mountpoint so we can
2109 ++ * verify when the overlay wants to remount with different
2110 ++ * passthrough settings.
2111 ++ */
2112 ++ sbinfo->passthrough_mark = sbinfo_mp->passthrough;
2113 ++ shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2114 ++ }
2115 ++
2116 ++ sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2117 ++ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2118 ++ printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2119 ++ err = -EINVAL;
2120 ++ goto out_put_path;
2121 ++ }
2122 ++
2123 ++ inode = new_inode(sb);
2124 ++ if (!inode) {
2125 ++ err = -ENOMEM;
2126 ++ goto out_put_path;
2127 ++ }
2128 ++ shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2129 ++
2130 ++ ihold(dentry->d_inode);
2131 ++ inode->i_private = dentry->d_inode;
2132 ++
2133 ++ sb->s_magic = SHIFTFS_MAGIC;
2134 ++ sb->s_maxbytes = MAX_LFS_FILESIZE;
2135 ++ sb->s_op = &shiftfs_super_ops;
2136 ++ sb->s_xattr = shiftfs_xattr_handlers;
2137 ++ sb->s_d_op = &shiftfs_dentry_ops;
2138 ++ sb->s_root = d_make_root(inode);
2139 ++ if (!sb->s_root) {
2140 ++ err = -ENOMEM;
2141 ++ goto out_put_path;
2142 ++ }
2143 ++
2144 ++ sb->s_root->d_fsdata = dentry;
2145 ++ sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2146 ++ shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
2147 ++
2148 ++ dentry = NULL;
2149 ++ err = 0;
2150 ++
2151 ++out_put_path:
2152 ++ path_put(&path);
2153 ++
2154 ++out_free_name:
2155 ++ kfree(name);
2156 ++ dput(dentry);
2157 ++
2158 ++ if (err) {
2159 ++ /* ->put_super() is not run for a superblock without s_root, so undo our setup here */
2160 ++ shiftfs_put_super(sb);
2161 ++ sb->s_fs_info = NULL;
2162 ++ }
2163 ++ return err;
2164 ++}
2165 ++
2166 ++static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2167 ++ int flags, const char *dev_name, void *data)
2168 ++{
2169 ++ struct shiftfs_data d = { data, dev_name };
2170 ++
2171 ++ return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2172 ++}
2173 ++
2174 ++static struct file_system_type shiftfs_type = {
2175 ++ .owner = THIS_MODULE,
2176 ++ .name = "shiftfs",
2177 ++ .mount = shiftfs_mount,
2178 ++ .kill_sb = kill_anon_super,
2179 ++ .fs_flags = FS_USERNS_MOUNT,
2180 ++};
2181 ++
2182 ++static int __init shiftfs_init(void)
2183 ++{
2184 ++ return register_filesystem(&shiftfs_type);
2185 ++}
2186 ++
2187 ++static void __exit shiftfs_exit(void)
2188 ++{
2189 ++ unregister_filesystem(&shiftfs_type);
2190 ++}
2191 ++
2192 ++MODULE_ALIAS_FS("shiftfs");
2193 ++MODULE_AUTHOR("James Bottomley");
2194 ++MODULE_AUTHOR("Seth Forshee <seth.forshee@×××××××××.com>");
2195 ++MODULE_AUTHOR("Christian Brauner <christian.brauner@××××××.com>");
2196 ++MODULE_DESCRIPTION("id shifting filesystem");
2197 ++MODULE_LICENSE("GPL v2");
2198 ++module_init(shiftfs_init)
2199 ++module_exit(shiftfs_exit)
2200 +--- a/include/uapi/linux/magic.h 2021-01-06 19:08:45.234777659 -0500
2201 ++++ b/include/uapi/linux/magic.h 2021-01-06 19:09:53.900375394 -0500
2202 +@@ -96,4 +96,6 @@
2203 + #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
2204 + #define Z3FOLD_MAGIC 0x33
2205 +
2206 ++#define SHIFTFS_MAGIC 0x6a656a62
2207 ++
2208 + #endif /* __LINUX_MAGIC_H__ */
2209 +--- a/fs/Makefile 2021-01-08 18:08:28.187064015 -0500
2210 ++++ b/fs/Makefile 2021-01-08 18:09:00.788217579 -0500
2211 +@@ -136,3 +136,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/
2212 + obj-$(CONFIG_EROFS_FS) += erofs/
2213 + obj-$(CONFIG_VBOXSF_FS) += vboxsf/
2214 + obj-$(CONFIG_ZONEFS_FS) += zonefs/
2215 ++obj-$(CONFIG_SHIFT_FS) += shiftfs.o
2216 +--- a/fs/Kconfig 2021-01-06 19:14:17.709697891 -0500
2217 ++++ b/fs/Kconfig 2021-01-06 19:15:23.413281282 -0500
2218 +@@ -122,6 +122,24 @@ source "fs/autofs/Kconfig"
2219 + source "fs/fuse/Kconfig"
2220 + source "fs/overlayfs/Kconfig"
2221 +
2222 ++config SHIFT_FS
2223 ++ tristate "UID/GID shifting overlay filesystem for containers"
2224 ++ help
2225 ++ This filesystem can overlay any mounted filesystem and shift
2226 ++ the uid/gid under which its files appear. The idea is that
2227 ++ unprivileged containers can use this technique to mount
2228 ++ root volumes.
2229 ++
2230 ++config SHIFT_FS_POSIX_ACL
2231 ++ bool "shiftfs POSIX Access Control Lists"
2232 ++ depends on SHIFT_FS
2233 ++ select FS_POSIX_ACL
2234 ++ help
2235 ++ POSIX Access Control Lists (ACLs) support permissions for users and
2236 ++ groups beyond the owner/group/world scheme.
2237 ++
2238 ++ If you don't know what Access Control Lists are, say N.
2239 ++
2240 + menu "Caches"
2241 +
2242 + source "fs/fscache/Kconfig"