Diffstat (limited to 'fs')
-rw-r--r--  fs/autofs/autofs_i.h               |   2
-rw-r--r--  fs/fsopen.c                        |   2
-rw-r--r--  fs/internal.h                      |   1
-rw-r--r--  fs/mnt_idmapping.c                 |  51
-rw-r--r--  fs/mount.h                         |  26
-rw-r--r--  fs/namespace.c                     | 485
-rw-r--r--  fs/notify/fanotify/fanotify.c      |  38
-rw-r--r--  fs/notify/fanotify/fanotify.h      |  18
-rw-r--r--  fs/notify/fanotify/fanotify_user.c |  89
-rw-r--r--  fs/notify/fdinfo.c                 |   5
-rw-r--r--  fs/notify/fsnotify.c               |  47
-rw-r--r--  fs/notify/fsnotify.h               |  11
-rw-r--r--  fs/notify/mark.c                   |  14
-rw-r--r--  fs/pnode.c                         |   4
14 files changed, 642 insertions, 151 deletions
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 77c7991d89aa..23cea74f9933 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -218,6 +218,8 @@ void autofs_clean_ino(struct autofs_info *); static inline int autofs_check_pipe(struct file *pipe) { + if (pipe->f_mode & FMODE_PATH) + return -EINVAL; if (!(pipe->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) diff --git a/fs/fsopen.c b/fs/fsopen.c index 094a7f510edf..1aaf4cb2afb2 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -453,7 +453,7 @@ SYSCALL_DEFINE5(fsconfig, case FSCONFIG_SET_FD: param.type = fs_value_is_file; ret = -EBADF; - param.file = fget(aux); + param.file = fget_raw(aux); if (!param.file) goto out_key; param.dirfd = aux; diff --git a/fs/internal.h b/fs/internal.h index 82127c69e641..4c19d15ef08a 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path) } void file_f_owner_release(struct file *file); bool file_seek_cur_needs_f_lock(struct file *file); +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 7b1df8cc2821..a37991fdb194 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -6,6 +6,7 @@ #include <linux/mnt_idmapping.h> #include <linux/slab.h> #include <linux/user_namespace.h> +#include <linux/seq_file.h> #include "internal.h" @@ -334,3 +335,53 @@ void mnt_idmap_put(struct mnt_idmap *idmap) free_mnt_idmap(idmap); } EXPORT_SYMBOL_GPL(mnt_idmap_put); + +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map) +{ + struct uid_gid_map *map, *map_up; + u32 idx, nr_mappings; + + if (!is_valid_mnt_idmap(idmap)) + return 0; + + /* + * Idmappings are shown relative to the caller's idmapping. + * This is both the most intuitive and most useful solution. + */ + if (uid_map) { + map = &idmap->uid_map; + map_up = ¤t_user_ns()->uid_map; + } else { + map = &idmap->gid_map; + map_up = ¤t_user_ns()->gid_map; + } + + for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) { + uid_t lower; + struct uid_gid_extent *extent; + + if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) + extent = &map->extent[idx]; + else + extent = &map->forward[idx]; + + /* + * Verify that the whole range of the mapping can be + * resolved in the caller's idmapping. If it cannot be + * resolved skip the mapping. 
+ */ + lower = map_id_range_up(map_up, extent->lower_first, extent->count); + if (lower == (uid_t) -1) + continue; + + seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); + + seq->count++; /* mappings are separated by \0 */ + if (seq_has_overflowed(seq)) + return -EAGAIN; + + nr_mappings++; + } + + return nr_mappings; +} diff --git a/fs/mount.h b/fs/mount.h index ffb613cdfeee..946dc8b792d7 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -5,6 +5,8 @@ #include <linux/ns_common.h> #include <linux/fs_pin.h> +extern struct list_head notify_list; + struct mnt_namespace { struct ns_common ns; struct mount * root; @@ -21,6 +23,10 @@ struct mnt_namespace { struct rcu_head mnt_ns_rcu; }; u64 event; +#ifdef CONFIG_FSNOTIFY + __u32 n_fsnotify_mask; + struct fsnotify_mark_connector __rcu *n_fsnotify_marks; +#endif unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ @@ -76,6 +82,8 @@ struct mount { #ifdef CONFIG_FSNOTIFY struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; + struct list_head to_notify; /* need to queue notification */ + struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */ #endif int mnt_id; /* mount identifier, reused */ u64 mnt_id_unique; /* mount ID unique until reboot */ @@ -177,3 +185,21 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); } + +#ifdef CONFIG_FSNOTIFY +static inline void mnt_notify_add(struct mount *m) +{ + /* Optimize the case where there are no watches */ + if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) || + (m->prev_ns && m->prev_ns->n_fsnotify_marks)) + list_add_tail(&m->to_notify, ¬ify_list); + else + m->prev_ns = m->mnt_ns; +} +#else +static inline void mnt_notify_add(struct mount *m) +{ +} +#endif + +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry); diff --git a/fs/namespace.c b/fs/namespace.c index 8f1000f9f3df..4c8bd48c8a62 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -81,15 +81,23 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ static DEFINE_SEQLOCK(mnt_ns_tree_lock); +#ifdef CONFIG_FSNOTIFY +LIST_HEAD(notify_list); /* protected by namespace_sem */ +#endif static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */ +enum mount_kattr_flags_t { + MOUNT_KATTR_RECURSE = (1 << 0), + MOUNT_KATTR_IDMAP_REPLACE = (1 << 1), +}; + struct mount_kattr { unsigned int attr_set; unsigned int attr_clr; unsigned int propagation; unsigned int lookup_flags; - bool recurse; + enum mount_kattr_flags_t kflags; struct user_namespace *mnt_userns; struct mnt_idmap *mnt_idmap; }; @@ -163,6 +171,7 @@ static void mnt_ns_release(struct mnt_namespace *ns) { /* keep alive for {list,stat}mount() */ if (refcount_dec_and_test(&ns->passive)) { + fsnotify_mntns_delete(ns); put_user_ns(ns->user_ns); kfree(ns); } @@ -1176,6 +1185,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) ns->mnt_first_node = &mnt->mnt_node; rb_link_node(&mnt->mnt_node, parent, link); rb_insert_color(&mnt->mnt_node, &ns->mounts); + + mnt_notify_add(mnt); } /* @@ -1723,6 +1734,50 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); +#ifdef CONFIG_FSNOTIFY +static void mnt_notify(struct mount *p) +{ + if (!p->prev_ns && p->mnt_ns) { + fsnotify_mnt_attach(p->mnt_ns, 
&p->mnt); + } else if (p->prev_ns && !p->mnt_ns) { + fsnotify_mnt_detach(p->prev_ns, &p->mnt); + } else if (p->prev_ns == p->mnt_ns) { + fsnotify_mnt_move(p->mnt_ns, &p->mnt); + } else { + fsnotify_mnt_detach(p->prev_ns, &p->mnt); + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); + } + p->prev_ns = p->mnt_ns; +} + +static void notify_mnt_list(void) +{ + struct mount *m, *tmp; + /* + * Notify about mounts that were added/reparented/detached/remain + * connected after unmount. + */ + list_for_each_entry_safe(m, tmp, ¬ify_list, to_notify) { + mnt_notify(m); + list_del_init(&m->to_notify); + } +} + +static bool need_notify_mnt_list(void) +{ + return !list_empty(¬ify_list); +} +#else +static void notify_mnt_list(void) +{ +} + +static bool need_notify_mnt_list(void) +{ + return false; +} +#endif + static void namespace_unlock(void) { struct hlist_head head; @@ -1733,7 +1788,18 @@ static void namespace_unlock(void) hlist_move_list(&unmounted, &head); list_splice_init(&ex_mountpoints, &list); - up_write(&namespace_sem); + if (need_notify_mnt_list()) { + /* + * No point blocking out concurrent readers while notifications + * are sent. This will also allow statmount()/listmount() to run + * concurrently. + */ + downgrade_write(&namespace_sem); + notify_mnt_list(); + up_read(&namespace_sem); + } else { + up_write(&namespace_sem); + } shrink_dentry_list(&list); @@ -1846,6 +1912,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) change_mnt_propagation(p, MS_PRIVATE); if (disconnect) hlist_add_head(&p->mnt_umount, &unmounted); + + /* + * At this point p->mnt_ns is NULL, notification will be queued + * only if + * + * - p->prev_ns is non-NULL *and* + * - p->prev_ns->n_fsnotify_marks is non-NULL + * + * This will preclude queuing the mount if this is a cleanup + * after a failed copy_tree() or destruction of an anonymous + * namespace, etc. + */ + mnt_notify_add(p); } } @@ -2026,6 +2105,7 @@ static void warn_mandlock(void) static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); + struct super_block *sb = path->dentry->d_sb; if (!may_mount()) return -EPERM; @@ -2035,7 +2115,7 @@ static int can_umount(const struct path *path, int flags) return -EINVAL; if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ return -EINVAL; - if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -2145,16 +2225,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr } } +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry) +{ + if (!is_mnt_ns_file(dentry)) + return NULL; + + return to_mnt_ns(get_proc_ns(dentry->d_inode)); +} + static bool mnt_ns_loop(struct dentry *dentry) { /* Could bind mounting the mount namespace inode cause a * mount namespace loop? */ - struct mnt_namespace *mnt_ns; - if (!is_mnt_ns_file(dentry)) + struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry); + + if (!mnt_ns) return false; - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } @@ -2287,6 +2375,28 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +/* + * Check that there aren't references to earlier/same mount namespaces in the + * specified subtree. Such references can act as pins for mount namespaces + * that aren't checked by the mount-cycle checking code, thereby allowing + * cycles to be made. 
+ */ +static bool check_for_nsfs_mounts(struct mount *subtree) +{ + struct mount *p; + bool ret = false; + + lock_mount_hash(); + for (p = subtree; p; p = next_mnt(p, subtree)) + if (mnt_ns_loop(p->mnt.mnt_root)) + goto out; + + ret = true; +out: + unlock_mount_hash(); + return ret; +} + /** * clone_private_mount - create a private clone of a path * @path: path to clone @@ -2295,6 +2405,8 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) * will not be attached anywhere in the namespace and will be private (i.e. * changes to the originating mount won't be propagated into this). * + * This assumes caller has called or done the equivalent of may_mount(). + * * Release with mntput(). */ struct vfsmount *clone_private_mount(const struct path *path) @@ -2302,30 +2414,36 @@ struct vfsmount *clone_private_mount(const struct path *path) struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; - down_read(&namespace_sem); + scoped_guard(rwsem_read, &namespace_sem) if (IS_MNT_UNBINDABLE(old_mnt)) - goto invalid; + return ERR_PTR(-EINVAL); + + if (mnt_has_parent(old_mnt)) { + if (!check_mnt(old_mnt)) + return ERR_PTR(-EINVAL); + } else { + if (!is_mounted(&old_mnt->mnt)) + return ERR_PTR(-EINVAL); - if (!check_mnt(old_mnt)) - goto invalid; + /* Make sure this isn't something purely kernel internal. */ + if (!is_anon_ns(old_mnt->mnt_ns)) + return ERR_PTR(-EINVAL); + + /* Make sure we don't create mount namespace loops. */ + if (!check_for_nsfs_mounts(old_mnt)) + return ERR_PTR(-EINVAL); + } if (has_locked_children(old_mnt, path->dentry)) - goto invalid; + return ERR_PTR(-EINVAL); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); - up_read(&namespace_sem); - if (IS_ERR(new_mnt)) - return ERR_CAST(new_mnt); + return ERR_PTR(-EINVAL); /* Longterm mount to be removed by kern_unmount*() */ new_mnt->mnt_ns = MNT_NS_INTERNAL; - return &new_mnt->mnt; - -invalid: - up_read(&namespace_sem); - return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -2547,6 +2665,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, dest_mp = smp; unhash_mnt(source_mnt); attach_mnt(source_mnt, top_mnt, dest_mp, beneath); + mnt_notify_add(source_mnt); touch_mnt_namespace(source_mnt->mnt_ns); } else { if (source_mnt->mnt_ns) { @@ -2889,24 +3008,22 @@ static struct file *open_detached_copy(struct path *path, bool recursive) return file; } -SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) +static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned int flags) { - struct file *file; - struct path path; + int ret; + struct path path __free(path_put) = {}; int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; bool detached = flags & OPEN_TREE_CLONE; - int error; - int fd; BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC)) - return -EINVAL; + return ERR_PTR(-EINVAL); if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) - return -EINVAL; + return ERR_PTR(-EINVAL); if (flags & AT_NO_AUTOMOUNT) lookup_flags &= ~LOOKUP_AUTOMOUNT; @@ -2916,27 +3033,32 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, fl lookup_flags |= LOOKUP_EMPTY; if (detached && !may_mount()) - return -EPERM; + return ERR_PTR(-EPERM); + + ret = user_path_at(dfd, filename, lookup_flags, &path); + if (unlikely(ret)) + return ERR_PTR(ret); + + if (detached) + return open_detached_copy(&path, flags & 
AT_RECURSIVE); + + return dentry_open(&path, O_PATH, current_cred()); +} + +SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) +{ + int fd; + struct file *file __free(fput) = NULL; + + file = vfs_open_tree(dfd, filename, flags); + if (IS_ERR(file)) + return PTR_ERR(file); fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) return fd; - error = user_path_at(dfd, filename, lookup_flags, &path); - if (unlikely(error)) { - file = ERR_PTR(error); - } else { - if (detached) - file = open_detached_copy(&path, flags & AT_RECURSIVE); - else - file = dentry_open(&path, O_PATH, current_cred()); - path_put(&path); - } - if (IS_ERR(file)) { - put_unused_fd(fd); - return PTR_ERR(file); - } - fd_install(fd, file); + fd_install(fd, no_free_ptr(file)); return fd; } @@ -3123,28 +3245,6 @@ static inline int tree_contains_unbindable(struct mount *mnt) return 0; } -/* - * Check that there aren't references to earlier/same mount namespaces in the - * specified subtree. Such references can act as pins for mount namespaces - * that aren't checked by the mount-cycle checking code, thereby allowing - * cycles to be made. - */ -static bool check_for_nsfs_mounts(struct mount *subtree) -{ - struct mount *p; - bool ret = false; - - lock_mount_hash(); - for (p = subtree; p; p = next_mnt(p, subtree)) - if (mnt_ns_loop(p->mnt.mnt_root)) - goto out; - - ret = true; -out: - unlock_mount_hash(); - return ret; -} - static int do_set_group(struct path *from_path, struct path *to_path) { struct mount *from, *to; @@ -4468,6 +4568,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, list_del_init(&new_mnt->mnt_expire); put_mountpoint(root_mp); unlock_mount_hash(); + mnt_notify_add(root_mnt); + mnt_notify_add(new_mnt); chroot_fs_refs(&root, &new); error = 0; out4: @@ -4512,11 +4614,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) return -EINVAL; /* - * Once a mount has been idmapped we don't allow it to change its - * mapping. It makes things simpler and callers can just create - * another bind-mount they can idmap if they want to. + * We only allow a mount to change its idmapping if it has + * never been accessible to userspace. */ - if (is_idmapped_mnt(m)) + if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m)) return -EPERM; /* The underlying filesystem doesn't support idmapped mounts yet. */ @@ -4576,7 +4677,7 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) break; } - if (!kattr->recurse) + if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) return 0; } @@ -4606,18 +4707,16 @@ static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { + struct mnt_idmap *old_idmap; + if (!kattr->mnt_idmap) return; - /* - * Pairs with smp_load_acquire() in mnt_idmap(). - * - * Since we only allow a mount to change the idmapping once and - * verified this in can_idmap_mount() we know that the mount has - * @nop_mnt_idmap attached to it. So there's no need to drop any - * references. - */ + old_idmap = mnt_idmap(&mnt->mnt); + + /* Pairs with smp_load_acquire() in mnt_idmap(). 
*/ smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); + mnt_idmap_put(old_idmap); } static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) @@ -4637,7 +4736,7 @@ static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) if (kattr->propagation) change_mnt_propagation(m, kattr->propagation); - if (!kattr->recurse) + if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) break; } touch_mnt_namespace(mnt->mnt_ns); @@ -4667,7 +4766,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) */ namespace_lock(); if (kattr->propagation == MS_SHARED) { - err = invent_group_ids(mnt, kattr->recurse); + err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE); if (err) { namespace_unlock(); return err; @@ -4718,7 +4817,7 @@ out: } static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, - struct mount_kattr *kattr, unsigned int flags) + struct mount_kattr *kattr) { struct ns_common *ns; struct user_namespace *mnt_userns; @@ -4726,13 +4825,23 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) return 0; - /* - * We currently do not support clearing an idmapped mount. If this ever - * is a use-case we can revisit this but for now let's keep it simple - * and not allow it. - */ - if (attr->attr_clr & MOUNT_ATTR_IDMAP) - return -EINVAL; + if (attr->attr_clr & MOUNT_ATTR_IDMAP) { + /* + * We can only remove an idmapping if it's never been + * exposed to userspace. + */ + if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE)) + return -EINVAL; + + /* + * Removal of idmappings is equivalent to setting + * nop_mnt_idmap. + */ + if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) { + kattr->mnt_idmap = &nop_mnt_idmap; + return 0; + } + } if (attr->userns_fd > INT_MAX) return -EINVAL; @@ -4769,22 +4878,8 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, } static int build_mount_kattr(const struct mount_attr *attr, size_t usize, - struct mount_kattr *kattr, unsigned int flags) + struct mount_kattr *kattr) { - unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; - - if (flags & AT_NO_AUTOMOUNT) - lookup_flags &= ~LOOKUP_AUTOMOUNT; - if (flags & AT_SYMLINK_NOFOLLOW) - lookup_flags &= ~LOOKUP_FOLLOW; - if (flags & AT_EMPTY_PATH) - lookup_flags |= LOOKUP_EMPTY; - - *kattr = (struct mount_kattr) { - .lookup_flags = lookup_flags, - .recurse = !!(flags & AT_RECURSIVE), - }; - if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) return -EINVAL; if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) @@ -4832,35 +4927,28 @@ static int build_mount_kattr(const struct mount_attr *attr, size_t usize, return -EINVAL; } - return build_mount_idmapped(attr, usize, kattr, flags); + return build_mount_idmapped(attr, usize, kattr); } static void finish_mount_kattr(struct mount_kattr *kattr) { - put_user_ns(kattr->mnt_userns); - kattr->mnt_userns = NULL; + if (kattr->mnt_userns) { + put_user_ns(kattr->mnt_userns); + kattr->mnt_userns = NULL; + } if (kattr->mnt_idmap) mnt_idmap_put(kattr->mnt_idmap); } -SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, - unsigned int, flags, struct mount_attr __user *, uattr, - size_t, usize) +static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, + struct mount_kattr *kattr) { - int err; - struct path target; + int ret; struct mount_attr attr; - struct mount_kattr kattr; BUILD_BUG_ON(sizeof(struct mount_attr) != 
MOUNT_ATTR_SIZE_VER0); - if (flags & ~(AT_EMPTY_PATH | - AT_RECURSIVE | - AT_SYMLINK_NOFOLLOW | - AT_NO_AUTOMOUNT)) - return -EINVAL; - if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < MOUNT_ATTR_SIZE_VER0)) @@ -4869,9 +4957,9 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, if (!may_mount()) return -EPERM; - err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); - if (err) - return err; + ret = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); + if (ret) + return ret; /* Don't bother walking through the mounts if this is a nop. */ if (attr.attr_set == 0 && @@ -4879,7 +4967,39 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, attr.propagation == 0) return 0; - err = build_mount_kattr(&attr, usize, &kattr, flags); + return build_mount_kattr(&attr, usize, kattr); +} + +SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, + unsigned int, flags, struct mount_attr __user *, uattr, + size_t, usize) +{ + int err; + struct path target; + struct mount_kattr kattr; + unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; + + if (flags & ~(AT_EMPTY_PATH | + AT_RECURSIVE | + AT_SYMLINK_NOFOLLOW | + AT_NO_AUTOMOUNT)) + return -EINVAL; + + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + + kattr = (struct mount_kattr) { + .lookup_flags = lookup_flags, + }; + + if (flags & AT_RECURSIVE) + kattr.kflags |= MOUNT_KATTR_RECURSE; + + err = copy_mount_setattr(uattr, usize, &kattr); if (err) return err; @@ -4892,6 +5012,47 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, return err; } +SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, + unsigned, flags, struct mount_attr __user *, uattr, + size_t, usize) +{ + struct file __free(fput) *file = NULL; + int fd; + + if (!uattr && usize) + return -EINVAL; + + file = vfs_open_tree(dfd, filename, flags); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (uattr) { + int ret; + struct mount_kattr kattr = {}; + + kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; + if (flags & AT_RECURSIVE) + kattr.kflags |= MOUNT_KATTR_RECURSE; + + ret = copy_mount_setattr(uattr, usize, &kattr); + if (ret) + return ret; + + ret = do_mount_setattr(&file->f_path, &kattr); + if (ret) + return ret; + + finish_mount_kattr(&kattr); + } + + fd = get_unused_fd_flags(flags & O_CLOEXEC); + if (fd < 0) + return fd; + + fd_install(fd, no_free_ptr(file)); + return fd; +} + int show_path(struct seq_file *m, struct dentry *root) { if (root->d_sb->s_op->show_path) @@ -4915,6 +5076,7 @@ struct kstatmount { struct statmount __user *buf; size_t bufsize; struct vfsmount *mnt; + struct mnt_idmap *idmap; u64 mask; struct path root; struct statmount sm; @@ -5184,6 +5346,46 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq) return 0; } +static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + + ret = statmount_mnt_idmap(s->idmap, seq, true); + if (ret < 0) + return ret; + + s->sm.mnt_uidmap_num = ret; + /* + * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid + * mappings. This allows userspace to distinguish between a + * non-idmapped mount and an idmapped mount where none of the + * individual mappings are valid in the caller's idmapping. 
+ */ + if (is_valid_mnt_idmap(s->idmap)) + s->sm.mask |= STATMOUNT_MNT_UIDMAP; + return 0; +} + +static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + + ret = statmount_mnt_idmap(s->idmap, seq, false); + if (ret < 0) + return ret; + + s->sm.mnt_gidmap_num = ret; + /* + * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid + * mappings. This allows userspace to distinguish between a + * non-idmapped mount and an idmapped mount where none of the + * individual mappings are valid in the caller's idmapping. + */ + if (is_valid_mnt_idmap(s->idmap)) + s->sm.mask |= STATMOUNT_MNT_GIDMAP; + return 0; +} + static int statmount_string(struct kstatmount *s, u64 flag) { int ret = 0; @@ -5231,6 +5433,14 @@ static int statmount_string(struct kstatmount *s, u64 flag) offp = &sm->sb_source; ret = statmount_sb_source(s, seq); break; + case STATMOUNT_MNT_UIDMAP: + sm->mnt_uidmap = start; + ret = statmount_mnt_uidmap(s, seq); + break; + case STATMOUNT_MNT_GIDMAP: + sm->mnt_gidmap = start; + ret = statmount_mnt_gidmap(s, seq); + break; default: WARN_ON_ONCE(true); return -EINVAL; @@ -5323,6 +5533,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root) return 0; } +/* This must be updated whenever a new flag is added */ +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \ + STATMOUNT_MNT_BASIC | \ + STATMOUNT_PROPAGATE_FROM | \ + STATMOUNT_MNT_ROOT | \ + STATMOUNT_MNT_POINT | \ + STATMOUNT_FS_TYPE | \ + STATMOUNT_MNT_NS_ID | \ + STATMOUNT_MNT_OPTS | \ + STATMOUNT_FS_SUBTYPE | \ + STATMOUNT_SB_SOURCE | \ + STATMOUNT_OPT_ARRAY | \ + STATMOUNT_OPT_SEC_ARRAY | \ + STATMOUNT_SUPPORTED_MASK) + static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, struct mnt_namespace *ns) { @@ -5356,6 +5581,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, return err; s->root = root; + s->idmap = mnt_idmap(s->mnt); if (s->mask & STATMOUNT_SB_BASIC) statmount_sb_basic(s); @@ -5389,12 +5615,26 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, if (!err && s->mask & STATMOUNT_SB_SOURCE) err = statmount_string(s, STATMOUNT_SB_SOURCE); + if (!err && s->mask & STATMOUNT_MNT_UIDMAP) + err = statmount_string(s, STATMOUNT_MNT_UIDMAP); + + if (!err && s->mask & STATMOUNT_MNT_GIDMAP) + err = statmount_string(s, STATMOUNT_MNT_GIDMAP); + if (!err && s->mask & STATMOUNT_MNT_NS_ID) statmount_mnt_ns_id(s, ns); + if (!err && s->mask & STATMOUNT_SUPPORTED_MASK) { + s->sm.mask |= STATMOUNT_SUPPORTED_MASK; + s->sm.supported_mask = STATMOUNT_SUPPORTED; + } + if (err) return err; + /* Are there bits in the return mask not present in STATMOUNT_SUPPORTED? 
*/ + WARN_ON_ONCE(~STATMOUNT_SUPPORTED & s->sm.mask); + return 0; } @@ -5412,7 +5652,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \ - STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY) + STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \ + STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP) static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 95646f7c46ca..6d386080faf2 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FS_ERROR: return fanotify_error_event_equal(FANOTIFY_EE(old), FANOTIFY_EE(new)); + case FANOTIFY_EVENT_TYPE_MNT: + return false; default: WARN_ON_ONCE(1); } @@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); - if (!fid_mode) { + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (data_type != FSNOTIFY_EVENT_MNT) + return 0; + } else if (!fid_mode) { /* Do we have path to open a file descriptor? */ if (!path) return 0; @@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, return &pevent->fae; } +static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp) +{ + struct fanotify_mnt_event *pevent; + + pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp); + if (!pevent) + return NULL; + + pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT; + pevent->mnt_id = mnt_id; + + return &pevent->fae; +} + static struct fanotify_event *fanotify_alloc_perm_event(const void *data, int data_type, gfp_t gfp) @@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event( fid_mode); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); + u64 mnt_id = fsnotify_data_mnt_id(data, data_type); struct mem_cgroup *old_memcg; struct dentry *moved = NULL; struct inode *child = NULL; @@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event( moved, &hash, gfp); } else if (fid_mode) { event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); - } else { + } else if (path) { event = fanotify_alloc_path_event(path, &hash, gfp); + } else if (mnt_id) { + event = fanotify_alloc_mnt_event(mnt_id, gfp); + } else { + WARN_ON_ONCE(1); } if (!event) @@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_RENAME != FS_RENAME); BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); @@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group, mempool_free(fee, &group->fanotify_data.error_events_pool); } +static void fanotify_free_mnt_event(struct fanotify_event *event) +{ + kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event)); +} + static void fanotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { @@ -1054,6 +1083,9 @@ 
static void fanotify_free_event(struct fsnotify_group *group, case FANOTIFY_EVENT_TYPE_FS_ERROR: fanotify_free_error_event(group, event); break; + case FANOTIFY_EVENT_TYPE_MNT: + fanotify_free_mnt_event(event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index c12cbc270539..b44e70e44be6 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_fid_event_cachep; extern struct kmem_cache *fanotify_path_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +extern struct kmem_cache *fanotify_mnt_event_cachep; /* Possible states of the permission event */ enum { @@ -244,6 +245,7 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ + FANOTIFY_EVENT_TYPE_MNT, __FANOTIFY_EVENT_TYPE_NUM }; @@ -409,12 +411,23 @@ struct fanotify_path_event { struct path path; }; +struct fanotify_mnt_event { + struct fanotify_event fae; + u64 mnt_id; +}; + static inline struct fanotify_path_event * FANOTIFY_PE(struct fanotify_event *event) { return container_of(event, struct fanotify_path_event, fae); } +static inline struct fanotify_mnt_event * +FANOTIFY_ME(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_mnt_event, fae); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask) return mask & FAN_FS_ERROR; } +static inline bool fanotify_is_mnt_event(u32 mask) +{ + return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH); +} + static inline const struct path *fanotify_event_path(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_PATH) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ba3e2d09eb44..f2d840ae4ded 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init; struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; struct kmem_cache *fanotify_path_event_cachep __ro_after_init; struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; +struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ @@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; (sizeof(struct fanotify_event_info_error)) #define FANOTIFY_RANGE_INFO_LEN \ (sizeof(struct fanotify_event_info_range)) +#define FANOTIFY_MNT_INFO_LEN \ + (sizeof(struct fanotify_event_info_mnt)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode, fh_len = fanotify_event_object_fh_len(event); event_len += fanotify_fid_info_len(fh_len, dot_len); } + if (fanotify_is_mnt_event(event->mask)) + event_len += FANOTIFY_MNT_INFO_LEN; if (info_mode & FAN_REPORT_PIDFD) event_len += FANOTIFY_PIDFD_INFO_LEN; @@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } +static size_t copy_mnt_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_event_info_mnt info = { }; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; + 
info.hdr.len = FANOTIFY_MNT_INFO_LEN; + + if (WARN_ON(count < info.hdr.len)) + return -EFAULT; + + info.mnt_id = FANOTIFY_ME(event)->mnt_id; + + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + return info.hdr.len; +} + static size_t copy_error_info_to_user(struct fanotify_event *event, char __user *buf, int count) { @@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_is_mnt_event(event->mask)) { + ret = copy_mnt_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } @@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) return -EINVAL; + /* Don't allow mixing mnt events with inode events for now */ + if (flags & FAN_REPORT_MNT) { + if (class != FAN_CLASS_NOTIF) + return -EINVAL; + if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR)) + return -EINVAL; + } + if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) return -EINVAL; @@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { struct inode *inode = NULL; - struct vfsmount *mnt = NULL; struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; @@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; - void *obj; + void *obj = NULL; u32 umask = 0; int ret; @@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, case FAN_MARK_FILESYSTEM: obj_type = FSNOTIFY_OBJ_TYPE_SB; break; + case FAN_MARK_MNTNS: + obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; + break; default: return -EINVAL; } @@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; group = fd_file(f)->private_data; + /* Only report mount events on mnt namespace */ + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (mask & ~FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type != FAN_MARK_MNTNS) + return -EINVAL; + } else { + if (mask & FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type == FAN_MARK_MNTNS) + return -EINVAL; + } + /* * An unprivileged user is not allowed to setup mount nor filesystem * marks. This also includes setting up such marks by a group that @@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * point. 
*/ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) return -EINVAL; @@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* inode held in place by reference to path; group by fget on fd */ - if (mark_type == FAN_MARK_INODE) { + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; obj = inode; - } else { - mnt = path.mnt; - if (mark_type == FAN_MARK_MOUNT) - obj = mnt; - else - obj = mnt->mnt_sb; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + obj = path.mnt; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { + obj = path.mnt->mnt_sb; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { + obj = mnt_ns_from_dentry(path.dentry); } + ret = -EINVAL; + if (!obj) + goto path_put_and_out; + /* * If some other task has this inode open for write we should not add * an ignore mask, unless that ignore mask is supposed to survive @@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, */ if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { - ret = mnt ? -EINVAL : -EISDIR; + ret = !inode ? -EINVAL : -EISDIR; /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ if (ignore == FAN_MARK_IGNORE && - (mnt || S_ISDIR(inode->i_mode))) + (!inode || S_ISDIR(inode->i_mode))) goto path_put_and_out; ret = 0; @@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ - if (mnt || !S_ISDIR(inode->i_mode)) { + if (!inode || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; umask = FAN_EVENT_ON_CHILD; /* @@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fanotify_mark, @@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void) fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } + fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index e933f9c65d90..1161eabf11ee 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", sb->s_dev, mflags, mark->mask, mark->ignore_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector); + + seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n", + mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask); } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index fae1b6d397ea..e2b4f17a48bb 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + 
fsnotify_clear_marks_by_mntns(mntns); +} + /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. @@ -420,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type, file_name, cookie, iter_info); } -static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; @@ -538,14 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); - struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL; struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; - __u32 test_mask, marks_mask; + __u32 test_mask, marks_mask = 0; if (path) mnt = real_mount(path->mnt); @@ -578,17 +584,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!inode2 || !inode2->i_fsnotify_marks)) + (!inode2 || !inode2->i_fsnotify_marks) && + (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) return 0; - marks_mask = READ_ONCE(sb->s_fsnotify_mask); + if (sb) + marks_mask |= READ_ONCE(sb->s_fsnotify_mask); if (mnt) marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); if (inode) marks_mask |= READ_ONCE(inode->i_fsnotify_mask); if (inode2) marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); - + if (mnt_data) + marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); /* * If this is a modify event we may need to clear some ignore masks. @@ -618,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } + if (mnt_data) { + iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = + fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks); + } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark @@ -708,11 +721,31 @@ void file_set_fsnotify_mode_from_watchers(struct file *file) } #endif +void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{ + struct fsnotify_mnt data = { + .ns = ns, + .mnt_id = real_mount(mnt)->mnt_id_unique, + }; + + if (WARN_ON_ONCE(!ns)) + return; + + /* + * This is an optimization as well as making sure fsnotify_init() has + * been called. 
+ */ + if (!ns->n_fsnotify_marks) + return; + + fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); +} + static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 663759ed6fbc..5950c7a67f41 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb( return conn->obj; } +static inline struct mnt_namespace *fsnotify_conn_mntns( + struct fsnotify_mark_connector *conn) +{ + return conn->obj; +} + static inline struct super_block *fsnotify_object_sb(void *obj, enum fsnotify_obj_type obj_type) { @@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) fsnotify_destroy_marks(fsnotify_sb_marks(sb)); } +static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) +{ + fsnotify_destroy_marks(&mntns->n_fsnotify_marks); +} + /* * update the dentry->d_flags of all of inode's children to indicate if inode cares * about events that happen to its children. diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 4981439e6209..798340db69d7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, return &real_mount(obj)->mnt_fsnotify_marks; case FSNOTIFY_OBJ_TYPE_SB: return fsnotify_sb_marks(obj); + case FSNOTIFY_OBJ_TYPE_MNTNS: + return &((struct mnt_namespace *)obj)->n_fsnotify_marks; default: return NULL; } @@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) return &fsnotify_conn_sb(conn)->s_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) + return &fsnotify_conn_mntns(conn)->n_fsnotify_mask; return NULL; } @@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0; } rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; - fsnotify_update_sb_watchers(sb, conn); + if (sb) + fsnotify_update_sb_watchers(sb, conn); return inode; } @@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, * Attach the sb info before attaching a connector to any object on sb. * The sb info will remain attached as long as sb lives. */ - if (!fsnotify_sb_info(sb)) { + if (sb && !fsnotify_sb_info(sb)) { err = fsnotify_attach_info_to_sb(sb); if (err) return err; @@ -770,7 +777,8 @@ restart: /* mark should be the last entry. 
last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: - fsnotify_update_sb_watchers(sb, conn); + if (sb) + fsnotify_update_sb_watchers(sb, conn); /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone diff --git a/fs/pnode.c b/fs/pnode.c index ef048f008bdd..82d809c785ec 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore) mp = parent->mnt_mp; parent = parent->mnt_parent; } - if (parent != mnt->mnt_parent) + if (parent != mnt->mnt_parent) { mnt_change_mountpoint(parent, mp, mnt); + mnt_notify_add(mnt); + } } }
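
The three sketches below are editorial additions, not part of the diff. First, the fanotify side: FAN_REPORT_MNT groups mark an entire mount namespace with FAN_MARK_MNTNS and receive FAN_MNT_ATTACH/FAN_MNT_DETACH events that carry a 64-bit mount ID instead of an open fd (a moved mount may carry both bits). A minimal listener, assuming a kernel with this series and uapi headers that already define these flags plus struct fanotify_event_info_mnt (an info header followed by a 64-bit mnt_id, as referenced in fanotify_user.c above):

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/fanotify.h>

int main(void)
{
	/* FAN_REPORT_MNT groups must be FAN_CLASS_NOTIF and cannot use
	 * fid modes or FAN_REPORT_FD_ERROR (enforced in fanotify_init()). */
	int fan = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_MNT, 0);
	if (fan < 0) {
		perror("fanotify_init");
		return 1;
	}

	/* The marked object is a mount namespace: an nsfs path is resolved
	 * via mnt_ns_from_dentry() in do_fanotify_mark() above. */
	if (fanotify_mark(fan, FAN_MARK_ADD | FAN_MARK_MNTNS,
			  FAN_MNT_ATTACH | FAN_MNT_DETACH,
			  AT_FDCWD, "/proc/self/ns/mnt") < 0) {
		perror("fanotify_mark");
		return 1;
	}

	for (;;) {
		char buf[4096];
		ssize_t len = read(fan, buf, sizeof(buf));
		if (len <= 0)
			break;

		struct fanotify_event_metadata *ev;
		for (ev = (struct fanotify_event_metadata *)buf;
		     FAN_EVENT_OK(ev, len); ev = FAN_EVENT_NEXT(ev, len)) {
			if (ev->event_len <= FAN_EVENT_METADATA_LEN)
				continue;
			/* The FAN_EVENT_INFO_TYPE_MNT record follows the
			 * metadata directly for these events. */
			struct fanotify_event_info_mnt *info =
				(struct fanotify_event_info_mnt *)(ev + 1);
			printf("mask=%#llx mnt_id=%llu\n",
			       (unsigned long long)ev->mask,
			       (unsigned long long)info->mnt_id);
		}
	}
	return 0;
}
```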
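Second, the statmount() side: STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP return the mount's idmapping as mnt_uidmap_num/mnt_gidmap_num consecutive '\0'-separated "first lower count" strings (the seq->count++ in statmount_mnt_idmap() above inserts the separators), expressed relative to the caller's user namespace with unresolvable extents skipped. A hedged consumer sketch, assuming <linux/mount.h> carries the new struct statmount fields and that current headers define __NR_statmount and STATX_MNT_ID_UNIQUE:

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <linux/mount.h>

/* Print 'num' consecutive '\0'-separated "first lower count" triples. */
static void print_maps(const struct statmount *sm, uint32_t off,
		       uint32_t num, const char *tag)
{
	const char *s = sm->str + off;

	for (uint32_t i = 0; i < num; i++, s += strlen(s) + 1)
		printf("%s[%u]: %s\n", tag, i, s);
}

int main(void)
{
	static char buf[1 << 16];
	struct statmount *sm = (struct statmount *)buf;
	struct statx sx;
	struct mnt_id_req req = {
		.size = sizeof(req),
		.param = STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP,
	};

	/* statmount() wants the 64-bit unique mount ID. */
	if (statx(AT_FDCWD, "/", 0, STATX_MNT_ID_UNIQUE, &sx) < 0) {
		perror("statx");
		return 1;
	}
	req.mnt_id = sx.stx_mnt_id;

	if (syscall(__NR_statmount, &req, sm, sizeof(buf), 0) < 0) {
		perror("statmount");
		return 1;
	}

	/* Per the comments in statmount_mnt_uidmap() above, the flags are
	 * raised even when zero mappings resolved, distinguishing "not
	 * idmapped" from "idmapped, but nothing is representable in the
	 * caller's namespace". */
	if (sm->mask & STATMOUNT_MNT_UIDMAP)
		print_maps(sm, sm->mnt_uidmap, sm->mnt_uidmap_num, "uid");
	if (sm->mask & STATMOUNT_MNT_GIDMAP)
		print_maps(sm, sm->mnt_gidmap, sm->mnt_gidmap_num, "gid");
	return 0;
}
```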
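Finally, the new open_tree_attr() syscall combines vfs_open_tree() with do_mount_setattr(), and because it is the one caller that sets MOUNT_KATTR_IDMAP_REPLACE, it can also change or (via attr_clr = MOUNT_ATTR_IDMAP) drop an idmapping before the detached mount is ever exposed to userspace, something mount_setattr() still refuses. A sketch of cloning and idmapping a tree in one call; __NR_open_tree_attr and __NR_move_mount are assumed to come from current kernel headers, struct mount_attr and the OPEN_TREE_*/MOVE_MOUNT_* constants from glibc >= 2.36 <sys/mount.h>, and the user-namespace fd (argv[2]) is assumed to be set up elsewhere with the desired mapping:

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/syscall.h>

int main(int argc, char *argv[])
{
	if (argc != 4) {
		fprintf(stderr, "usage: %s <src-dir> <userns-fd> <dest-dir>\n",
			argv[0]);
		return 1;
	}

	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_IDMAP,
		.userns_fd = atoi(argv[2]),
	};

	/* Clone and idmap in one step; the mapping is applied while the
	 * detached copy is still invisible to userspace. */
	int mnt_fd = syscall(__NR_open_tree_attr, AT_FDCWD, argv[1],
			     OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC,
			     &attr, sizeof(attr));
	if (mnt_fd < 0) {
		perror("open_tree_attr");
		return 1;
	}

	/* Attach the idmapped copy at its destination. */
	if (syscall(__NR_move_mount, mnt_fd, "", AT_FDCWD, argv[3],
		    MOVE_MOUNT_F_EMPTY_PATH) < 0) {
		perror("move_mount");
		return 1;
	}
	return 0;
}
```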
