Diffstat (limited to 'fs')
-rw-r--r--  fs/autofs/autofs_i.h                 |   2
-rw-r--r--  fs/fsopen.c                          |   2
-rw-r--r--  fs/internal.h                        |   1
-rw-r--r--  fs/mnt_idmapping.c                   |  51
-rw-r--r--  fs/mount.h                           |  26
-rw-r--r--  fs/namespace.c                       | 485
-rw-r--r--  fs/notify/fanotify/fanotify.c        |  38
-rw-r--r--  fs/notify/fanotify/fanotify.h        |  18
-rw-r--r--  fs/notify/fanotify/fanotify_user.c   |  89
-rw-r--r--  fs/notify/fdinfo.c                   |   5
-rw-r--r--  fs/notify/fsnotify.c                 |  47
-rw-r--r--  fs/notify/fsnotify.h                 |  11
-rw-r--r--  fs/notify/mark.c                     |  14
-rw-r--r--  fs/pnode.c                           |   4
14 files changed, 642 insertions, 151 deletions
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 77c7991d89aa..23cea74f9933 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -218,6 +218,8 @@ void autofs_clean_ino(struct autofs_info *);
static inline int autofs_check_pipe(struct file *pipe)
{
+ if (pipe->f_mode & FMODE_PATH)
+ return -EINVAL;
if (!(pipe->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
if (!S_ISFIFO(file_inode(pipe)->i_mode))
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 094a7f510edf..1aaf4cb2afb2 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -453,7 +453,7 @@ SYSCALL_DEFINE5(fsconfig,
case FSCONFIG_SET_FD:
param.type = fs_value_is_file;
ret = -EBADF;
- param.file = fget(aux);
+ param.file = fget_raw(aux);
if (!param.file)
goto out_key;
param.dirfd = aux;
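
[Editor's note: the fsopen.c change above swaps fget() for fget_raw(), so FSCONFIG_SET_FD no longer bounces O_PATH descriptors with -EBADF at the syscall boundary; the autofs hunk is the matching consumer-side fix, explicitly rejecting FMODE_PATH pipes. A minimal userspace sketch of the interaction — assuming a libc that exposes SYS_fsopen/SYS_fsconfig, that autofs's pipe parameter is the "fd" key, and CAP_SYS_ADMIN; error handling trimmed:]

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/mount.h>        /* FSCONFIG_SET_FD */

    int main(void)
    {
            char self[64];
            int pfd[2], opath, fsfd;

            if (pipe(pfd))
                    return 1;

            /* Re-open the pipe's write end as an O_PATH (FMODE_PATH) fd. */
            snprintf(self, sizeof(self), "/proc/self/fd/%d", pfd[1]);
            opath = open(self, O_PATH);

            fsfd = syscall(SYS_fsopen, "autofs", 0);

            /*
             * fget() used to reject the O_PATH fd here with -EBADF; with
             * fget_raw() it reaches the filesystem, and autofs's new
             * FMODE_PATH check is what turns it into -EINVAL instead.
             */
            if (syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_FD, "fd", NULL, opath))
                    perror("fsconfig");        /* expect EINVAL from autofs */
            return 0;
    }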
diff --git a/fs/internal.h b/fs/internal.h
index 82127c69e641..4c19d15ef08a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path)
}
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);
+int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 7b1df8cc2821..a37991fdb194 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -6,6 +6,7 @@
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
+#include <linux/seq_file.h>
#include "internal.h"
@@ -334,3 +335,53 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
free_mnt_idmap(idmap);
}
EXPORT_SYMBOL_GPL(mnt_idmap_put);
+
+int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map)
+{
+ struct uid_gid_map *map, *map_up;
+ u32 idx, nr_mappings;
+
+ if (!is_valid_mnt_idmap(idmap))
+ return 0;
+
+ /*
+ * Idmappings are shown relative to the caller's idmapping.
+ * This is both the most intuitive and most useful solution.
+ */
+ if (uid_map) {
+ map = &idmap->uid_map;
+ map_up = &current_user_ns()->uid_map;
+ } else {
+ map = &idmap->gid_map;
+ map_up = &current_user_ns()->gid_map;
+ }
+
+ for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) {
+ uid_t lower;
+ struct uid_gid_extent *extent;
+
+ if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+ extent = &map->extent[idx];
+ else
+ extent = &map->forward[idx];
+
+ /*
+ * Verify that the whole range of the mapping can be
+ * resolved in the caller's idmapping. If it cannot be
+ * resolved, skip the mapping.
+ */
+ lower = map_id_range_up(map_up, extent->lower_first, extent->count);
+ if (lower == (uid_t) -1)
+ continue;
+
+ seq_printf(seq, "%u %u %u", extent->first, lower, extent->count);
+
+ seq->count++; /* mappings are separated by \0 */
+ if (seq_has_overflowed(seq))
+ return -EAGAIN;
+
+ nr_mappings++;
+ }
+
+ return nr_mappings;
+}
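
[Editor's note: a worked example of the format this helper emits (a sketch, not authoritative): for an idmapped mount whose idmapping was created from the single extent "0 1000 1" (id 0 in the mapping backed by host id 1000), a caller in the initial user namespace reads back the triplet 0 1000 1, while a caller inside a user namespace that itself maps host uid 1000 to 0 reads 0 0 1, because map_id_range_up() resolves the lower end into the caller's idmapping. The bare seq->count++ turns each triplet into its own NUL-terminated string, so a consumer can hop through the buffer like this:]

    #include <stdio.h>
    #include <string.h>

    /*
     * Minimal sketch: walk nr consecutive NUL-terminated "first lower count"
     * triplets starting at p, laid out the way statmount_mnt_idmap() emits
     * them.
     */
    static void print_idmap_triplets(const char *p, unsigned int nr)
    {
            unsigned int i;

            for (i = 0; i < nr; i++, p += strlen(p) + 1)
                    printf("mapping %u: %s\n", i, p);
    }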
diff --git a/fs/mount.h b/fs/mount.h
index ffb613cdfeee..946dc8b792d7 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -5,6 +5,8 @@
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
+extern struct list_head notify_list;
+
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
@@ -21,6 +23,10 @@ struct mnt_namespace {
struct rcu_head mnt_ns_rcu;
};
u64 event;
+#ifdef CONFIG_FSNOTIFY
+ __u32 n_fsnotify_mask;
+ struct fsnotify_mark_connector __rcu *n_fsnotify_marks;
+#endif
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
@@ -76,6 +82,8 @@ struct mount {
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
+ struct list_head to_notify; /* need to queue notification */
+ struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */
#endif
int mnt_id; /* mount identifier, reused */
u64 mnt_id_unique; /* mount ID unique until reboot */
@@ -177,3 +185,21 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
+
+#ifdef CONFIG_FSNOTIFY
+static inline void mnt_notify_add(struct mount *m)
+{
+ /* Optimize the case where there are no watches */
+ if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) ||
+ (m->prev_ns && m->prev_ns->n_fsnotify_marks))
+ list_add_tail(&m->to_notify, &notify_list);
+ else
+ m->prev_ns = m->mnt_ns;
+}
+#else
+static inline void mnt_notify_add(struct mount *m)
+{
+}
+#endif
+
+struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
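
[Editor's note: the to_notify/prev_ns fields added above feed a small state machine that namespace.c's mnt_notify() (further down in this series) resolves by comparing the namespace a mount was last reported in with its current one:

    prev_ns   mnt_ns    resulting notification
    NULL      set       attach to mnt_ns
    set       NULL      detach from prev_ns
    A         A         move within the same namespace
    A         B         detach from A, then attach to B

mnt_notify_add() queues the mount on notify_list only when one of the two namespaces actually carries marks; otherwise it merely records prev_ns = mnt_ns so that a later transition can still be classified correctly.]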
diff --git a/fs/namespace.c b/fs/namespace.c
index 8f1000f9f3df..4c8bd48c8a62 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -81,15 +81,23 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_SEQLOCK(mnt_ns_tree_lock);
+#ifdef CONFIG_FSNOTIFY
+LIST_HEAD(notify_list); /* protected by namespace_sem */
+#endif
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
+enum mount_kattr_flags_t {
+ MOUNT_KATTR_RECURSE = (1 << 0),
+ MOUNT_KATTR_IDMAP_REPLACE = (1 << 1),
+};
+
struct mount_kattr {
unsigned int attr_set;
unsigned int attr_clr;
unsigned int propagation;
unsigned int lookup_flags;
- bool recurse;
+ enum mount_kattr_flags_t kflags;
struct user_namespace *mnt_userns;
struct mnt_idmap *mnt_idmap;
};
@@ -163,6 +171,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
{
/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
+ fsnotify_mntns_delete(ns);
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -1176,6 +1185,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
ns->mnt_first_node = &mnt->mnt_node;
rb_link_node(&mnt->mnt_node, parent, link);
rb_insert_color(&mnt->mnt_node, &ns->mounts);
+
+ mnt_notify_add(mnt);
}
/*
@@ -1723,6 +1734,50 @@ int may_umount(struct vfsmount *mnt)
EXPORT_SYMBOL(may_umount);
+#ifdef CONFIG_FSNOTIFY
+static void mnt_notify(struct mount *p)
+{
+ if (!p->prev_ns && p->mnt_ns) {
+ fsnotify_mnt_attach(p->mnt_ns, &p->mnt);
+ } else if (p->prev_ns && !p->mnt_ns) {
+ fsnotify_mnt_detach(p->prev_ns, &p->mnt);
+ } else if (p->prev_ns == p->mnt_ns) {
+ fsnotify_mnt_move(p->mnt_ns, &p->mnt);
+ } else {
+ fsnotify_mnt_detach(p->prev_ns, &p->mnt);
+ fsnotify_mnt_attach(p->mnt_ns, &p->mnt);
+ }
+ p->prev_ns = p->mnt_ns;
+}
+
+static void notify_mnt_list(void)
+{
+ struct mount *m, *tmp;
+ /*
+ * Notify about mounts that were added/reparented/detached or that
+ * remain connected after unmount.
+ */
+ list_for_each_entry_safe(m, tmp, &notify_list, to_notify) {
+ mnt_notify(m);
+ list_del_init(&m->to_notify);
+ }
+}
+
+static bool need_notify_mnt_list(void)
+{
+ return !list_empty(&notify_list);
+}
+#else
+static void notify_mnt_list(void)
+{
+}
+
+static bool need_notify_mnt_list(void)
+{
+ return false;
+}
+#endif
+
static void namespace_unlock(void)
{
struct hlist_head head;
@@ -1733,7 +1788,18 @@ static void namespace_unlock(void)
hlist_move_list(&unmounted, &head);
list_splice_init(&ex_mountpoints, &list);
- up_write(&namespace_sem);
+ if (need_notify_mnt_list()) {
+ /*
+ * No point blocking out concurrent readers while notifications
+ * are sent. This will also allow statmount()/listmount() to run
+ * concurrently.
+ */
+ downgrade_write(&namespace_sem);
+ notify_mnt_list();
+ up_read(&namespace_sem);
+ } else {
+ up_write(&namespace_sem);
+ }
shrink_dentry_list(&list);
@@ -1846,6 +1912,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
change_mnt_propagation(p, MS_PRIVATE);
if (disconnect)
hlist_add_head(&p->mnt_umount, &unmounted);
+
+ /*
+ * At this point p->mnt_ns is NULL, notification will be queued
+ * only if
+ *
+ * - p->prev_ns is non-NULL *and*
+ * - p->prev_ns->n_fsnotify_marks is non-NULL
+ *
+ * This will preclude queuing the mount if this is a cleanup
+ * after a failed copy_tree() or destruction of an anonymous
+ * namespace, etc.
+ */
+ mnt_notify_add(p);
}
}
@@ -2026,6 +2105,7 @@ static void warn_mandlock(void)
static int can_umount(const struct path *path, int flags)
{
struct mount *mnt = real_mount(path->mnt);
+ struct super_block *sb = path->dentry->d_sb;
if (!may_mount())
return -EPERM;
@@ -2035,7 +2115,7 @@ static int can_umount(const struct path *path, int flags)
return -EINVAL;
if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
return -EINVAL;
- if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
@@ -2145,16 +2225,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
}
}
+struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
+{
+ if (!is_mnt_ns_file(dentry))
+ return NULL;
+
+ return to_mnt_ns(get_proc_ns(dentry->d_inode));
+}
+
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
- struct mnt_namespace *mnt_ns;
- if (!is_mnt_ns_file(dentry))
+ struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);
+
+ if (!mnt_ns)
return false;
- mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
@@ -2287,6 +2375,28 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree. Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
+{
+ struct mount *p;
+ bool ret = false;
+
+ lock_mount_hash();
+ for (p = subtree; p; p = next_mnt(p, subtree))
+ if (mnt_ns_loop(p->mnt.mnt_root))
+ goto out;
+
+ ret = true;
+out:
+ unlock_mount_hash();
+ return ret;
+}
+
/**
* clone_private_mount - create a private clone of a path
* @path: path to clone
@@ -2295,6 +2405,8 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
* will not be attached anywhere in the namespace and will be private (i.e.
* changes to the originating mount won't be propagated into this).
*
+ * This assumes caller has called or done the equivalent of may_mount().
+ *
* Release with mntput().
*/
struct vfsmount *clone_private_mount(const struct path *path)
@@ -2302,30 +2414,36 @@ struct vfsmount *clone_private_mount(const struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
- down_read(&namespace_sem);
+ guard(rwsem_read)(&namespace_sem);
if (IS_MNT_UNBINDABLE(old_mnt))
- goto invalid;
+ return ERR_PTR(-EINVAL);
+
+ if (mnt_has_parent(old_mnt)) {
+ if (!check_mnt(old_mnt))
+ return ERR_PTR(-EINVAL);
+ } else {
+ if (!is_mounted(&old_mnt->mnt))
+ return ERR_PTR(-EINVAL);
- if (!check_mnt(old_mnt))
- goto invalid;
+ /* Make sure this isn't something purely kernel internal. */
+ if (!is_anon_ns(old_mnt->mnt_ns))
+ return ERR_PTR(-EINVAL);
+
+ /* Make sure we don't create mount namespace loops. */
+ if (!check_for_nsfs_mounts(old_mnt))
+ return ERR_PTR(-EINVAL);
+ }
if (has_locked_children(old_mnt, path->dentry))
- goto invalid;
+ return ERR_PTR(-EINVAL);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
- up_read(&namespace_sem);
-
if (IS_ERR(new_mnt))
- return ERR_CAST(new_mnt);
+ return ERR_PTR(-EINVAL);
/* Longterm mount to be removed by kern_unmount*() */
new_mnt->mnt_ns = MNT_NS_INTERNAL;
-
return &new_mnt->mnt;
-
-invalid:
- up_read(&namespace_sem);
- return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -2547,6 +2665,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
dest_mp = smp;
unhash_mnt(source_mnt);
attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
+ mnt_notify_add(source_mnt);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
if (source_mnt->mnt_ns) {
@@ -2889,24 +3008,22 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
return file;
}
-SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
+static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned int flags)
{
- struct file *file;
- struct path path;
+ int ret;
+ struct path path __free(path_put) = {};
int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
bool detached = flags & OPEN_TREE_CLONE;
- int error;
- int fd;
BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
OPEN_TREE_CLOEXEC))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
if (flags & AT_NO_AUTOMOUNT)
lookup_flags &= ~LOOKUP_AUTOMOUNT;
@@ -2916,27 +3033,32 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, fl
lookup_flags |= LOOKUP_EMPTY;
if (detached && !may_mount())
- return -EPERM;
+ return ERR_PTR(-EPERM);
+
+ ret = user_path_at(dfd, filename, lookup_flags, &path);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+
+ if (detached)
+ return open_detached_copy(&path, flags & AT_RECURSIVE);
+
+ return dentry_open(&path, O_PATH, current_cred());
+}
+
+SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
+{
+ int fd;
+ struct file *file __free(fput) = NULL;
+
+ file = vfs_open_tree(dfd, filename, flags);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
fd = get_unused_fd_flags(flags & O_CLOEXEC);
if (fd < 0)
return fd;
- error = user_path_at(dfd, filename, lookup_flags, &path);
- if (unlikely(error)) {
- file = ERR_PTR(error);
- } else {
- if (detached)
- file = open_detached_copy(&path, flags & AT_RECURSIVE);
- else
- file = dentry_open(&path, O_PATH, current_cred());
- path_put(&path);
- }
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- return PTR_ERR(file);
- }
- fd_install(fd, file);
+ fd_install(fd, no_free_ptr(file));
return fd;
}
@@ -3123,28 +3245,6 @@ static inline int tree_contains_unbindable(struct mount *mnt)
return 0;
}
-/*
- * Check that there aren't references to earlier/same mount namespaces in the
- * specified subtree. Such references can act as pins for mount namespaces
- * that aren't checked by the mount-cycle checking code, thereby allowing
- * cycles to be made.
- */
-static bool check_for_nsfs_mounts(struct mount *subtree)
-{
- struct mount *p;
- bool ret = false;
-
- lock_mount_hash();
- for (p = subtree; p; p = next_mnt(p, subtree))
- if (mnt_ns_loop(p->mnt.mnt_root))
- goto out;
-
- ret = true;
-out:
- unlock_mount_hash();
- return ret;
-}
-
static int do_set_group(struct path *from_path, struct path *to_path)
{
struct mount *from, *to;
@@ -4468,6 +4568,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
list_del_init(&new_mnt->mnt_expire);
put_mountpoint(root_mp);
unlock_mount_hash();
+ mnt_notify_add(root_mnt);
+ mnt_notify_add(new_mnt);
chroot_fs_refs(&root, &new);
error = 0;
out4:
@@ -4512,11 +4614,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
return -EINVAL;
/*
- * Once a mount has been idmapped we don't allow it to change its
- * mapping. It makes things simpler and callers can just create
- * another bind-mount they can idmap if they want to.
+ * We only allow a mount to change its idmapping if it has
+ * never been accessible to userspace.
*/
- if (is_idmapped_mnt(m))
+ if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m))
return -EPERM;
/* The underlying filesystem doesn't support idmapped mounts yet. */
@@ -4576,7 +4677,7 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
break;
}
- if (!kattr->recurse)
+ if (!(kattr->kflags & MOUNT_KATTR_RECURSE))
return 0;
}
@@ -4606,18 +4707,16 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
+ struct mnt_idmap *old_idmap;
+
if (!kattr->mnt_idmap)
return;
- /*
- * Pairs with smp_load_acquire() in mnt_idmap().
- *
- * Since we only allow a mount to change the idmapping once and
- * verified this in can_idmap_mount() we know that the mount has
- * @nop_mnt_idmap attached to it. So there's no need to drop any
- * references.
- */
+ old_idmap = mnt_idmap(&mnt->mnt);
+
+ /* Pairs with smp_load_acquire() in mnt_idmap(). */
smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap));
+ mnt_idmap_put(old_idmap);
}
static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
@@ -4637,7 +4736,7 @@ static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
if (kattr->propagation)
change_mnt_propagation(m, kattr->propagation);
- if (!kattr->recurse)
+ if (!(kattr->kflags & MOUNT_KATTR_RECURSE))
break;
}
touch_mnt_namespace(mnt->mnt_ns);
@@ -4667,7 +4766,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
*/
namespace_lock();
if (kattr->propagation == MS_SHARED) {
- err = invent_group_ids(mnt, kattr->recurse);
+ err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE);
if (err) {
namespace_unlock();
return err;
@@ -4718,7 +4817,7 @@ out:
}
static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
- struct mount_kattr *kattr, unsigned int flags)
+ struct mount_kattr *kattr)
{
struct ns_common *ns;
struct user_namespace *mnt_userns;
@@ -4726,13 +4825,23 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
return 0;
- /*
- * We currently do not support clearing an idmapped mount. If this ever
- * is a use-case we can revisit this but for now let's keep it simple
- * and not allow it.
- */
- if (attr->attr_clr & MOUNT_ATTR_IDMAP)
- return -EINVAL;
+ if (attr->attr_clr & MOUNT_ATTR_IDMAP) {
+ /*
+ * We can only remove an idmapping if it's never been
+ * exposed to userspace.
+ */
+ if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE))
+ return -EINVAL;
+
+ /*
+ * Removal of idmappings is equivalent to setting
+ * nop_mnt_idmap.
+ */
+ if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) {
+ kattr->mnt_idmap = &nop_mnt_idmap;
+ return 0;
+ }
+ }
if (attr->userns_fd > INT_MAX)
return -EINVAL;
@@ -4769,22 +4878,8 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
}
static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
- struct mount_kattr *kattr, unsigned int flags)
+ struct mount_kattr *kattr)
{
- unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
-
- if (flags & AT_NO_AUTOMOUNT)
- lookup_flags &= ~LOOKUP_AUTOMOUNT;
- if (flags & AT_SYMLINK_NOFOLLOW)
- lookup_flags &= ~LOOKUP_FOLLOW;
- if (flags & AT_EMPTY_PATH)
- lookup_flags |= LOOKUP_EMPTY;
-
- *kattr = (struct mount_kattr) {
- .lookup_flags = lookup_flags,
- .recurse = !!(flags & AT_RECURSIVE),
- };
-
if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
return -EINVAL;
if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
@@ -4832,35 +4927,28 @@ static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
return -EINVAL;
}
- return build_mount_idmapped(attr, usize, kattr, flags);
+ return build_mount_idmapped(attr, usize, kattr);
}
static void finish_mount_kattr(struct mount_kattr *kattr)
{
- put_user_ns(kattr->mnt_userns);
- kattr->mnt_userns = NULL;
+ if (kattr->mnt_userns) {
+ put_user_ns(kattr->mnt_userns);
+ kattr->mnt_userns = NULL;
+ }
if (kattr->mnt_idmap)
mnt_idmap_put(kattr->mnt_idmap);
}
-SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
- unsigned int, flags, struct mount_attr __user *, uattr,
- size_t, usize)
+static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize,
+ struct mount_kattr *kattr)
{
- int err;
- struct path target;
+ int ret;
struct mount_attr attr;
- struct mount_kattr kattr;
BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
- if (flags & ~(AT_EMPTY_PATH |
- AT_RECURSIVE |
- AT_SYMLINK_NOFOLLOW |
- AT_NO_AUTOMOUNT))
- return -EINVAL;
-
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
@@ -4869,9 +4957,9 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
if (!may_mount())
return -EPERM;
- err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
- if (err)
- return err;
+ ret = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
+ if (ret)
+ return ret;
/* Don't bother walking through the mounts if this is a nop. */
if (attr.attr_set == 0 &&
@@ -4879,7 +4967,39 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
attr.propagation == 0)
return 0;
- err = build_mount_kattr(&attr, usize, &kattr, flags);
+ return build_mount_kattr(&attr, usize, kattr);
+}
+
+SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
+ unsigned int, flags, struct mount_attr __user *, uattr,
+ size_t, usize)
+{
+ int err;
+ struct path target;
+ struct mount_kattr kattr;
+ unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
+
+ if (flags & ~(AT_EMPTY_PATH |
+ AT_RECURSIVE |
+ AT_SYMLINK_NOFOLLOW |
+ AT_NO_AUTOMOUNT))
+ return -EINVAL;
+
+ if (flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ lookup_flags &= ~LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ kattr = (struct mount_kattr) {
+ .lookup_flags = lookup_flags,
+ };
+
+ if (flags & AT_RECURSIVE)
+ kattr.kflags |= MOUNT_KATTR_RECURSE;
+
+ err = copy_mount_setattr(uattr, usize, &kattr);
if (err)
return err;
@@ -4892,6 +5012,47 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
return err;
}
+SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
+ unsigned, flags, struct mount_attr __user *, uattr,
+ size_t, usize)
+{
+ struct file __free(fput) *file = NULL;
+ int fd;
+
+ if (!uattr && usize)
+ return -EINVAL;
+
+ file = vfs_open_tree(dfd, filename, flags);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ if (uattr) {
+ int ret;
+ struct mount_kattr kattr = {};
+
+ kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
+ if (flags & AT_RECURSIVE)
+ kattr.kflags |= MOUNT_KATTR_RECURSE;
+
+ ret = copy_mount_setattr(uattr, usize, &kattr);
+ if (ret)
+ return ret;
+
+ ret = do_mount_setattr(&file->f_path, &kattr);
+ /* drop the kattr references whether or not setattr succeeded */
+ finish_mount_kattr(&kattr);
+ if (ret)
+ return ret;
+ }
+
+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ fd_install(fd, no_free_ptr(file));
+ return fd;
+}
+
int show_path(struct seq_file *m, struct dentry *root)
{
if (root->d_sb->s_op->show_path)
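
[Editor's note: for illustration, a hedged userspace sketch of the new syscall — clone a tree with OPEN_TREE_CLONE and attach an idmapping to the detached (hence never-exposed) copy in a single call. The syscall number and the get_userns_fd() helper are assumptions; MOUNT_ATTR_IDMAP rejects the initial user namespace, so the fd must come from a previously prepared user namespace:]

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/mount.h>

    #ifndef __NR_open_tree_attr
    #define __NR_open_tree_attr 467        /* assumption: x86-64 syscall number */
    #endif

    extern int get_userns_fd(void);        /* hypothetical: fd of a prepared userns */

    int main(void)
    {
            struct mount_attr attr = {
                    .attr_set  = MOUNT_ATTR_IDMAP,
                    .userns_fd = get_userns_fd(),
            };
            int fd;

            /*
             * The detached clone has never been visible to userspace, which
             * is what the MOUNT_KATTR_IDMAP_REPLACE-gated checks require.
             */
            fd = syscall(__NR_open_tree_attr, AT_FDCWD, "/mnt",
                         OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC,
                         &attr, sizeof(attr));
            if (fd < 0)
                    perror("open_tree_attr");
            /* on success, attach the idmapped tree with move_mount(2) */
            return fd < 0;
    }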
@@ -4915,6 +5076,7 @@ struct kstatmount {
struct statmount __user *buf;
size_t bufsize;
struct vfsmount *mnt;
+ struct mnt_idmap *idmap;
u64 mask;
struct path root;
struct statmount sm;
@@ -5184,6 +5346,46 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
return 0;
}
+static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq)
+{
+ int ret;
+
+ ret = statmount_mnt_idmap(s->idmap, seq, true);
+ if (ret < 0)
+ return ret;
+
+ s->sm.mnt_uidmap_num = ret;
+ /*
+ * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid
+ * mappings. This allows userspace to distinguish between a
+ * non-idmapped mount and an idmapped mount where none of the
+ * individual mappings are valid in the caller's idmapping.
+ */
+ if (is_valid_mnt_idmap(s->idmap))
+ s->sm.mask |= STATMOUNT_MNT_UIDMAP;
+ return 0;
+}
+
+static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq)
+{
+ int ret;
+
+ ret = statmount_mnt_idmap(s->idmap, seq, false);
+ if (ret < 0)
+ return ret;
+
+ s->sm.mnt_gidmap_num = ret;
+ /*
+ * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid
+ * mappings. This allows userspace to distinguish between a
+ * non-idmapped mount and an idmapped mount where none of the
+ * individual mappings are valid in the caller's idmapping.
+ */
+ if (is_valid_mnt_idmap(s->idmap))
+ s->sm.mask |= STATMOUNT_MNT_GIDMAP;
+ return 0;
+}
+
static int statmount_string(struct kstatmount *s, u64 flag)
{
int ret = 0;
@@ -5231,6 +5433,14 @@ static int statmount_string(struct kstatmount *s, u64 flag)
offp = &sm->sb_source;
ret = statmount_sb_source(s, seq);
break;
+ case STATMOUNT_MNT_UIDMAP:
+ sm->mnt_uidmap = start;
+ ret = statmount_mnt_uidmap(s, seq);
+ break;
+ case STATMOUNT_MNT_GIDMAP:
+ sm->mnt_gidmap = start;
+ ret = statmount_mnt_gidmap(s, seq);
+ break;
default:
WARN_ON_ONCE(true);
return -EINVAL;
@@ -5323,6 +5533,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
return 0;
}
+/* This must be updated whenever a new flag is added */
+#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
+ STATMOUNT_MNT_BASIC | \
+ STATMOUNT_PROPAGATE_FROM | \
+ STATMOUNT_MNT_ROOT | \
+ STATMOUNT_MNT_POINT | \
+ STATMOUNT_FS_TYPE | \
+ STATMOUNT_MNT_NS_ID | \
+ STATMOUNT_MNT_OPTS | \
+ STATMOUNT_FS_SUBTYPE | \
+ STATMOUNT_SB_SOURCE | \
+ STATMOUNT_OPT_ARRAY | \
+ STATMOUNT_OPT_SEC_ARRAY | \
+ STATMOUNT_SUPPORTED_MASK)
+
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
{
@@ -5356,6 +5581,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
return err;
s->root = root;
+ s->idmap = mnt_idmap(s->mnt);
if (s->mask & STATMOUNT_SB_BASIC)
statmount_sb_basic(s);
@@ -5389,12 +5615,26 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (!err && s->mask & STATMOUNT_SB_SOURCE)
err = statmount_string(s, STATMOUNT_SB_SOURCE);
+ if (!err && s->mask & STATMOUNT_MNT_UIDMAP)
+ err = statmount_string(s, STATMOUNT_MNT_UIDMAP);
+
+ if (!err && s->mask & STATMOUNT_MNT_GIDMAP)
+ err = statmount_string(s, STATMOUNT_MNT_GIDMAP);
+
if (!err && s->mask & STATMOUNT_MNT_NS_ID)
statmount_mnt_ns_id(s, ns);
+ if (!err && s->mask & STATMOUNT_SUPPORTED_MASK) {
+ s->sm.mask |= STATMOUNT_SUPPORTED_MASK;
+ s->sm.supported_mask = STATMOUNT_SUPPORTED;
+ }
+
if (err)
return err;
+ /* Are there bits in the return mask not present in STATMOUNT_SUPPORTED? */
+ WARN_ON_ONCE(~STATMOUNT_SUPPORTED & s->sm.mask);
+
return 0;
}
@@ -5412,7 +5652,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \
STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \
- STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY)
+ STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \
+ STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP)
static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
struct statmount __user *buf, size_t bufsize,
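
[Editor's note: to surface the new fields end to end, a hedged statmount() sketch. SYS_statmount shipped in 6.8; the two mask values are copied from this series' uapi additions and are assumptions for older headers, as are struct statmount's mnt_uidmap{,_num}/mnt_gidmap{,_num} fields and the get_mnt_id() helper, a stand-in for statx() with STATX_MNT_ID_UNIQUE:]

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/mount.h>

    #ifndef STATMOUNT_MNT_UIDMAP
    #define STATMOUNT_MNT_UIDMAP 0x00002000U        /* assumption: new uapi bits */
    #define STATMOUNT_MNT_GIDMAP 0x00004000U
    #endif

    extern __u64 get_mnt_id(const char *path);      /* hypothetical helper */

    static void dump_map(const struct statmount *sm, __u32 off, __u32 nr,
                         const char *what)
    {
            const char *s = sm->str + off;
            __u32 i;

            /* nr back-to-back NUL-terminated "first lower count" triplets */
            for (i = 0; i < nr; i++, s += strlen(s) + 1)
                    printf("%s mapping: %s\n", what, s);
    }

    int main(void)
    {
            char buf[4096];
            struct statmount *sm = (struct statmount *)buf;
            struct mnt_id_req req = {
                    .size   = MNT_ID_REQ_SIZE_VER0,
                    .mnt_id = get_mnt_id("/mnt"),
                    .param  = STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP,
            };

            if (syscall(SYS_statmount, &req, sm, sizeof(buf), 0))
                    return 1;

            if (sm->mask & STATMOUNT_MNT_UIDMAP)
                    dump_map(sm, sm->mnt_uidmap, sm->mnt_uidmap_num, "uid");
            if (sm->mask & STATMOUNT_MNT_GIDMAP)
                    dump_map(sm, sm->mnt_gidmap, sm->mnt_gidmap_num, "gid");
            return 0;
    }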
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 95646f7c46ca..6d386080faf2 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new));
+ case FANOTIFY_EVENT_TYPE_MNT:
+ return false;
default:
WARN_ON_ONCE(1);
}
@@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
- if (!fid_mode) {
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+ if (data_type != FSNOTIFY_EVENT_MNT)
+ return 0;
+ } else if (!fid_mode) {
/* Do we have path to open a file descriptor? */
if (!path)
return 0;
@@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}
+static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
+{
+ struct fanotify_mnt_event *pevent;
+
+ pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
+ if (!pevent)
+ return NULL;
+
+ pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
+ pevent->mnt_id = mnt_id;
+
+ return &pevent->fae;
+}
+
static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
int data_type,
gfp_t gfp)
@@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
fid_mode);
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
+ u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
struct mem_cgroup *old_memcg;
struct dentry *moved = NULL;
struct inode *child = NULL;
@@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
moved, &hash, gfp);
} else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
- } else {
+ } else if (path) {
event = fanotify_alloc_path_event(path, &hash, gfp);
+ } else if (mnt_id) {
+ event = fanotify_alloc_mnt_event(mnt_id, gfp);
+ } else {
+ WARN_ON_ONCE(1);
}
if (!event)
@@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
@@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
mempool_free(fee, &group->fanotify_data.error_events_pool);
}
+static void fanotify_free_mnt_event(struct fanotify_event *event)
+{
+ kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
+}
+
static void fanotify_free_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
@@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
fanotify_free_error_event(group, event);
break;
+ case FANOTIFY_EVENT_TYPE_MNT:
+ fanotify_free_mnt_event(event);
+ break;
default:
WARN_ON_ONCE(1);
}
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index c12cbc270539..b44e70e44be6 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
+extern struct kmem_cache *fanotify_mnt_event_cachep;
/* Possible states of the permission event */
enum {
@@ -244,6 +245,7 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
+ FANOTIFY_EVENT_TYPE_MNT,
__FANOTIFY_EVENT_TYPE_NUM
};
@@ -409,12 +411,23 @@ struct fanotify_path_event {
struct path path;
};
+struct fanotify_mnt_event {
+ struct fanotify_event fae;
+ u64 mnt_id;
+};
+
static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
return container_of(event, struct fanotify_path_event, fae);
}
+static inline struct fanotify_mnt_event *
+FANOTIFY_ME(struct fanotify_event *event)
+{
+ return container_of(event, struct fanotify_mnt_event, fae);
+}
+
/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
@@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}
+static inline bool fanotify_is_mnt_event(u32 mask)
+{
+ return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
+}
+
static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ba3e2d09eb44..f2d840ae4ded 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
+struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
@@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
(sizeof(struct fanotify_event_info_error))
#define FANOTIFY_RANGE_INFO_LEN \
(sizeof(struct fanotify_event_info_range))
+#define FANOTIFY_MNT_INFO_LEN \
+ (sizeof(struct fanotify_event_info_mnt))
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
fh_len = fanotify_event_object_fh_len(event);
event_len += fanotify_fid_info_len(fh_len, dot_len);
}
+ if (fanotify_is_mnt_event(event->mask))
+ event_len += FANOTIFY_MNT_INFO_LEN;
if (info_mode & FAN_REPORT_PIDFD)
event_len += FANOTIFY_PIDFD_INFO_LEN;
@@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
+static size_t copy_mnt_info_to_user(struct fanotify_event *event,
+ char __user *buf, int count)
+{
+ struct fanotify_event_info_mnt info = { };
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
+ info.hdr.len = FANOTIFY_MNT_INFO_LEN;
+
+ if (WARN_ON(count < info.hdr.len))
+ return -EFAULT;
+
+ info.mnt_id = FANOTIFY_ME(event)->mnt_id;
+
+ if (copy_to_user(buf, &info, sizeof(info)))
+ return -EFAULT;
+
+ return info.hdr.len;
+}
+
static size_t copy_error_info_to_user(struct fanotify_event *event,
char __user *buf, int count)
{
@@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
total_bytes += ret;
}
+ if (fanotify_is_mnt_event(event->mask)) {
+ ret = copy_mnt_info_to_user(event, buf, count);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
return total_bytes;
}
@@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
return -EINVAL;
+ /* Don't allow mixing mnt events with inode events for now */
+ if (flags & FAN_REPORT_MNT) {
+ if (class != FAN_CLASS_NOTIF)
+ return -EINVAL;
+ if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
+ return -EINVAL;
+ }
+
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
- struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct path path;
struct fan_fsid __fsid, *fsid = NULL;
@@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
- void *obj;
+ void *obj = NULL;
u32 umask = 0;
int ret;
@@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
case FAN_MARK_FILESYSTEM:
obj_type = FSNOTIFY_OBJ_TYPE_SB;
break;
+ case FAN_MARK_MNTNS:
+ obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
+ break;
default:
return -EINVAL;
}
@@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
group = fd_file(f)->private_data;
+ /* Only report mount events on mnt namespace */
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+ if (mask & ~FANOTIFY_MOUNT_EVENTS)
+ return -EINVAL;
+ if (mark_type != FAN_MARK_MNTNS)
+ return -EINVAL;
+ } else {
+ if (mask & FANOTIFY_MOUNT_EVENTS)
+ return -EINVAL;
+ if (mark_type == FAN_MARK_MNTNS)
+ return -EINVAL;
+ }
+
/*
* An unprivileged user is not allowed to setup mount nor filesystem
* marks. This also includes setting up such marks by a group that
@@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* point.
*/
fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
- if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
+ if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
(!fid_mode || mark_type == FAN_MARK_MOUNT))
return -EINVAL;
@@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* inode held in place by reference to path; group by fget on fd */
- if (mark_type == FAN_MARK_INODE) {
+ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode;
obj = inode;
- } else {
- mnt = path.mnt;
- if (mark_type == FAN_MARK_MOUNT)
- obj = mnt;
- else
- obj = mnt->mnt_sb;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ obj = path.mnt;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
+ obj = path.mnt->mnt_sb;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ obj = mnt_ns_from_dentry(path.dentry);
}
+ ret = -EINVAL;
+ if (!obj)
+ goto path_put_and_out;
+
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
@@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
*/
if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
- ret = mnt ? -EINVAL : -EISDIR;
+ ret = !inode ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (ignore == FAN_MARK_IGNORE &&
- (mnt || S_ISDIR(inode->i_mode)))
+ (!inode || S_ISDIR(inode->i_mode)))
goto path_put_and_out;
ret = 0;
@@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
- if (mnt || !S_ISDIR(inode->i_mode)) {
+ if (!inode || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
umask = FAN_EVENT_ON_CHILD;
/*
@@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void)
fanotify_perm_event_cachep =
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
}
+ fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
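
[Editor's note: a hedged end-to-end sketch of the new interface — create a FAN_REPORT_MNT group (notification class only, as enforced above), mark the caller's own mount namespace via its nsfs path, and read FAN_EVENT_INFO_TYPE_MNT records carrying the 64-bit mount ID. The fallback constant values mirror this series' uapi additions and are assumptions for pre-series <linux/fanotify.h>:]

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/fanotify.h>

    /* Assumptions: fallback values mirror this series' uapi additions and
     * may differ from the headers that finally ship. */
    #ifndef FAN_REPORT_MNT
    #define FAN_REPORT_MNT          0x00004000
    #define FAN_MARK_MNTNS          0x00000110
    #define FAN_MNT_ATTACH          0x01000000
    #define FAN_MNT_DETACH          0x02000000
    #define FAN_EVENT_INFO_TYPE_MNT 7
    struct fanotify_event_info_mnt {
            struct fanotify_event_info_header hdr;
            __u64 mnt_id;
    };
    #endif

    int main(void)
    {
            char buf[4096];
            ssize_t len;
            int fd;

            /* FAN_REPORT_MNT groups must be FAN_CLASS_NOTIF (see above). */
            fd = fanotify_init(FAN_REPORT_MNT | FAN_CLASS_NOTIF, 0);
            if (fd < 0)
                    return 1;

            /* Mount-namespace marks take an nsfs path, not a regular file. */
            if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
                              FAN_MNT_ATTACH | FAN_MNT_DETACH,
                              AT_FDCWD, "/proc/self/ns/mnt"))
                    return 1;

            while ((len = read(fd, buf, sizeof(buf))) > 0) {
                    struct fanotify_event_metadata *md = (void *)buf;

                    for (; FAN_EVENT_OK(md, len); md = FAN_EVENT_NEXT(md, len)) {
                            struct fanotify_event_info_mnt *m = (void *)(md + 1);

                            if (m->hdr.info_type != FAN_EVENT_INFO_TYPE_MNT)
                                    continue;
                            printf("%s mnt_id=%llu\n",
                                   md->mask & FAN_MNT_ATTACH ? "attach" : "detach",
                                   (unsigned long long)m->mnt_id);
                    }
            }
            return 0;
    }

[This needs CAP_SYS_ADMIN, and the marked path must resolve to a mount-namespace nsfs inode: mnt_ns_from_dentry() returning NULL is what produces the -EINVAL in the do_fanotify_mark() hunk above.]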
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index e933f9c65d90..1161eabf11ee 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
sb->s_dev, mflags, mark->mask, mark->ignore_mask);
+ } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
+
+ seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
+ mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
}
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index fae1b6d397ea..e2b4f17a48bb 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
fsnotify_clear_marks_by_mount(mnt);
}
+void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+ fsnotify_clear_marks_by_mntns(mntns);
+}
+
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @sb: superblock being unmounted.
@@ -420,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
file_name, cookie, iter_info);
}
-static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
+static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp)
{
struct fsnotify_mark_connector *conn;
struct hlist_node *node = NULL;
@@ -538,14 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
{
const struct path *path = fsnotify_data_path(data, data_type);
struct super_block *sb = fsnotify_data_sb(data, data_type);
- struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+ const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+ struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL;
struct fsnotify_iter_info iter_info = {};
struct mount *mnt = NULL;
struct inode *inode2 = NULL;
struct dentry *moved;
int inode2_type;
int ret = 0;
- __u32 test_mask, marks_mask;
+ __u32 test_mask, marks_mask = 0;
if (path)
mnt = real_mount(path->mnt);
@@ -578,17 +584,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if ((!sbinfo || !sbinfo->sb_marks) &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
- (!inode2 || !inode2->i_fsnotify_marks))
+ (!inode2 || !inode2->i_fsnotify_marks) &&
+ (!mnt_data || !mnt_data->ns->n_fsnotify_marks))
return 0;
- marks_mask = READ_ONCE(sb->s_fsnotify_mask);
+ if (sb)
+ marks_mask |= READ_ONCE(sb->s_fsnotify_mask);
if (mnt)
marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask);
if (inode)
marks_mask |= READ_ONCE(inode->i_fsnotify_mask);
if (inode2)
marks_mask |= READ_ONCE(inode2->i_fsnotify_mask);
-
+ if (mnt_data)
+ marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask);
/*
* If this is a modify event we may need to clear some ignore masks.
@@ -618,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[inode2_type] =
fsnotify_first_mark(&inode2->i_fsnotify_marks);
}
+ if (mnt_data) {
+ iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] =
+ fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks);
+ }
/*
* We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
@@ -708,11 +721,31 @@ void file_set_fsnotify_mode_from_watchers(struct file *file)
}
#endif
+void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ struct fsnotify_mnt data = {
+ .ns = ns,
+ .mnt_id = real_mount(mnt)->mnt_id_unique,
+ };
+
+ if (WARN_ON_ONCE(!ns))
+ return;
+
+ /*
+ * This is an optimization as well as making sure fsnotify_init() has
+ * been called.
+ */
+ if (!ns->n_fsnotify_marks)
+ return;
+
+ fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
+}
+
static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
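
[Editor's note: taken together with the namespace.c changes, the mount-event path is roughly: umount_tree(), mnt_add_to_ns(), attach_recursive_mnt() and friends call mnt_notify_add() under namespace_sem; namespace_unlock() downgrades to a read lock and drains notify_list through mnt_notify(), which classifies each mount as attach/detach/move from its prev_ns/mnt_ns pair; the fsnotify_mnt_attach/detach/move() helpers (added outside fs/, so absent from this diffstat, but presumably thin wrappers) funnel into fsnotify_mnt() above, which returns early unless the namespace has marks and otherwise feeds a struct fsnotify_mnt into fsnotify() as FSNOTIFY_EVENT_MNT, where the new FSNOTIFY_ITER_TYPE_MNTNS iterator slot delivers it to fanotify.]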
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 663759ed6fbc..5950c7a67f41 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb(
return conn->obj;
}
+static inline struct mnt_namespace *fsnotify_conn_mntns(
+ struct fsnotify_mark_connector *conn)
+{
+ return conn->obj;
+}
+
static inline struct super_block *fsnotify_object_sb(void *obj,
enum fsnotify_obj_type obj_type)
{
@@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
fsnotify_destroy_marks(fsnotify_sb_marks(sb));
}
+static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns)
+{
+ fsnotify_destroy_marks(&mntns->n_fsnotify_marks);
+}
+
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 4981439e6209..798340db69d7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj,
return &real_mount(obj)->mnt_fsnotify_marks;
case FSNOTIFY_OBJ_TYPE_SB:
return fsnotify_sb_marks(obj);
+ case FSNOTIFY_OBJ_TYPE_MNTNS:
+ return &((struct mnt_namespace *)obj)->n_fsnotify_marks;
default:
return NULL;
}
@@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
+ else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS)
+ return &fsnotify_conn_mntns(conn)->n_fsnotify_mask;
return NULL;
}
@@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object(
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0;
}
rcu_assign_pointer(*connp, NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
- fsnotify_update_sb_watchers(sb, conn);
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
return inode;
}
@@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
* Attach the sb info before attaching a connector to any object on sb.
* The sb info will remain attached as long as sb lives.
*/
- if (!fsnotify_sb_info(sb)) {
+ if (sb && !fsnotify_sb_info(sb)) {
err = fsnotify_attach_info_to_sb(sb);
if (err)
return err;
@@ -770,7 +777,8 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
- fsnotify_update_sb_watchers(sb, conn);
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
/*
* Since connector is attached to object using cmpxchg() we are
* guaranteed that connector initialization is fully visible by anyone
diff --git a/fs/pnode.c b/fs/pnode.c
index ef048f008bdd..82d809c785ec 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore)
mp = parent->mnt_mp;
parent = parent->mnt_parent;
}
- if (parent != mnt->mnt_parent)
+ if (parent != mnt->mnt_parent) {
mnt_change_mountpoint(parent, mp, mnt);
+ mnt_notify_add(mnt);
+ }
}
}